summaryrefslogtreecommitdiffstats
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kbuild1
-rw-r--r--arch/powerpc/Kconfig131
-rw-r--r--arch/powerpc/Kconfig.debug20
-rw-r--r--arch/powerpc/Makefile21
-rw-r--r--arch/powerpc/Makefile.postlink6
-rw-r--r--arch/powerpc/boot/4xx.c2
-rw-r--r--arch/powerpc/boot/Makefile6
-rw-r--r--arch/powerpc/boot/dts/fsl/kmcent2.dts52
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi1
-rw-r--r--arch/powerpc/boot/dts/mgcoge.dts2
-rw-r--r--arch/powerpc/boot/dts/mpc832x_rdb.dts2
-rw-r--r--arch/powerpc/boot/dts/mpc8610_hpcd.dts2
-rw-r--r--arch/powerpc/boot/libfdt_env.h2
-rw-r--r--arch/powerpc/boot/main.c41
-rw-r--r--arch/powerpc/boot/ops.h2
-rwxr-xr-xarch/powerpc/boot/wrapper24
-rw-r--r--arch/powerpc/boot/zImage.lds.S8
-rw-r--r--arch/powerpc/configs/40x/acadia_defconfig3
-rw-r--r--arch/powerpc/configs/40x/ep405_defconfig3
-rw-r--r--arch/powerpc/configs/40x/kilauea_defconfig3
-rw-r--r--arch/powerpc/configs/40x/klondike_defconfig1
-rw-r--r--arch/powerpc/configs/40x/makalu_defconfig3
-rw-r--r--arch/powerpc/configs/40x/obs600_defconfig3
-rw-r--r--arch/powerpc/configs/40x/walnut_defconfig3
-rw-r--r--arch/powerpc/configs/44x/akebono_defconfig4
-rw-r--r--arch/powerpc/configs/44x/arches_defconfig3
-rw-r--r--arch/powerpc/configs/44x/bamboo_defconfig3
-rw-r--r--arch/powerpc/configs/44x/canyonlands_defconfig3
-rw-r--r--arch/powerpc/configs/44x/currituck_defconfig3
-rw-r--r--arch/powerpc/configs/44x/ebony_defconfig3
-rw-r--r--arch/powerpc/configs/44x/eiger_defconfig3
-rw-r--r--arch/powerpc/configs/44x/fsp2_defconfig3
-rw-r--r--arch/powerpc/configs/44x/icon_defconfig3
-rw-r--r--arch/powerpc/configs/44x/iss476-smp_defconfig3
-rw-r--r--arch/powerpc/configs/44x/katmai_defconfig3
-rw-r--r--arch/powerpc/configs/44x/rainier_defconfig3
-rw-r--r--arch/powerpc/configs/44x/redwood_defconfig3
-rw-r--r--arch/powerpc/configs/44x/sam440ep_defconfig5
-rw-r--r--arch/powerpc/configs/44x/sequoia_defconfig3
-rw-r--r--arch/powerpc/configs/44x/taishan_defconfig3
-rw-r--r--arch/powerpc/configs/52xx/pcm030_defconfig5
-rw-r--r--arch/powerpc/configs/83xx/kmeter1_defconfig5
-rw-r--r--arch/powerpc/configs/83xx/mpc837x_rdb_defconfig3
-rw-r--r--arch/powerpc/configs/85xx/ge_imp3a_defconfig1
-rw-r--r--arch/powerpc/configs/adder875_defconfig4
-rw-r--r--arch/powerpc/configs/amigaone_defconfig3
-rw-r--r--arch/powerpc/configs/cell_defconfig2
-rw-r--r--arch/powerpc/configs/chrp32_defconfig3
-rw-r--r--arch/powerpc/configs/corenet_base.config (renamed from arch/powerpc/configs/corenet_basic_defconfig)0
-rw-r--r--arch/powerpc/configs/debug.config1
-rw-r--r--arch/powerpc/configs/ep8248e_defconfig1
-rw-r--r--arch/powerpc/configs/ep88xc_defconfig4
-rw-r--r--arch/powerpc/configs/gamecube_defconfig3
-rw-r--r--arch/powerpc/configs/guest.config1
-rw-r--r--arch/powerpc/configs/mgcoge_defconfig1
-rw-r--r--arch/powerpc/configs/mpc512x_defconfig4
-rw-r--r--arch/powerpc/configs/mpc5200_defconfig1
-rw-r--r--arch/powerpc/configs/mpc85xx_base.config (renamed from arch/powerpc/configs/mpc85xx_basic_defconfig)0
-rw-r--r--arch/powerpc/configs/mpc86xx_base.config (renamed from arch/powerpc/configs/mpc86xx_basic_defconfig)0
-rw-r--r--arch/powerpc/configs/mpc885_ads_defconfig4
-rw-r--r--arch/powerpc/configs/pmac32_defconfig3
-rw-r--r--arch/powerpc/configs/powernv_defconfig6
-rw-r--r--arch/powerpc/configs/ppc40x_defconfig1
-rw-r--r--arch/powerpc/configs/ppc44x_defconfig3
-rw-r--r--arch/powerpc/configs/ppc64_defconfig2
-rw-r--r--arch/powerpc/configs/ppc6xx_defconfig5
-rw-r--r--arch/powerpc/configs/ps3_defconfig3
-rw-r--r--arch/powerpc/configs/pseries_defconfig2
-rw-r--r--arch/powerpc/configs/skiroot_defconfig73
-rw-r--r--arch/powerpc/configs/storcenter_defconfig4
-rw-r--r--arch/powerpc/configs/tqm8xx_defconfig4
-rw-r--r--arch/powerpc/configs/wii_defconfig3
-rw-r--r--arch/powerpc/crypto/aes-spe-glue.c470
-rw-r--r--arch/powerpc/crypto/crc-vpmsum_test.c1
-rw-r--r--arch/powerpc/crypto/crc32c-vpmsum_glue.c4
-rw-r--r--arch/powerpc/include/asm/Kbuild4
-rw-r--r--arch/powerpc/include/asm/archrandom.h29
-rw-r--r--arch/powerpc/include/asm/asm-prototypes.h20
-rw-r--r--arch/powerpc/include/asm/barrier.h2
-rw-r--r--arch/powerpc/include/asm/bitops.h51
-rw-r--r--arch/powerpc/include/asm/book3s/32/kup.h69
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgalloc.h8
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h23
-rw-r--r--arch/powerpc/include/asm/book3s/64/kup-radix.h40
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu-hash.h5
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu.h4
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgalloc.h17
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable-4k.h3
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable-64k.h3
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h11
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix.h11
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-radix.h10
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush.h19
-rw-r--r--arch/powerpc/include/asm/book3s/pgtable.h11
-rw-r--r--arch/powerpc/include/asm/bug.h49
-rw-r--r--arch/powerpc/include/asm/cache.h55
-rw-r--r--arch/powerpc/include/asm/cacheflush.h36
-rw-r--r--arch/powerpc/include/asm/compat.h17
-rw-r--r--arch/powerpc/include/asm/cpm.h172
-rw-r--r--arch/powerpc/include/asm/cputable.h32
-rw-r--r--arch/powerpc/include/asm/current.h3
-rw-r--r--arch/powerpc/include/asm/dma-direct.h13
-rw-r--r--arch/powerpc/include/asm/dma-mapping.h18
-rw-r--r--arch/powerpc/include/asm/eeh.h40
-rw-r--r--arch/powerpc/include/asm/elf.h3
-rw-r--r--arch/powerpc/include/asm/elfnote.h24
-rw-r--r--arch/powerpc/include/asm/error-injection.h13
-rw-r--r--arch/powerpc/include/asm/fadump-internal.h169
-rw-r--r--arch/powerpc/include/asm/fadump.h194
-rw-r--r--arch/powerpc/include/asm/firmware.h11
-rw-r--r--arch/powerpc/include/asm/fixmap.h26
-rw-r--r--arch/powerpc/include/asm/ftrace.h2
-rw-r--r--arch/powerpc/include/asm/futex.h13
-rw-r--r--arch/powerpc/include/asm/head-64.h41
-rw-r--r--arch/powerpc/include/asm/hugetlb.h3
-rw-r--r--arch/powerpc/include/asm/hvcall.h10
-rw-r--r--arch/powerpc/include/asm/hw_breakpoint.h13
-rw-r--r--arch/powerpc/include/asm/hw_irq.h57
-rw-r--r--arch/powerpc/include/asm/io-workarounds.h20
-rw-r--r--arch/powerpc/include/asm/io.h19
-rw-r--r--arch/powerpc/include/asm/iommu.h28
-rw-r--r--arch/powerpc/include/asm/kasan.h2
-rw-r--r--arch/powerpc/include/asm/kup.h49
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h2
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_uvmem.h80
-rw-r--r--arch/powerpc/include/asm/kvm_host.h31
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h108
-rw-r--r--arch/powerpc/include/asm/local.h2
-rw-r--r--arch/powerpc/include/asm/machdep.h7
-rw-r--r--arch/powerpc/include/asm/mce.h10
-rw-r--r--arch/powerpc/include/asm/mem_encrypt.h26
-rw-r--r--arch/powerpc/include/asm/mmu.h2
-rw-r--r--arch/powerpc/include/asm/mmu_context.h5
-rw-r--r--arch/powerpc/include/asm/nohash/32/kup-8xx.h22
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h23
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/nohash/mmu-book3e.h11
-rw-r--r--arch/powerpc/include/asm/nohash/pgalloc.h8
-rw-r--r--arch/powerpc/include/asm/nohash/pgtable.h13
-rw-r--r--arch/powerpc/include/asm/opal-api.h48
-rw-r--r--arch/powerpc/include/asm/opal.h14
-rw-r--r--arch/powerpc/include/asm/page.h32
-rw-r--r--arch/powerpc/include/asm/page_32.h4
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h8
-rw-r--r--arch/powerpc/include/asm/pci.h3
-rw-r--r--arch/powerpc/include/asm/pgalloc.h2
-rw-r--r--arch/powerpc/include/asm/pgtable.h25
-rw-r--r--arch/powerpc/include/asm/plpar_wrappers.h6
-rw-r--r--arch/powerpc/include/asm/pnv-pci.h1
-rw-r--r--arch/powerpc/include/asm/ppc-pci.h7
-rw-r--r--arch/powerpc/include/asm/ppc4xx_ocm.h31
-rw-r--r--arch/powerpc/include/asm/ppc_asm.h80
-rw-r--r--arch/powerpc/include/asm/processor.h9
-rw-r--r--arch/powerpc/include/asm/ptrace.h6
-rw-r--r--arch/powerpc/include/asm/reg.h50
-rw-r--r--arch/powerpc/include/asm/reg_8xx.h18
-rw-r--r--arch/powerpc/include/asm/scom.h154
-rw-r--r--arch/powerpc/include/asm/sections.h25
-rw-r--r--arch/powerpc/include/asm/secure_boot.h29
-rw-r--r--arch/powerpc/include/asm/security_features.h11
-rw-r--r--arch/powerpc/include/asm/secvar.h35
-rw-r--r--arch/powerpc/include/asm/setjmp.h4
-rw-r--r--arch/powerpc/include/asm/spinlock.h62
-rw-r--r--arch/powerpc/include/asm/string.h2
-rw-r--r--arch/powerpc/include/asm/svm.h31
-rw-r--r--arch/powerpc/include/asm/thread_info.h18
-rw-r--r--arch/powerpc/include/asm/time.h6
-rw-r--r--arch/powerpc/include/asm/timex.h34
-rw-r--r--arch/powerpc/include/asm/tlb.h11
-rw-r--r--arch/powerpc/include/asm/uaccess.h111
-rw-r--r--arch/powerpc/include/asm/ultravisor-api.h39
-rw-r--r--arch/powerpc/include/asm/ultravisor.h85
-rw-r--r--arch/powerpc/include/asm/vdso_datapage.h22
-rw-r--r--arch/powerpc/include/asm/vmalloc.h4
-rw-r--r--arch/powerpc/include/asm/xive-regs.h1
-rw-r--r--arch/powerpc/include/asm/xive.h98
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h3
-rw-r--r--arch/powerpc/include/uapi/asm/msgbuf.h8
-rw-r--r--arch/powerpc/include/uapi/asm/sembuf.h6
-rw-r--r--arch/powerpc/include/uapi/asm/shmbuf.h6
-rw-r--r--arch/powerpc/include/uapi/asm/spu_info.h14
-rw-r--r--arch/powerpc/include/uapi/asm/stat.h2
-rw-r--r--arch/powerpc/kernel/.gitignore1
-rw-r--r--arch/powerpc/kernel/Makefile46
-rw-r--r--arch/powerpc/kernel/asm-offsets.c32
-rw-r--r--arch/powerpc/kernel/cpu_setup_fsl_booke.S2
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S6
-rw-r--r--arch/powerpc/kernel/cputable.c6
-rw-r--r--arch/powerpc/kernel/dawr.c6
-rw-r--r--arch/powerpc/kernel/dbell.c6
-rw-r--r--arch/powerpc/kernel/dma-iommu.c13
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c48
-rw-r--r--arch/powerpc/kernel/early_32.c9
-rw-r--r--arch/powerpc/kernel/eeh.c313
-rw-r--r--arch/powerpc/kernel/eeh_cache.c47
-rw-r--r--arch/powerpc/kernel/eeh_dev.c2
-rw-r--r--arch/powerpc/kernel/eeh_driver.c304
-rw-r--r--arch/powerpc/kernel/eeh_event.c34
-rw-r--r--arch/powerpc/kernel/eeh_pe.c145
-rw-r--r--arch/powerpc/kernel/eeh_sysfs.c40
-rw-r--r--arch/powerpc/kernel/entry_32.S78
-rw-r--r--arch/powerpc/kernel/entry_64.S49
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S34
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S1666
-rw-r--r--arch/powerpc/kernel/fadump.c1353
-rw-r--r--arch/powerpc/kernel/fpu.S3
-rw-r--r--arch/powerpc/kernel/fsl_booke_entry_mapping.S25
-rw-r--r--arch/powerpc/kernel/head_32.S117
-rw-r--r--arch/powerpc/kernel/head_32.h201
-rw-r--r--arch/powerpc/kernel/head_40x.S2
-rw-r--r--arch/powerpc/kernel/head_64.S8
-rw-r--r--arch/powerpc/kernel/head_8xx.S217
-rw-r--r--arch/powerpc/kernel/head_booke.h2
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S68
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c173
-rw-r--r--arch/powerpc/kernel/idle.c25
-rw-r--r--arch/powerpc/kernel/idle_book3s.S20
-rw-r--r--arch/powerpc/kernel/idle_power4.S83
-rw-r--r--arch/powerpc/kernel/ima_arch.c78
-rw-r--r--arch/powerpc/kernel/io-workarounds.c13
-rw-r--r--arch/powerpc/kernel/iommu.c97
-rw-r--r--arch/powerpc/kernel/irq.c26
-rw-r--r--arch/powerpc/kernel/kexec_elf_64.c660
-rw-r--r--arch/powerpc/kernel/kvm.c58
-rw-r--r--arch/powerpc/kernel/kvm_emul.S16
-rw-r--r--arch/powerpc/kernel/legacy_serial.c4
-rw-r--r--arch/powerpc/kernel/mce.c71
-rw-r--r--arch/powerpc/kernel/mce_power.c50
-rw-r--r--arch/powerpc/kernel/misc_32.S623
-rw-r--r--arch/powerpc/kernel/misc_64.S109
-rw-r--r--arch/powerpc/kernel/note.S40
-rw-r--r--arch/powerpc/kernel/paca.c52
-rw-r--r--arch/powerpc/kernel/pci-common.c50
-rw-r--r--arch/powerpc/kernel/pci-hotplug.c8
-rw-r--r--arch/powerpc/kernel/pci_32.c4
-rw-r--r--arch/powerpc/kernel/pci_64.c12
-rw-r--r--arch/powerpc/kernel/pci_dn.c68
-rw-r--r--arch/powerpc/kernel/pci_of_scan.c67
-rw-r--r--arch/powerpc/kernel/proc_powerpc.c10
-rw-r--r--arch/powerpc/kernel/process.c121
-rw-r--r--arch/powerpc/kernel/prom.c8
-rw-r--r--arch/powerpc/kernel/prom_init.c147
-rw-r--r--arch/powerpc/kernel/prom_init_check.sh5
-rw-r--r--arch/powerpc/kernel/ptrace.c85
-rw-r--r--arch/powerpc/kernel/rtas-proc.c70
-rw-r--r--arch/powerpc/kernel/rtas.c15
-rw-r--r--arch/powerpc/kernel/rtas_flash.c34
-rw-r--r--arch/powerpc/kernel/rtasd.c14
-rw-r--r--arch/powerpc/kernel/secure_boot.c50
-rw-r--r--arch/powerpc/kernel/security.c123
-rw-r--r--arch/powerpc/kernel/secvar-ops.c17
-rw-r--r--arch/powerpc/kernel/secvar-sysfs.c248
-rw-r--r--arch/powerpc/kernel/setup-common.c37
-rw-r--r--arch/powerpc/kernel/setup.h2
-rw-r--r--arch/powerpc/kernel/setup_32.c22
-rw-r--r--arch/powerpc/kernel/setup_64.c31
-rw-r--r--arch/powerpc/kernel/stacktrace.c2
-rw-r--r--arch/powerpc/kernel/syscalls.c4
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/powerpc/kernel/sysfs.c20
-rw-r--r--arch/powerpc/kernel/time.c14
-rw-r--r--arch/powerpc/kernel/trace/ftrace.c5
-rw-r--r--arch/powerpc/kernel/trace/ftrace_32.S1
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_mprofile.S1
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_pg.S1
-rw-r--r--arch/powerpc/kernel/traps.c25
-rw-r--r--arch/powerpc/kernel/ucall.S14
-rw-r--r--arch/powerpc/kernel/udbg.c14
-rw-r--r--arch/powerpc/kernel/vdso.c27
-rw-r--r--arch/powerpc/kernel/vdso32/Makefile4
-rw-r--r--arch/powerpc/kernel/vdso32/cacheflush.S32
-rw-r--r--arch/powerpc/kernel/vdso32/datapage.S33
-rw-r--r--arch/powerpc/kernel/vdso32/getcpu.S23
-rw-r--r--arch/powerpc/kernel/vdso32/gettimeofday.S130
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32.lds.S6
-rw-r--r--arch/powerpc/kernel/vdso64/cacheflush.S4
-rw-r--r--arch/powerpc/kernel/vdso64/gettimeofday.S15
-rw-r--r--arch/powerpc/kernel/vector.S3
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S39
-rw-r--r--arch/powerpc/kexec/Makefile25
-rw-r--r--arch/powerpc/kexec/core.c (renamed from arch/powerpc/kernel/machine_kexec.c)1
-rw-r--r--arch/powerpc/kexec/core_32.c (renamed from arch/powerpc/kernel/machine_kexec_32.c)0
-rw-r--r--arch/powerpc/kexec/core_64.c (renamed from arch/powerpc/kernel/machine_kexec_64.c)9
-rw-r--r--arch/powerpc/kexec/crash.c (renamed from arch/powerpc/kernel/crash.c)0
-rw-r--r--arch/powerpc/kexec/elf_64.c125
-rw-r--r--arch/powerpc/kexec/file_load.c (renamed from arch/powerpc/kernel/machine_kexec_file_64.c)0
-rw-r--r--arch/powerpc/kexec/ima.c (renamed from arch/powerpc/kernel/ima_kexec.c)0
-rw-r--r--arch/powerpc/kexec/relocate_32.S500
-rw-r--r--arch/powerpc/kvm/Makefile3
-rw-r--r--arch/powerpc/kvm/book3s.c52
-rw-r--r--arch/powerpc/kvm/book3s.h3
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu.c6
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu.c15
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c28
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c31
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c45
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c42
-rw-r--r--arch/powerpc/kvm/book3s_hv.c253
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c82
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c12
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c44
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S120
-rw-r--r--arch/powerpc/kvm/book3s_hv_uvmem.c813
-rw-r--r--arch/powerpc/kvm/book3s_pr.c74
-rw-r--r--arch/powerpc/kvm/book3s_xive.c204
-rw-r--r--arch/powerpc/kvm/book3s_xive.h19
-rw-r--r--arch/powerpc/kvm/book3s_xive_native.c107
-rw-r--r--arch/powerpc/kvm/booke.c67
-rw-r--r--arch/powerpc/kvm/e500.c33
-rw-r--r--arch/powerpc/kvm/e500_mmu_host.c6
-rw-r--r--arch/powerpc/kvm/e500mc.c30
-rw-r--r--arch/powerpc/kvm/emulate.c1
-rw-r--r--arch/powerpc/kvm/emulate_loadstore.c11
-rw-r--r--arch/powerpc/kvm/powerpc.c105
-rw-r--r--arch/powerpc/lib/Makefile4
-rw-r--r--arch/powerpc/lib/locks.c6
-rw-r--r--arch/powerpc/lib/memcpy_mcsafe_64.S242
-rw-r--r--arch/powerpc/lib/pmem.c4
-rw-r--r--arch/powerpc/lib/string_32.S4
-rw-r--r--arch/powerpc/lib/string_64.S6
-rw-r--r--arch/powerpc/mm/Makefile2
-rw-r--r--arch/powerpc/mm/book3s32/hash_low.S46
-rw-r--r--arch/powerpc/mm/book3s32/mmu.c80
-rw-r--r--arch/powerpc/mm/book3s64/hash_native.c69
-rw-r--r--arch/powerpc/mm/book3s64/hash_pgtable.c2
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c120
-rw-r--r--arch/powerpc/mm/book3s64/iommu_api.c17
-rw-r--r--arch/powerpc/mm/book3s64/mmu_context.c15
-rw-r--r--arch/powerpc/mm/book3s64/pgtable.c128
-rw-r--r--arch/powerpc/mm/book3s64/pkeys.c10
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c94
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c462
-rw-r--r--arch/powerpc/mm/book3s64/subpage_prot.c12
-rw-r--r--arch/powerpc/mm/dma-noncoherent.c320
-rw-r--r--arch/powerpc/mm/fault.c17
-rw-r--r--arch/powerpc/mm/hugetlbpage.c2
-rw-r--r--arch/powerpc/mm/init-common.c7
-rw-r--r--arch/powerpc/mm/init_32.c5
-rw-r--r--arch/powerpc/mm/init_64.c76
-rw-r--r--arch/powerpc/mm/ioremap.c99
-rw-r--r--arch/powerpc/mm/ioremap_32.c93
-rw-r--r--arch/powerpc/mm/ioremap_64.c115
-rw-r--r--arch/powerpc/mm/kasan/kasan_init_32.c124
-rw-r--r--arch/powerpc/mm/mem.c284
-rw-r--r--arch/powerpc/mm/mmu_decl.h24
-rw-r--r--arch/powerpc/mm/nohash/8xx.c65
-rw-r--r--arch/powerpc/mm/nohash/Makefile1
-rw-r--r--arch/powerpc/mm/nohash/book3e_hugetlbpage.c16
-rw-r--r--arch/powerpc/mm/nohash/fsl_booke.c8
-rw-r--r--arch/powerpc/mm/nohash/kaslr_booke.c401
-rw-r--r--arch/powerpc/mm/nohash/tlb.c3
-rw-r--r--arch/powerpc/mm/numa.c12
-rw-r--r--arch/powerpc/mm/pgtable-frag.c6
-rw-r--r--arch/powerpc/mm/pgtable_32.c161
-rw-r--r--arch/powerpc/mm/pgtable_64.c203
-rw-r--r--arch/powerpc/mm/ptdump/bats.c2
-rw-r--r--arch/powerpc/mm/ptdump/hashpagetable.c24
-rw-r--r--arch/powerpc/mm/ptdump/ptdump.c43
-rw-r--r--arch/powerpc/mm/slice.c4
-rw-r--r--arch/powerpc/net/bpf_jit32.h4
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c16
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c13
-rw-r--r--arch/powerpc/oprofile/backtrace.c16
-rw-r--r--arch/powerpc/perf/8xx-pmu.c12
-rw-r--r--arch/powerpc/perf/callchain.c37
-rw-r--r--arch/powerpc/perf/core-book3s.c26
-rw-r--r--arch/powerpc/perf/imc-pmu.c29
-rw-r--r--arch/powerpc/platforms/44x/Kconfig8
-rw-r--r--arch/powerpc/platforms/4xx/Makefile1
-rw-r--r--arch/powerpc/platforms/4xx/ocm.c390
-rw-r--r--arch/powerpc/platforms/512x/mpc512x_lpbfifo.c6
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_gpt.c1
-rw-r--r--arch/powerpc/platforms/83xx/km83xx.c5
-rw-r--r--arch/powerpc/platforms/83xx/misc.c34
-rw-r--r--arch/powerpc/platforms/83xx/mpc832x_mds.c3
-rw-r--r--arch/powerpc/platforms/83xx/mpc832x_rdb.c3
-rw-r--r--arch/powerpc/platforms/83xx/mpc836x_mds.c10
-rw-r--r--arch/powerpc/platforms/83xx/mpc836x_rdk.c3
-rw-r--r--arch/powerpc/platforms/83xx/mpc83xx.h7
-rw-r--r--arch/powerpc/platforms/85xx/common.c23
-rw-r--r--arch/powerpc/platforms/85xx/corenet_generic.c12
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx.h2
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_mds.c34
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_rdb.c18
-rw-r--r--arch/powerpc/platforms/85xx/smp.c9
-rw-r--r--arch/powerpc/platforms/85xx/twr_p102x.c21
-rw-r--r--arch/powerpc/platforms/86xx/mpc8610_hpcd.c4
-rw-r--r--arch/powerpc/platforms/8xx/cpm1.c18
-rw-r--r--arch/powerpc/platforms/8xx/pic.c2
-rw-r--r--arch/powerpc/platforms/Kconfig17
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype35
-rw-r--r--arch/powerpc/platforms/cell/iommu.c2
-rw-r--r--arch/powerpc/platforms/cell/setup.c3
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c205
-rw-r--r--arch/powerpc/platforms/maple/setup.c5
-rw-r--r--arch/powerpc/platforms/pasemi/iommu.c2
-rw-r--r--arch/powerpc/platforms/pasemi/setup.c4
-rw-r--r--arch/powerpc/platforms/powernv/Kconfig5
-rw-r--r--arch/powerpc/platforms/powernv/Makefile7
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c97
-rw-r--r--arch/powerpc/platforms/powernv/idle.c6
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c101
-rw-r--r--arch/powerpc/platforms/powernv/opal-call.c8
-rw-r--r--arch/powerpc/platforms/powernv/opal-core.c636
-rw-r--r--arch/powerpc/platforms/powernv/opal-fadump.c716
-rw-r--r--arch/powerpc/platforms/powernv/opal-fadump.h146
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c60
-rw-r--r--arch/powerpc/platforms/powernv/opal-msglog.c57
-rw-r--r--arch/powerpc/platforms/powernv/opal-powercap.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-prd.c8
-rw-r--r--arch/powerpc/platforms/powernv/opal-psr.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-secvar.c140
-rw-r--r--arch/powerpc/platforms/powernv/opal-sensor-groups.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-xscom.c213
-rw-r--r--arch/powerpc/platforms/powernv/opal.c261
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda-tce.c48
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c348
-rw-r--r--arch/powerpc/platforms/powernv/pci.c87
-rw-r--r--arch/powerpc/platforms/powernv/pci.h5
-rw-r--r--arch/powerpc/platforms/powernv/powernv.h5
-rw-r--r--arch/powerpc/platforms/powernv/setup.c13
-rw-r--r--arch/powerpc/platforms/powernv/smp.c55
-rw-r--r--arch/powerpc/platforms/powernv/ultravisor.c69
-rw-r--r--arch/powerpc/platforms/ps3/setup.c4
-rw-r--r--arch/powerpc/platforms/ps3/spu.c10
-rw-r--r--arch/powerpc/platforms/ps3/system-bus.c11
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig16
-rw-r--r--arch/powerpc/platforms/pseries/Makefile2
-rw-r--r--arch/powerpc/platforms/pseries/cmm.c441
-rw-r--r--arch/powerpc/platforms/pseries/dtl.c38
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c68
-rw-r--r--arch/powerpc/platforms/pseries/firmware.c10
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c244
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c36
-rw-r--r--arch/powerpc/platforms/pseries/hvCall_inst.c12
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c66
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c243
-rw-r--r--arch/powerpc/platforms/pseries/lparcfg.c18
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c9
-rw-r--r--arch/powerpc/platforms/pseries/of_helpers.c8
-rw-r--r--arch/powerpc/platforms/pseries/papr_scm.c106
-rw-r--r--arch/powerpc/platforms/pseries/pci.c7
-rw-r--r--arch/powerpc/platforms/pseries/pci_dlpar.c18
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h1
-rw-r--r--arch/powerpc/platforms/pseries/pseries_energy.c23
-rw-r--r--arch/powerpc/platforms/pseries/ras.c462
-rw-r--r--arch/powerpc/platforms/pseries/reconfig.c8
-rw-r--r--arch/powerpc/platforms/pseries/rtas-fadump.c550
-rw-r--r--arch/powerpc/platforms/pseries/rtas-fadump.h114
-rw-r--r--arch/powerpc/platforms/pseries/scanlog.c15
-rw-r--r--arch/powerpc/platforms/pseries/setup.c40
-rw-r--r--arch/powerpc/platforms/pseries/smp.c3
-rw-r--r--arch/powerpc/platforms/pseries/svm.c85
-rw-r--r--arch/powerpc/platforms/pseries/vio.c6
-rw-r--r--arch/powerpc/sysdev/Kconfig7
-rw-r--r--arch/powerpc/sysdev/Makefile3
-rw-r--r--arch/powerpc/sysdev/dart_iommu.c2
-rw-r--r--arch/powerpc/sysdev/fsl_pci.c16
-rw-r--r--arch/powerpc/sysdev/mpic.c4
-rw-r--r--arch/powerpc/sysdev/scom.c223
-rw-r--r--arch/powerpc/sysdev/simple_gpio.c143
-rw-r--r--arch/powerpc/sysdev/simple_gpio.h13
-rw-r--r--arch/powerpc/sysdev/xics/icp-native.c6
-rw-r--r--arch/powerpc/sysdev/xics/icp-opal.c6
-rw-r--r--arch/powerpc/sysdev/xive/common.c164
-rw-r--r--arch/powerpc/sysdev/xive/native.c33
-rw-r--r--arch/powerpc/sysdev/xive/spapr.c69
-rw-r--r--arch/powerpc/sysdev/xive/xive-internal.h2
-rwxr-xr-xarch/powerpc/tools/relocs_check.sh22
-rwxr-xr-xarch/powerpc/tools/unrel_branch_check.sh4
-rw-r--r--arch/powerpc/xmon/Makefile4
-rw-r--r--arch/powerpc/xmon/dis-asm.h4
-rw-r--r--arch/powerpc/xmon/xmon.c192
491 files changed, 16142 insertions, 10670 deletions
diff --git a/arch/powerpc/Kbuild b/arch/powerpc/Kbuild
index 51e6908323ad..5e2f9eaa3ee7 100644
--- a/arch/powerpc/Kbuild
+++ b/arch/powerpc/Kbuild
@@ -14,4 +14,5 @@ obj-$(CONFIG_XMON) += xmon/
obj-$(CONFIG_KVM) += kvm/
obj-$(CONFIG_PERF_EVENTS) += perf/
+obj-$(CONFIG_KEXEC_CORE) += kexec/
obj-$(CONFIG_KEXEC_FILE) += purgatory/
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d8dcd8820369..497b7d0b2d7e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1,10 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
source "arch/powerpc/platforms/Kconfig.cputype"
-config PPC32
- bool
- default y if !PPC64
-
config 32BIT
bool
default y if PPC32
@@ -106,7 +102,7 @@ config LOCKDEP_SUPPORT
config GENERIC_LOCKBREAK
bool
default y
- depends on SMP && PREEMPT
+ depends on SMP && PREEMPTION
config GENERIC_HWEIGHT
bool
@@ -128,14 +124,15 @@ config PPC
select ARCH_HAS_HUGEPD if HUGETLB_PAGE
select ARCH_HAS_MMIOWB if PPC64
select ARCH_HAS_PHYS_TO_DMA
- select ARCH_HAS_PMEM_API if PPC64
+ select ARCH_HAS_PMEM_API
select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_MEMBARRIER_CALLBACKS
- select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
- select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
+ select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
+ select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
- select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64
+ select ARCH_HAS_UACCESS_FLUSHCACHE
+ select ARCH_HAS_UACCESS_MCSAFE if PPC64
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_KEEP_MEMBLOCK
@@ -148,7 +145,7 @@ config PPC
select ARCH_WANT_IPC_PARSE_VERSION
select ARCH_WEAK_RELEASE_ACQUIRE
select BINFMT_ELF
- select BUILDTIME_EXTABLE_SORT
+ select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS
select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
select DYNAMIC_FTRACE if FUNCTION_TRACER
@@ -160,6 +157,7 @@ config PPC
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC
+ select GENERIC_EARLY_IOREMAP
select GENERIC_IRQ_SHOW
select GENERIC_IRQ_SHOW_LEVEL
select GENERIC_PCI_IOMAP if PCI
@@ -171,17 +169,20 @@ config PPC
select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KASAN if PPC32
+ select HAVE_ARCH_KASAN_VMALLOC if PPC32
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_NVRAM_OPS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
+ select HAVE_ASM_MODVERSIONS
select HAVE_C_RECORDMCOUNT
select HAVE_CBPF_JIT if !PPC64
select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
select HAVE_CONTEXT_TRACKING if PPC64
+ select HAVE_COPY_THREAD_TLS
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DYNAMIC_FTRACE
@@ -218,9 +219,8 @@ config PPC
select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
- select HAVE_RCU_TABLE_FREE if SMP
- select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
- select HAVE_MMU_GATHER_PAGE_SIZE
+ select MMU_GATHER_RCU_TABLE_FREE
+ select MMU_GATHER_PAGE_SIZE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
select HAVE_SYSCALL_TRACEPOINTS
@@ -234,6 +234,7 @@ config PPC
select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE
select NEED_SG_DMA_LENGTH
select OF
+ select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE
select OF_EARLY_FLATTREE
select OLD_SIGACTION if PPC32
select OLD_SIGSUSPEND
@@ -448,6 +449,19 @@ config PPC_TRANSACTIONAL_MEM
help
Support user-mode Transactional Memory on POWERPC.
+config PPC_UV
+ bool "Ultravisor support"
+ depends on KVM_BOOK3S_HV_POSSIBLE
+ depends on DEVICE_PRIVATE
+ default n
+ help
+ This option paravirtualizes the kernel to run in POWER platforms that
+ supports the Protected Execution Facility (PEF). On such platforms,
+ the ultravisor firmware runs at a privilege level above the
+ hypervisor.
+
+ If unsure, say "N".
+
config LD_HEAD_STUB_CATCH
bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if EXPERT
depends on PPC64
@@ -465,7 +479,7 @@ config MPROFILE_KERNEL
config HOTPLUG_CPU
bool "Support for enabling/disabling CPUs"
depends on SMP && (PPC_PSERIES || \
- PPC_PMAC || PPC_POWERNV || FSL_SOC_BOOKE)
+ PPC_PMAC || PPC_POWERNV || FSL_SOC_BOOKE)
help
Say Y here to be able to disable and re-enable individual
CPUs at runtime on SMP machines.
@@ -511,6 +525,7 @@ config KEXEC_FILE
select KEXEC_CORE
select HAVE_IMA_KEXEC
select BUILD_BIN2C
+ select KEXEC_ELF
depends on PPC64
depends on CRYPTO=y
depends on CRYPTO_SHA256=y
@@ -547,6 +562,17 @@ config RELOCATABLE
setting can still be useful to bootwrappers that need to know the
load address of the kernel (eg. u-boot/mkimage).
+config RANDOMIZE_BASE
+ bool "Randomize the address of the kernel image"
+ depends on (FSL_BOOKE && FLATMEM && PPC32)
+ depends on RELOCATABLE
+ help
+ Randomizes the virtual address at which the kernel image is
+ loaded, as a security feature that deters exploit attempts
+ relying on knowledge of the location of kernel internals.
+
+ If unsure, say Y.
+
config RELOCATABLE_TEST
bool "Test relocatable kernel"
depends on (PPC64 && RELOCATABLE)
@@ -566,7 +592,7 @@ config CRASH_DUMP
config FA_DUMP
bool "Firmware-assisted dump"
- depends on PPC64 && PPC_RTAS
+ depends on PPC64 && (PPC_RTAS || PPC_POWERNV)
select CRASH_CORE
select CRASH_DUMP
help
@@ -577,7 +603,26 @@ config FA_DUMP
is meant to be a kdump replacement offering robustness and
speed not possible without system firmware assistance.
- If unsure, say "N"
+ If unsure, say "y". Only special kernels like petitboot may
+ need to say "N" here.
+
+config PRESERVE_FA_DUMP
+ bool "Preserve Firmware-assisted dump"
+ depends on PPC64 && PPC_POWERNV && !FA_DUMP
+ help
+ On a kernel with FA_DUMP disabled, this option helps to preserve
+ crash data from a previously crash'ed kernel. Useful when the next
+ memory preserving kernel boot would process this crash data.
+ Petitboot kernel is the typical usecase for this option.
+
+config OPAL_CORE
+ bool "Export OPAL memory as /sys/firmware/opal/core"
+ depends on PPC64 && PPC_POWERNV
+ help
+ This option uses the MPIPL support in firmware to provide an
+ ELF core of OPAL memory after a crash. The ELF core is exported
+ as /sys/firmware/opal/core file which is helpful in debugging
+ OPAL crashes using GDB.
config IRQ_ALL_CPUS
bool "Distribute interrupts on all CPUs by default"
@@ -851,15 +896,33 @@ config CMDLINE
some command-line options at build time by entering them here. In
most cases you will need to specify the root device here.
+choice
+ prompt "Kernel command line type" if CMDLINE != ""
+ default CMDLINE_FROM_BOOTLOADER
+
+config CMDLINE_FROM_BOOTLOADER
+ bool "Use bootloader kernel arguments if available"
+ help
+ Uses the command-line options passed by the boot loader. If
+ the boot loader doesn't provide any, the default kernel command
+ string provided in CMDLINE will be used.
+
+config CMDLINE_EXTEND
+ bool "Extend bootloader kernel arguments"
+ help
+ The command-line arguments provided by the boot loader will be
+ appended to the default kernel command string.
+
config CMDLINE_FORCE
bool "Always use the default kernel command string"
- depends on CMDLINE_BOOL
help
Always use the default kernel command string, even if the boot
loader passes other arguments to the kernel.
This is useful if you cannot or don't want to change the
command-line options your boot loader passes to the kernel.
+endchoice
+
config EXTRA_TARGETS
string "Additional default image types"
help
@@ -911,6 +974,28 @@ config PPC_MEM_KEYS
If unsure, say y.
+config PPC_SECURE_BOOT
+ prompt "Enable secure boot support"
+ bool
+ depends on PPC_POWERNV
+ depends on IMA_ARCH_POLICY
+ help
+ Systems with firmware secure boot enabled need to define security
+ policies to extend secure boot to the OS. This config allows a user
+ to enable OS secure boot on systems that have firmware support for
+ it. If in doubt say N.
+
+config PPC_SECVAR_SYSFS
+ bool "Enable sysfs interface for POWER secure variables"
+ default y
+ depends on PPC_SECURE_BOOT
+ depends on SYSFS
+ help
+ POWER secure variables are managed and controlled by firmware.
+ These variables are exposed to userspace via sysfs to enable
+ read/write operations on these variables. Say Y if you have
+ secure boot enabled and want to expose variables to userspace.
+
endmenu
config ISA_DMA_API
@@ -1138,18 +1223,6 @@ config TASK_SIZE
default "0x80000000" if PPC_8xx
default "0xc0000000"
-config CONSISTENT_SIZE_BOOL
- bool "Set custom consistent memory pool size"
- depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
- help
- This option allows you to set the size of the
- consistent memory pool. This pool of virtual memory
- is used to make consistent memory allocations.
-
-config CONSISTENT_SIZE
- hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL
- default "0x00200000" if NOT_COHERENT_CACHE
-
config PIN_TLB
bool "Pinned Kernel TLBs (860 ONLY)"
depends on ADVANCED_OPTIONS && PPC_8xx && \
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index c59920920ddc..0b063830eea8 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -122,8 +122,8 @@ config XMON_DEFAULT_RO_MODE
depends on XMON
default y
help
- Operate xmon in read-only mode. The cmdline options 'xmon=rw' and
- 'xmon=ro' override this default.
+ Operate xmon in read-only mode. The cmdline options 'xmon=rw' and
+ 'xmon=ro' override this default.
config DEBUGGER
bool
@@ -222,7 +222,7 @@ config PPC_EARLY_DEBUG_44x
help
Select this to enable early debugging for IBM 44x chips via the
inbuilt serial port. If you enable this, ensure you set
- PPC_EARLY_DEBUG_44x_PHYSLOW below to suit your target board.
+ PPC_EARLY_DEBUG_44x_PHYSLOW below to suit your target board.
config PPC_EARLY_DEBUG_40x
bool "Early serial debugging for IBM/AMCC 40x CPUs"
@@ -325,7 +325,7 @@ config PPC_EARLY_DEBUG_44x_PHYSLOW
default "0x40000200"
help
You probably want 0x40000200 for ebony boards and
- 0x40000300 for taishan
+ 0x40000300 for taishan
config PPC_EARLY_DEBUG_44x_PHYSHIGH
hex "EPRN of early debug UART physical address"
@@ -359,9 +359,9 @@ config FAIL_IOMMU
If you are unsure, say N.
config PPC_PTDUMP
- bool "Export kernel pagetable layout to userspace via debugfs"
- depends on DEBUG_KERNEL && DEBUG_FS
- help
+ bool "Export kernel pagetable layout to userspace via debugfs"
+ depends on DEBUG_KERNEL && DEBUG_FS
+ help
This option exports the state of the kernel pagetables to a
debugfs file. This is only useful for kernel developers who are
working in architecture specific areas of the kernel - probably
@@ -371,7 +371,7 @@ config PPC_PTDUMP
config PPC_DEBUG_WX
bool "Warn on W+X mappings at boot"
- depends on PPC_PTDUMP
+ depends on PPC_PTDUMP && STRICT_KERNEL_RWX
help
Generate a warning if any W+X mappings are found at boot.
@@ -390,8 +390,8 @@ config PPC_DEBUG_WX
config PPC_FAST_ENDIAN_SWITCH
bool "Deprecated fast endian-switch syscall"
- depends on DEBUG_KERNEL && PPC_BOOK3S_64
- help
+ depends on DEBUG_KERNEL && PPC_BOOK3S_64
+ help
If you're unsure what this is, say N.
config KASAN_SHADOW_OFFSET
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index c345b79414a9..f35730548e42 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -39,13 +39,11 @@ endif
uname := $(shell uname -m)
KBUILD_DEFCONFIG := $(if $(filter ppc%,$(uname)),$(uname),ppc64)_defconfig
-ifdef CONFIG_PPC64
new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi)
ifeq ($(new_nm),y)
NM := $(NM) --synthetic
endif
-endif
# BITS is used as extension for files which are available in a 32 bit
# and a 64 bit version to simplify shared Makefiles.
@@ -67,7 +65,7 @@ UTS_MACHINE := $(subst $(space),,$(machine-y))
ifdef CONFIG_PPC32
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
else
-KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/powerpc/kernel/module.lds
+KBUILD_LDS_MODULE += $(srctree)/arch/powerpc/kernel/module.lds
ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
# Have the linker provide sfpr if possible.
# There is a corresponding test in arch/powerpc/lib/Makefile
@@ -93,11 +91,13 @@ MULTIPLEWORD := -mmultiple
endif
ifdef CONFIG_PPC64
+ifndef CONFIG_CC_IS_CLANG
cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1)
cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mcall-aixdesc)
aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1)
aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2
endif
+endif
ifndef CONFIG_CC_IS_CLANG
cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align
@@ -112,7 +112,6 @@ ifeq ($(HAS_BIARCH),y)
KBUILD_CFLAGS += -m$(BITS)
KBUILD_AFLAGS += -m$(BITS) -Wl,-a$(BITS)
KBUILD_LDFLAGS += -m elf$(BITS)$(LDEMULATION)
-KBUILD_ARFLAGS += --target=elf$(BITS)-$(GNUTARGET)
endif
cflags-$(CONFIG_STACKPROTECTOR) += -mstack-protector-guard=tls
@@ -144,6 +143,7 @@ endif
endif
CFLAGS-$(CONFIG_PPC64) := $(call cc-option,-mtraceback=no)
+ifndef CONFIG_CC_IS_CLANG
ifdef CONFIG_CPU_LITTLE_ENDIAN
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc))
AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2)
@@ -152,6 +152,7 @@ CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1)
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc)
AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1)
endif
+endif
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions)
@@ -333,32 +334,32 @@ powernv_be_defconfig:
PHONY += mpc85xx_defconfig
mpc85xx_defconfig:
- $(call merge_into_defconfig,mpc85xx_basic_defconfig,\
+ $(call merge_into_defconfig,mpc85xx_base.config,\
85xx-32bit 85xx-hw fsl-emb-nonhw)
PHONY += mpc85xx_smp_defconfig
mpc85xx_smp_defconfig:
- $(call merge_into_defconfig,mpc85xx_basic_defconfig,\
+ $(call merge_into_defconfig,mpc85xx_base.config,\
85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw)
PHONY += corenet32_smp_defconfig
corenet32_smp_defconfig:
- $(call merge_into_defconfig,corenet_basic_defconfig,\
+ $(call merge_into_defconfig,corenet_base.config,\
85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw dpaa)
PHONY += corenet64_smp_defconfig
corenet64_smp_defconfig:
- $(call merge_into_defconfig,corenet_basic_defconfig,\
+ $(call merge_into_defconfig,corenet_base.config,\
85xx-64bit 85xx-smp altivec 85xx-hw fsl-emb-nonhw dpaa)
PHONY += mpc86xx_defconfig
mpc86xx_defconfig:
- $(call merge_into_defconfig,mpc86xx_basic_defconfig,\
+ $(call merge_into_defconfig,mpc86xx_base.config,\
86xx-hw fsl-emb-nonhw)
PHONY += mpc86xx_smp_defconfig
mpc86xx_smp_defconfig:
- $(call merge_into_defconfig,mpc86xx_basic_defconfig,\
+ $(call merge_into_defconfig,mpc86xx_base.config,\
86xx-smp 86xx-hw fsl-emb-nonhw)
PHONY += ppc32_allmodconfig
diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink
index 83f8e5ba2722..2268396ff4bb 100644
--- a/arch/powerpc/Makefile.postlink
+++ b/arch/powerpc/Makefile.postlink
@@ -17,11 +17,11 @@ quiet_cmd_head_check = CHKHEAD $@
quiet_cmd_relocs_check = CHKREL $@
ifdef CONFIG_PPC_BOOK3S_64
cmd_relocs_check = \
- $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@" ; \
- $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$@"
+ $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" ; \
+ $(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$@"
else
cmd_relocs_check = \
- $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@"
+ $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@"
endif
# `@true` prevents complaint when there is nothing to be done
diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c
index 1699e9531552..00c4d843a023 100644
--- a/arch/powerpc/boot/4xx.c
+++ b/arch/powerpc/boot/4xx.c
@@ -228,7 +228,7 @@ void ibm4xx_denali_fixup_memsize(void)
dpath = 8; /* 64 bits */
/* get address pins (rows) */
- val = SDRAM0_READ(DDR0_42);
+ val = SDRAM0_READ(DDR0_42);
row = DDR_GET_VAL(val, DDR_APIN, DDR_APIN_SHIFT);
if (row > max_row)
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 6841bd52738b..0556bf4fc9e9 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -50,7 +50,7 @@ endif
BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -nostdinc
-BOOTARFLAGS := -cr$(KBUILD_ARFLAGS)
+BOOTARFLAGS := -crD
ifdef CONFIG_CC_IS_CLANG
BOOTCFLAGS += $(CLANG_FLAGS)
@@ -224,7 +224,7 @@ $(patsubst %.S,%.o, $(filter %.S, $(src-boot))): %.o: %.S FORCE
$(obj)/wrapper.a: $(obj-wlib) FORCE
$(call if_changed,bootar)
-hostprogs-y := addnote hack-coff mktree
+hostprogs := addnote hack-coff mktree
targets += $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a)
extra-y := $(obj)/wrapper.a $(obj-plat) $(obj)/empty.o \
@@ -464,7 +464,7 @@ WRAPPER_BINDIR := /usr/sbin
INSTALL := install
extra-installed := $(patsubst $(obj)/%, $(DESTDIR)$(WRAPPER_OBJDIR)/%, $(extra-y))
-hostprogs-installed := $(patsubst %, $(DESTDIR)$(WRAPPER_BINDIR)/%, $(hostprogs-y))
+hostprogs-installed := $(patsubst %, $(DESTDIR)$(WRAPPER_BINDIR)/%, $(hostprogs))
wrapper-installed := $(DESTDIR)$(WRAPPER_BINDIR)/wrapper
dts-installed := $(patsubst $(dtstree)/%, $(DESTDIR)$(WRAPPER_DTSDIR)/%, $(wildcard $(dtstree)/*.dts))
diff --git a/arch/powerpc/boot/dts/fsl/kmcent2.dts b/arch/powerpc/boot/dts/fsl/kmcent2.dts
index 48b7f9797124..8e7f0828af29 100644
--- a/arch/powerpc/boot/dts/fsl/kmcent2.dts
+++ b/arch/powerpc/boot/dts/fsl/kmcent2.dts
@@ -210,13 +210,19 @@
fman@400000 {
ethernet@e0000 {
- fixed-link = <0 1 1000 0 0>;
- phy-connection-type = "sgmii";
+ phy-mode = "sgmii";
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
};
ethernet@e2000 {
- fixed-link = <1 1 1000 0 0>;
- phy-connection-type = "sgmii";
+ phy-mode = "sgmii";
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
};
ethernet@e4000 {
@@ -229,7 +235,7 @@
ethernet@e8000 {
phy-handle = <&front_phy>;
- phy-connection-type = "rgmii";
+ phy-mode = "rgmii-id";
};
mdio0: mdio@fc000 {
@@ -258,14 +264,50 @@
pci1: pcie@ffe250000 {
status = "disabled";
+ reg = <0xf 0xfe250000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000
+ 0x01000000 0 0 0xf 0xf8010000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
};
pci2: pcie@ffe260000 {
status = "disabled";
+ reg = <0xf 0xfe260000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000
+ 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
};
pci3: pcie@ffe270000 {
status = "disabled";
+ reg = <0xf 0xfe270000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000
+ 0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
};
qe: qe@ffe140000 {
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi
index e1a961f05dcd..baa0c503e741 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi
@@ -63,6 +63,7 @@ fman@400000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xe1000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
pcsphy0: ethernet-phy@0 {
reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi
index c288f3c6c637..93095600e808 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi
@@ -60,6 +60,7 @@ fman@400000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xf1000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
pcsphy6: ethernet-phy@0 {
reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi
index 94f3e7175012..ff4bd38f0645 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi
@@ -63,6 +63,7 @@ fman@400000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xe3000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
pcsphy1: ethernet-phy@0 {
reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi
index 94a76982d214..1fa38ed6f59e 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi
@@ -60,6 +60,7 @@ fman@400000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xf3000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
pcsphy7: ethernet-phy@0 {
reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi
index b5ff5f71c6b8..a8cc9780c0c4 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi
@@ -59,6 +59,7 @@ fman@400000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xe1000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
pcsphy0: ethernet-phy@0 {
reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi
index ee44182c6348..8b8bd70c9382 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi
@@ -59,6 +59,7 @@ fman@400000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xe3000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
pcsphy1: ethernet-phy@0 {
reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi
index f05f0d775039..619c880b54d8 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi
@@ -59,6 +59,7 @@ fman@400000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xe5000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
pcsphy2: ethernet-phy@0 {
reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi
index a9114ec51075..d7ebb73a400d 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi
@@ -59,6 +59,7 @@ fman@400000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xe7000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
pcsphy3: ethernet-phy@0 {
reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi
index 44dd00ac7367..b151d696a069 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi
@@ -59,6 +59,7 @@ fman@400000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xe9000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
pcsphy4: ethernet-phy@0 {
reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi
index 5b1b84b58602..adc0ae0013a3 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi
@@ -59,6 +59,7 @@ fman@400000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xeb000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
pcsphy5: ethernet-phy@0 {
reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi
index 0e1daaef9e74..435047e0e250 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi
@@ -60,6 +60,7 @@ fman@500000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xf1000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
pcsphy14: ethernet-phy@0 {
reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi
index 68c5ef779266..c098657cca0a 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi
@@ -60,6 +60,7 @@ fman@500000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xf3000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
pcsphy15: ethernet-phy@0 {
reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi
index 605363cc1117..9d06824815f3 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi
@@ -59,6 +59,7 @@ fman@500000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xe1000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
pcsphy8: ethernet-phy@0 {
reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi
index 1955dfa13634..70e947730c4b 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi
@@ -59,6 +59,7 @@ fman@500000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xe3000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
pcsphy9: ethernet-phy@0 {
reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi
index 2c1476454ee0..ad96e6529595 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi
@@ -59,6 +59,7 @@ fman@500000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xe5000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
pcsphy10: ethernet-phy@0 {
reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi
index b8b541ff5fb0..034bc4b71f7a 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi
@@ -59,6 +59,7 @@ fman@500000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xe7000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
pcsphy11: ethernet-phy@0 {
reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi
index 4b2cfddd1b15..93ca23d82b39 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi
@@ -59,6 +59,7 @@ fman@500000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xe9000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
pcsphy12: ethernet-phy@0 {
reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi
index 0a52ddf7cc17..23b3117a2fd2 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi
@@ -59,6 +59,7 @@ fman@500000 {
#size-cells = <0>;
compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
reg = <0xeb000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
pcsphy13: ethernet-phy@0 {
reg = <0x0>;
diff --git a/arch/powerpc/boot/dts/mgcoge.dts b/arch/powerpc/boot/dts/mgcoge.dts
index a2dd5f1da621..7de068991bde 100644
--- a/arch/powerpc/boot/dts/mgcoge.dts
+++ b/arch/powerpc/boot/dts/mgcoge.dts
@@ -224,7 +224,7 @@
reg = <0x11a80 0x40 0x89fc 0x2>;
interrupts = <2 8>;
interrupt-parent = <&PIC>;
- gpios = < &cpm2_pio_d 19 0>;
+ cs-gpios = < &cpm2_pio_d 19 0>;
#address-cells = <1>;
#size-cells = <0>;
ds3106@1 {
diff --git a/arch/powerpc/boot/dts/mpc832x_rdb.dts b/arch/powerpc/boot/dts/mpc832x_rdb.dts
index b6257186528e..ecebc27a2898 100644
--- a/arch/powerpc/boot/dts/mpc832x_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc832x_rdb.dts
@@ -249,7 +249,7 @@
reg = <0x4c0 0x40>;
interrupts = <2>;
interrupt-parent = <&qeic>;
- gpios = <&qe_pio_d 13 0>;
+ cs-gpios = <&qe_pio_d 13 0>;
mode = "cpu-qe";
mmc-slot@0 {
diff --git a/arch/powerpc/boot/dts/mpc8610_hpcd.dts b/arch/powerpc/boot/dts/mpc8610_hpcd.dts
index 1a8321ac105a..33bbe58c1ad0 100644
--- a/arch/powerpc/boot/dts/mpc8610_hpcd.dts
+++ b/arch/powerpc/boot/dts/mpc8610_hpcd.dts
@@ -200,7 +200,7 @@
interrupts = <59 2>;
interrupt-parent = <&mpic>;
mode = "cpu";
- gpios = <&sdcsr_pio 7 0>;
+ cs-gpios = <&sdcsr_pio 7 0>;
sleep = <&pmc 0x00000800 0>;
mmc-slot@0 {
diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h
index 2abc8e83b95e..9757d4f6331e 100644
--- a/arch/powerpc/boot/libfdt_env.h
+++ b/arch/powerpc/boot/libfdt_env.h
@@ -6,6 +6,8 @@
#include <string.h>
#define INT_MAX ((int)(~0U>>1))
+#define UINT32_MAX ((u32)~0U)
+#define INT32_MAX ((s32)(UINT32_MAX >> 1))
#include "of.h"
diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c
index 102cc546444d..a9d209135975 100644
--- a/arch/powerpc/boot/main.c
+++ b/arch/powerpc/boot/main.c
@@ -146,6 +146,46 @@ static struct addr_range prep_initrd(struct addr_range vmlinux, void *chosen,
return (struct addr_range){(void *)initrd_addr, initrd_size};
}
+#ifdef __powerpc64__
+static void prep_esm_blob(struct addr_range vmlinux, void *chosen)
+{
+ unsigned long esm_blob_addr, esm_blob_size;
+
+ /* Do we have an ESM (Enter Secure Mode) blob? */
+ if (_esm_blob_end <= _esm_blob_start)
+ return;
+
+ printf("Attached ESM blob at 0x%p-0x%p\n\r",
+ _esm_blob_start, _esm_blob_end);
+ esm_blob_addr = (unsigned long)_esm_blob_start;
+ esm_blob_size = _esm_blob_end - _esm_blob_start;
+
+ /*
+ * If the ESM blob is too low it will be clobbered when the
+ * kernel relocates to its final location. In this case,
+ * allocate a safer place and move it.
+ */
+ if (esm_blob_addr < vmlinux.size) {
+ void *old_addr = (void *)esm_blob_addr;
+
+ printf("Allocating 0x%lx bytes for esm_blob ...\n\r",
+ esm_blob_size);
+ esm_blob_addr = (unsigned long)malloc(esm_blob_size);
+ if (!esm_blob_addr)
+ fatal("Can't allocate memory for ESM blob !\n\r");
+ printf("Relocating ESM blob 0x%lx <- 0x%p (0x%lx bytes)\n\r",
+ esm_blob_addr, old_addr, esm_blob_size);
+ memmove((void *)esm_blob_addr, old_addr, esm_blob_size);
+ }
+
+ /* Tell the kernel ESM blob address via device tree. */
+ setprop_val(chosen, "linux,esm-blob-start", (u32)(esm_blob_addr));
+ setprop_val(chosen, "linux,esm-blob-end", (u32)(esm_blob_addr + esm_blob_size));
+}
+#else
+static inline void prep_esm_blob(struct addr_range vmlinux, void *chosen) { }
+#endif
+
/* A buffer that may be edited by tools operating on a zImage binary so as to
* edit the command line passed to vmlinux (by setting /chosen/bootargs).
* The buffer is put in it's own section so that tools may locate it easier.
@@ -214,6 +254,7 @@ void start(void)
vmlinux = prep_kernel();
initrd = prep_initrd(vmlinux, chosen,
loader_info.initrd_addr, loader_info.initrd_size);
+ prep_esm_blob(vmlinux, chosen);
prep_cmdline(chosen);
printf("Finalizing device tree...");
diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h
index cd043726ed88..e0606766480f 100644
--- a/arch/powerpc/boot/ops.h
+++ b/arch/powerpc/boot/ops.h
@@ -251,6 +251,8 @@ extern char _initrd_start[];
extern char _initrd_end[];
extern char _dtb_start[];
extern char _dtb_end[];
+extern char _esm_blob_start[];
+extern char _esm_blob_end[];
static inline __attribute__((const))
int __ilog2_u32(u32 n)
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 5148ac271f28..ed6266367bc0 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -13,6 +13,7 @@
# -i initrd specify initrd file
# -d devtree specify device-tree blob
# -s tree.dts specify device-tree source file (needs dtc installed)
+# -e esm_blob specify ESM blob for secure images
# -c cache $kernel.strip.gz (use if present & newer, else make)
# -C prefix specify command prefix for cross-building tools
# (strip, objcopy, ld)
@@ -37,6 +38,7 @@ platform=of
initrd=
dtb=
dts=
+esm_blob=
cacheit=
binary=
compression=.gz
@@ -60,9 +62,9 @@ tmpdir=.
usage() {
echo 'Usage: wrapper [-o output] [-p platform] [-i initrd]' >&2
- echo ' [-d devtree] [-s tree.dts] [-c] [-C cross-prefix]' >&2
- echo ' [-D datadir] [-W workingdir] [-Z (gz|xz|none)]' >&2
- echo ' [--no-compression] [vmlinux]' >&2
+ echo ' [-d devtree] [-s tree.dts] [-e esm_blob]' >&2
+ echo ' [-c] [-C cross-prefix] [-D datadir] [-W workingdir]' >&2
+ echo ' [-Z (gz|xz|none)] [--no-compression] [vmlinux]' >&2
exit 1
}
@@ -105,6 +107,11 @@ while [ "$#" -gt 0 ]; do
[ "$#" -gt 0 ] || usage
dtb="$1"
;;
+ -e)
+ shift
+ [ "$#" -gt 0 ] || usage
+ esm_blob="$1"
+ ;;
-s)
shift
[ "$#" -gt 0 ] || usage
@@ -218,9 +225,16 @@ objflags=-S
tmp=$tmpdir/zImage.$$.o
ksection=.kernel:vmlinux.strip
isection=.kernel:initrd
+esection=.kernel:esm_blob
link_address='0x400000'
make_space=y
+
+if [ -n "$esm_blob" -a "$platform" != "pseries" ]; then
+ echo "ESM blob not support on non-pseries platforms" >&2
+ exit 1
+fi
+
case "$platform" in
of)
platformo="$object/of.o $object/epapr.o"
@@ -477,6 +491,10 @@ if [ -n "$dtb" ]; then
fi
fi
+if [ -n "$esm_blob" ]; then
+ addsec $tmp "$esm_blob" $esection
+fi
+
if [ "$platform" != "miboot" ]; then
if [ -n "$link_address" ] ; then
text_start="-Ttext $link_address"
diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S
index 4ac1e36edfe7..a21f3a76e06f 100644
--- a/arch/powerpc/boot/zImage.lds.S
+++ b/arch/powerpc/boot/zImage.lds.S
@@ -68,6 +68,14 @@ SECTIONS
_initrd_end = .;
}
+ . = ALIGN(4096);
+ .kernel:esm_blob :
+ {
+ _esm_blob_start = .;
+ *(.kernel:esm_blob)
+ _esm_blob_end = .;
+ }
+
#ifdef CONFIG_PPC64_BOOT_WRAPPER
. = ALIGN(256);
.got :
diff --git a/arch/powerpc/configs/40x/acadia_defconfig b/arch/powerpc/configs/40x/acadia_defconfig
index 5a75e4f14273..db93c117be36 100644
--- a/arch/powerpc/configs/40x/acadia_defconfig
+++ b/arch/powerpc/configs/40x/acadia_defconfig
@@ -18,9 +18,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/40x/ep405_defconfig b/arch/powerpc/configs/40x/ep405_defconfig
index e2691c5db766..a3854cf65f8d 100644
--- a/arch/powerpc/configs/40x/ep405_defconfig
+++ b/arch/powerpc/configs/40x/ep405_defconfig
@@ -17,9 +17,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/40x/kilauea_defconfig b/arch/powerpc/configs/40x/kilauea_defconfig
index 949989ef2322..edc22464dfb5 100644
--- a/arch/powerpc/configs/40x/kilauea_defconfig
+++ b/arch/powerpc/configs/40x/kilauea_defconfig
@@ -20,9 +20,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/40x/klondike_defconfig b/arch/powerpc/configs/40x/klondike_defconfig
index 4347a87088dc..579fa846839c 100644
--- a/arch/powerpc/configs/40x/klondike_defconfig
+++ b/arch/powerpc/configs/40x/klondike_defconfig
@@ -4,7 +4,6 @@ CONFIG_LOG_BUF_SHIFT=14
CONFIG_SYSFS_DEPRECATED=y
CONFIG_SYSFS_DEPRECATED_V2=y
CONFIG_BLK_DEV_INITRD=y
-CONFIG_SYSCTL_SYSCALL=y
CONFIG_EMBEDDED=y
CONFIG_SLAB=y
CONFIG_MODULES=y
diff --git a/arch/powerpc/configs/40x/makalu_defconfig b/arch/powerpc/configs/40x/makalu_defconfig
index 90b759bbf426..188789b9aa4c 100644
--- a/arch/powerpc/configs/40x/makalu_defconfig
+++ b/arch/powerpc/configs/40x/makalu_defconfig
@@ -17,9 +17,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/40x/obs600_defconfig b/arch/powerpc/configs/40x/obs600_defconfig
index 881c300c011d..5bf6af7ef093 100644
--- a/arch/powerpc/configs/40x/obs600_defconfig
+++ b/arch/powerpc/configs/40x/obs600_defconfig
@@ -20,9 +20,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/40x/walnut_defconfig b/arch/powerpc/configs/40x/walnut_defconfig
index 0ed46704b9fa..9eaaf1a1d2c6 100644
--- a/arch/powerpc/configs/40x/walnut_defconfig
+++ b/arch/powerpc/configs/40x/walnut_defconfig
@@ -15,9 +15,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig
index 2fa553ebfdc9..7705a5c3f4ea 100644
--- a/arch/powerpc/configs/44x/akebono_defconfig
+++ b/arch/powerpc/configs/44x/akebono_defconfig
@@ -29,9 +29,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -62,7 +59,6 @@ CONFIG_BLK_DEV_SD=y
# CONFIG_NET_VENDOR_DLINK is not set
# CONFIG_NET_VENDOR_EMULEX is not set
# CONFIG_NET_VENDOR_EXAR is not set
-# CONFIG_NET_VENDOR_HP is not set
CONFIG_IBM_EMAC=y
# CONFIG_NET_VENDOR_MARVELL is not set
# CONFIG_NET_VENDOR_MELLANOX is not set
diff --git a/arch/powerpc/configs/44x/arches_defconfig b/arch/powerpc/configs/44x/arches_defconfig
index 5a1b9ee18075..82c6f49b8dcb 100644
--- a/arch/powerpc/configs/44x/arches_defconfig
+++ b/arch/powerpc/configs/44x/arches_defconfig
@@ -20,9 +20,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/bamboo_defconfig b/arch/powerpc/configs/44x/bamboo_defconfig
index 22e1ef5272ab..679213214a75 100644
--- a/arch/powerpc/configs/44x/bamboo_defconfig
+++ b/arch/powerpc/configs/44x/bamboo_defconfig
@@ -18,9 +18,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_BLK_DEV_RAM=y
diff --git a/arch/powerpc/configs/44x/canyonlands_defconfig b/arch/powerpc/configs/44x/canyonlands_defconfig
index 86f34ea4173a..ccc14eb7a2f1 100644
--- a/arch/powerpc/configs/44x/canyonlands_defconfig
+++ b/arch/powerpc/configs/44x/canyonlands_defconfig
@@ -20,9 +20,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig
index ce3ec5a2cd15..be76e066df01 100644
--- a/arch/powerpc/configs/44x/currituck_defconfig
+++ b/arch/powerpc/configs/44x/currituck_defconfig
@@ -27,9 +27,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
diff --git a/arch/powerpc/configs/44x/ebony_defconfig b/arch/powerpc/configs/44x/ebony_defconfig
index f67447c92e6f..93d2a4e64af9 100644
--- a/arch/powerpc/configs/44x/ebony_defconfig
+++ b/arch/powerpc/configs/44x/ebony_defconfig
@@ -16,9 +16,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig
index 5dbd83a1c11b..1abaa63e067f 100644
--- a/arch/powerpc/configs/44x/eiger_defconfig
+++ b/arch/powerpc/configs/44x/eiger_defconfig
@@ -21,9 +21,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig
index e49114f0e526..e67fc041ca3e 100644
--- a/arch/powerpc/configs/44x/fsp2_defconfig
+++ b/arch/powerpc/configs/44x/fsp2_defconfig
@@ -39,9 +39,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_VLAN_8021Q=m
CONFIG_DEVTMPFS=y
diff --git a/arch/powerpc/configs/44x/icon_defconfig b/arch/powerpc/configs/44x/icon_defconfig
index fa5378af44f9..7d7ff84c8200 100644
--- a/arch/powerpc/configs/44x/icon_defconfig
+++ b/arch/powerpc/configs/44x/icon_defconfig
@@ -20,9 +20,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig
index aae879c21239..fb5c73a29bf4 100644
--- a/arch/powerpc/configs/44x/iss476-smp_defconfig
+++ b/arch/powerpc/configs/44x/iss476-smp_defconfig
@@ -29,9 +29,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/katmai_defconfig b/arch/powerpc/configs/44x/katmai_defconfig
index 56eddca998c6..c6dc1445fc04 100644
--- a/arch/powerpc/configs/44x/katmai_defconfig
+++ b/arch/powerpc/configs/44x/katmai_defconfig
@@ -18,9 +18,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/rainier_defconfig b/arch/powerpc/configs/44x/rainier_defconfig
index 369bfd2e451d..c83ad03182df 100644
--- a/arch/powerpc/configs/44x/rainier_defconfig
+++ b/arch/powerpc/configs/44x/rainier_defconfig
@@ -19,9 +19,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/redwood_defconfig b/arch/powerpc/configs/44x/redwood_defconfig
index 8be95f6fe3a7..640fe1d5af28 100644
--- a/arch/powerpc/configs/44x/redwood_defconfig
+++ b/arch/powerpc/configs/44x/redwood_defconfig
@@ -21,9 +21,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig
index 974a4f038cda..22dc0dadf576 100644
--- a/arch/powerpc/configs/44x/sam440ep_defconfig
+++ b/arch/powerpc/configs/44x/sam440ep_defconfig
@@ -10,8 +10,6 @@ CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
CONFIG_AMIGA_PARTITION=y
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
# CONFIG_EBONY is not set
CONFIG_SAM440EP=y
CONFIG_CMDLINE_BOOL=y
@@ -23,9 +21,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig
index 10e517b69fa4..2c0973db8837 100644
--- a/arch/powerpc/configs/44x/sequoia_defconfig
+++ b/arch/powerpc/configs/44x/sequoia_defconfig
@@ -20,9 +20,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/44x/taishan_defconfig b/arch/powerpc/configs/44x/taishan_defconfig
index cd08f3ddd609..a2d355ca62b2 100644
--- a/arch/powerpc/configs/44x/taishan_defconfig
+++ b/arch/powerpc/configs/44x/taishan_defconfig
@@ -18,9 +18,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_CONNECTOR=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/52xx/pcm030_defconfig b/arch/powerpc/configs/52xx/pcm030_defconfig
index 303600ff1fdb..789622ffd844 100644
--- a/arch/powerpc/configs/52xx/pcm030_defconfig
+++ b/arch/powerpc/configs/52xx/pcm030_defconfig
@@ -14,8 +14,6 @@ CONFIG_SLAB=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
# CONFIG_PPC_CHRP is not set
CONFIG_PPC_MPC52xx=y
CONFIG_PPC_MPC5200_SIMPLE=y
@@ -31,9 +29,6 @@ CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
# CONFIG_FW_LOADER is not set
diff --git a/arch/powerpc/configs/83xx/kmeter1_defconfig b/arch/powerpc/configs/83xx/kmeter1_defconfig
index d21b5cb365f2..24bf1bde1bb4 100644
--- a/arch/powerpc/configs/83xx/kmeter1_defconfig
+++ b/arch/powerpc/configs/83xx/kmeter1_defconfig
@@ -11,8 +11,6 @@ CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
# CONFIG_MSDOS_PARTITION is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
# CONFIG_PPC_CHRP is not set
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_83xx=y
@@ -25,9 +23,6 @@ CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_TIPC=y
CONFIG_BRIDGE=m
diff --git a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
index dad53ef86b49..cbcae2a927e9 100644
--- a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
@@ -22,9 +22,6 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
# CONFIG_FW_LOADER is not set
CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
index 920f37316fdb..f29c166998af 100644
--- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig
+++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
@@ -60,7 +60,6 @@ CONFIG_SYN_COOKIES=y
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_IPCOMP=m
-# CONFIG_INET_XFRM_MODE_BEET is not set
CONFIG_INET6_AH=m
CONFIG_INET6_IPCOMP=m
CONFIG_IPV6_TUNNEL=m
diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig
index f7a803ab2285..f55e23cb176c 100644
--- a/arch/powerpc/configs/adder875_defconfig
+++ b/arch/powerpc/configs/adder875_defconfig
@@ -9,7 +9,6 @@ CONFIG_EXPERT=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_PPC_ADDER875=y
CONFIG_8xx_COPYBACK=y
CONFIG_GEN_RTC=y
@@ -22,9 +21,6 @@ CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig
index cf94d28d0e31..f6d140f2d922 100644
--- a/arch/powerpc/configs/amigaone_defconfig
+++ b/arch/powerpc/configs/amigaone_defconfig
@@ -26,9 +26,6 @@ CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_ADVANCED is not set
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index 2dd1b58a18ae..42fbc70cec33 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -51,11 +51,9 @@ CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
CONFIG_NET_IPIP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_BEET is not set
CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
CONFIG_INET6_IPCOMP=m
-# CONFIG_INET6_XFRM_MODE_BEET is not set
# CONFIG_IPV6_SIT is not set
CONFIG_IPV6_TUNNEL=m
CONFIG_NETFILTER=y
diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig
index 9ff493dd8439..502a75d49789 100644
--- a/arch/powerpc/configs/chrp32_defconfig
+++ b/arch/powerpc/configs/chrp32_defconfig
@@ -27,9 +27,6 @@ CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_ADVANCED is not set
diff --git a/arch/powerpc/configs/corenet_basic_defconfig b/arch/powerpc/configs/corenet_base.config
index b568d465e59e..b568d465e59e 100644
--- a/arch/powerpc/configs/corenet_basic_defconfig
+++ b/arch/powerpc/configs/corenet_base.config
diff --git a/arch/powerpc/configs/debug.config b/arch/powerpc/configs/debug.config
new file mode 100644
index 000000000000..a14ae1f20d60
--- /dev/null
+++ b/arch/powerpc/configs/debug.config
@@ -0,0 +1 @@
+CONFIG_SCOM_DEBUGFS=y
diff --git a/arch/powerpc/configs/ep8248e_defconfig b/arch/powerpc/configs/ep8248e_defconfig
index 6e08d9502d89..00d69965f898 100644
--- a/arch/powerpc/configs/ep8248e_defconfig
+++ b/arch/powerpc/configs/ep8248e_defconfig
@@ -6,7 +6,6 @@ CONFIG_EXPERT=y
CONFIG_KALLSYMS_ALL=y
CONFIG_SLAB=y
CONFIG_PARTITION_ADVANCED=y
-# CONFIG_IOSCHED_CFQ is not set
# CONFIG_PPC_CHRP is not set
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_82xx=y
diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig
index b20bd0cf3543..0e2e5e81a359 100644
--- a/arch/powerpc/configs/ep88xc_defconfig
+++ b/arch/powerpc/configs/ep88xc_defconfig
@@ -11,7 +11,6 @@ CONFIG_EXPERT=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_PPC_EP88XC=y
CONFIG_8xx_COPYBACK=y
CONFIG_GEN_RTC=y
@@ -24,9 +23,6 @@ CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig
index 85e73c3bd859..24c0e0ea5aeb 100644
--- a/arch/powerpc/configs/gamecube_defconfig
+++ b/arch/powerpc/configs/gamecube_defconfig
@@ -29,9 +29,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_RARP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
diff --git a/arch/powerpc/configs/guest.config b/arch/powerpc/configs/guest.config
index 8b8cd18ecd7c..209f58515d88 100644
--- a/arch/powerpc/configs/guest.config
+++ b/arch/powerpc/configs/guest.config
@@ -1,5 +1,4 @@
CONFIG_VIRTIO_BLK=y
-CONFIG_VIRTIO_BLK_SCSI=y
CONFIG_SCSI_VIRTIO=y
CONFIG_VIRTIO_NET=y
CONFIG_NET_FAILOVER=y
diff --git a/arch/powerpc/configs/mgcoge_defconfig b/arch/powerpc/configs/mgcoge_defconfig
index 6ce4f206eac7..dcc8dccf54f3 100644
--- a/arch/powerpc/configs/mgcoge_defconfig
+++ b/arch/powerpc/configs/mgcoge_defconfig
@@ -12,7 +12,6 @@ CONFIG_KALLSYMS_ALL=y
CONFIG_EMBEDDED=y
CONFIG_SLAB=y
CONFIG_PARTITION_ADVANCED=y
-# CONFIG_IOSCHED_CFQ is not set
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_82xx=y
CONFIG_MGCOGE=y
diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig
index 6203c1093a3a..e39346b3dc3b 100644
--- a/arch/powerpc/configs/mpc512x_defconfig
+++ b/arch/powerpc/configs/mpc512x_defconfig
@@ -9,7 +9,6 @@ CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
-# CONFIG_IOSCHED_CFQ is not set
# CONFIG_PPC_CHRP is not set
CONFIG_PPC_MPC512x=y
CONFIG_MPC512x_LPBFIFO=y
@@ -25,9 +24,6 @@ CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_PNP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
CONFIG_CAN=y
diff --git a/arch/powerpc/configs/mpc5200_defconfig b/arch/powerpc/configs/mpc5200_defconfig
index 6f87a5c74960..83d801307178 100644
--- a/arch/powerpc/configs/mpc5200_defconfig
+++ b/arch/powerpc/configs/mpc5200_defconfig
@@ -15,7 +15,6 @@ CONFIG_PPC_MEDIA5200=y
CONFIG_PPC_MPC5200_BUGFIX=y
CONFIG_PPC_MPC5200_LPBFIFO=m
# CONFIG_PPC_PMAC is not set
-CONFIG_SIMPLE_GPIO=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/mpc85xx_basic_defconfig b/arch/powerpc/configs/mpc85xx_base.config
index b1593fe6f70b..b1593fe6f70b 100644
--- a/arch/powerpc/configs/mpc85xx_basic_defconfig
+++ b/arch/powerpc/configs/mpc85xx_base.config
diff --git a/arch/powerpc/configs/mpc86xx_basic_defconfig b/arch/powerpc/configs/mpc86xx_base.config
index 67bd1fa036ee..67bd1fa036ee 100644
--- a/arch/powerpc/configs/mpc86xx_basic_defconfig
+++ b/arch/powerpc/configs/mpc86xx_base.config
diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig
index 285d506c5a76..82a008c04eae 100644
--- a/arch/powerpc/configs/mpc885_ads_defconfig
+++ b/arch/powerpc/configs/mpc885_ads_defconfig
@@ -11,7 +11,6 @@ CONFIG_EXPERT=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_8xx_COPYBACK=y
CONFIG_GEN_RTC=y
CONFIG_HZ_100=y
@@ -23,9 +22,6 @@ CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index 7e6654848531..f492e7d35925 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -20,7 +20,6 @@ CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_PMAC=y
-CONFIG_PPC601_SYNC_FIX=y
CONFIG_GEN_RTC=y
CONFIG_HIGHMEM=y
CONFIG_BINFMT_MISC=m
@@ -39,8 +38,6 @@ CONFIG_IP_MULTICAST=y
CONFIG_SYN_COOKIES=y
CONFIG_INET_AH=y
CONFIG_INET_ESP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
CONFIG_NF_CONNTRACK=m
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index 34219d555e8a..71749377d164 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -38,7 +38,7 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_PARTITION_ADVANCED=y
-CONFIG_SCOM_DEBUGFS=y
+# CONFIG_SCOM_DEBUGFS is not set
CONFIG_OPAL_PRD=y
CONFIG_PPC_MEMTRACE=y
# CONFIG_PPC_PSERIES is not set
@@ -83,9 +83,6 @@ CONFIG_INET_IPCOMP=m
CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
CONFIG_INET6_IPCOMP=m
-CONFIG_INET6_XFRM_MODE_TRANSPORT=m
-CONFIG_INET6_XFRM_MODE_TUNNEL=m
-CONFIG_INET6_XFRM_MODE_BEET=m
CONFIG_IPV6_SIT=m
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_ADVANCED is not set
@@ -184,7 +181,6 @@ CONFIG_MLX5_FPGA=y
CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MYRI10GE=m
-CONFIG_QLGE=m
CONFIG_NETXEN_NIC=m
CONFIG_USB_NET_DRIVERS=m
# CONFIG_WLAN is not set
diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig
index 8f136b52198b..a5f683aed328 100644
--- a/arch/powerpc/configs/ppc40x_defconfig
+++ b/arch/powerpc/configs/ppc40x_defconfig
@@ -84,4 +84,3 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-CONFIG_PPC4xx_OCM=y
diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig
index 67952819593e..a41eedfe0a5f 100644
--- a/arch/powerpc/configs/ppc44x_defconfig
+++ b/arch/powerpc/configs/ppc44x_defconfig
@@ -32,9 +32,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
CONFIG_BRIDGE=m
CONFIG_CONNECTOR=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index dc83fefa04f7..7e68cb222c7b 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -29,6 +29,7 @@ CONFIG_DTL=y
CONFIG_SCANLOG=m
CONFIG_PPC_SMLPAR=y
CONFIG_IBMEBUS=y
+CONFIG_PPC_SVM=y
CONFIG_PPC_MAPLE=y
CONFIG_PPC_PASEMI=y
CONFIG_PPC_PASEMI_IOMMU=y
@@ -188,7 +189,6 @@ CONFIG_MLX4_EN=m
CONFIG_MYRI10GE=m
CONFIG_S2IO=m
CONFIG_PASEMI_MAC=y
-CONFIG_QLGE=m
CONFIG_NETXEN_NIC=m
CONFIG_SUNGEM=y
CONFIG_GELIC_NET=m
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index 9dca4cffa623..3e2f44f38ac5 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -109,9 +109,6 @@ CONFIG_SYN_COOKIES=y
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_HSTCP=m
@@ -129,7 +126,6 @@ CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
CONFIG_INET6_IPCOMP=m
CONFIG_IPV6_MIP6=m
-CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
CONFIG_IPV6_TUNNEL=m
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6_SUBTREES=y
@@ -511,7 +507,6 @@ CONFIG_FORCEDETH=m
CONFIG_HAMACHI=m
CONFIG_YELLOWFIN=m
CONFIG_QLA3XXX=m
-CONFIG_QLGE=m
CONFIG_NETXEN_NIC=m
CONFIG_8139CP=m
CONFIG_8139TOO=m
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index 314c63939816..4db51719342a 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -47,9 +47,6 @@ CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_INET_DIAG is not set
CONFIG_BT=m
CONFIG_BT_RFCOMM=m
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 38abc9c1770a..6b68109e248f 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -42,6 +42,7 @@ CONFIG_DTL=y
CONFIG_SCANLOG=m
CONFIG_PPC_SMLPAR=y
CONFIG_IBMEBUS=y
+CONFIG_PPC_SVM=y
# CONFIG_PPC_PMAC is not set
CONFIG_RTAS_FLASH=m
CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
@@ -168,7 +169,6 @@ CONFIG_IXGBE=m
CONFIG_I40E=m
CONFIG_MLX4_EN=m
CONFIG_MYRI10GE=m
-CONFIG_QLGE=m
CONFIG_NETXEN_NIC=m
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index 557b530b2f70..1b6bdad36b13 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -1,8 +1,3 @@
-CONFIG_PPC64=y
-CONFIG_ALTIVEC=y
-CONFIG_VSX=y
-CONFIG_NR_CPUS=2048
-CONFIG_CPU_LITTLE_ENDIAN=y
CONFIG_KERNEL_XZ=y
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
@@ -28,17 +23,15 @@ CONFIG_EXPERT=y
# CONFIG_AIO is not set
CONFIG_PERF_EVENTS=y
# CONFIG_COMPAT_BRK is not set
+# CONFIG_SLAB_MERGE_DEFAULT is not set
+CONFIG_SLAB_FREELIST_RANDOM=y
CONFIG_SLAB_FREELIST_HARDENED=y
-CONFIG_JUMP_LABEL=y
-CONFIG_STRICT_KERNEL_RWX=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_SIG=y
-CONFIG_MODULE_SIG_FORCE=y
-CONFIG_MODULE_SIG_SHA512=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_MQ_IOSCHED_DEADLINE is not set
-# CONFIG_MQ_IOSCHED_KYBER is not set
+CONFIG_PPC64=y
+CONFIG_ALTIVEC=y
+CONFIG_VSX=y
+CONFIG_NR_CPUS=2048
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_PANIC_TIMEOUT=30
# CONFIG_PPC_VAS is not set
# CONFIG_PPC_PSERIES is not set
# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
@@ -46,16 +39,27 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
CONFIG_CPU_IDLE=y
CONFIG_HZ_100=y
CONFIG_KEXEC=y
+CONFIG_KEXEC_FILE=y
+CONFIG_PRESERVE_FA_DUMP=y
CONFIG_IRQ_ALL_CPUS=y
CONFIG_NUMA=y
-# CONFIG_COMPACTION is not set
-# CONFIG_MIGRATION is not set
CONFIG_PPC_64K_PAGES=y
CONFIG_SCHED_SMT=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=tty0 console=hvc0 ipr.fast_reboot=1 quiet"
# CONFIG_SECCOMP is not set
# CONFIG_PPC_MEM_KEYS is not set
+CONFIG_JUMP_LABEL=y
+CONFIG_STRICT_KERNEL_RWX=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_SIG_FORCE=y
+CONFIG_MODULE_SIG_SHA512=y
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MQ_IOSCHED_DEADLINE is not set
+# CONFIG_MQ_IOSCHED_KYBER is not set
+# CONFIG_COMPACTION is not set
+# CONFIG_MIGRATION is not set
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -63,9 +67,6 @@ CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_NET_IPIP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
CONFIG_DNS_RESOLVER=y
# CONFIG_WIRELESS is not set
CONFIG_DEVTMPFS=y
@@ -140,7 +141,6 @@ CONFIG_TIGON3=m
CONFIG_BNX2X=m
# CONFIG_NET_VENDOR_BROCADE is not set
# CONFIG_NET_VENDOR_CADENCE is not set
-# CONFIG_NET_CADENCE is not set
# CONFIG_NET_VENDOR_CAVIUM is not set
CONFIG_CHELSIO_T1=m
# CONFIG_NET_VENDOR_CISCO is not set
@@ -149,7 +149,6 @@ CONFIG_CHELSIO_T1=m
# CONFIG_NET_VENDOR_DLINK is not set
CONFIG_BE2NET=m
# CONFIG_NET_VENDOR_EZCHIP is not set
-# CONFIG_NET_VENDOR_HP is not set
# CONFIG_NET_VENDOR_HUAWEI is not set
CONFIG_E1000=m
CONFIG_E1000E=m
@@ -157,7 +156,6 @@ CONFIG_IGB=m
CONFIG_IXGB=m
CONFIG_IXGBE=m
CONFIG_I40E=m
-CONFIG_S2IO=m
# CONFIG_NET_VENDOR_MARVELL is not set
CONFIG_MLX4_EN=m
# CONFIG_MLX4_CORE_GEN2 is not set
@@ -168,12 +166,12 @@ CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_MICROSEMI is not set
CONFIG_MYRI10GE=m
# CONFIG_NET_VENDOR_NATSEMI is not set
+CONFIG_S2IO=m
# CONFIG_NET_VENDOR_NETRONOME is not set
# CONFIG_NET_VENDOR_NI is not set
# CONFIG_NET_VENDOR_NVIDIA is not set
# CONFIG_NET_VENDOR_OKI is not set
# CONFIG_NET_VENDOR_PACKET_ENGINES is not set
-CONFIG_QLGE=m
CONFIG_NETXEN_NIC=m
CONFIG_QED=m
CONFIG_QEDE=m
@@ -213,6 +211,7 @@ CONFIG_IPMI_WATCHDOG=y
CONFIG_HW_RANDOM=y
CONFIG_TCG_TPM=y
CONFIG_TCG_TIS_I2C_NUVOTON=y
+# CONFIG_DEVPORT is not set
CONFIG_I2C=y
# CONFIG_I2C_COMPAT is not set
CONFIG_I2C_CHARDEV=y
@@ -239,7 +238,6 @@ CONFIG_HID_CYPRESS=y
CONFIG_HID_EZKEY=y
CONFIG_HID_ITE=y
CONFIG_HID_KENSINGTON=y
-CONFIG_HID_LOGITECH=y
CONFIG_HID_MICROSOFT=y
CONFIG_HID_MONTEREY=y
CONFIG_USB_HIDDEV=y
@@ -276,6 +274,18 @@ CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
+CONFIG_ENCRYPTED_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_HARDENED_USERCOPY=y
+# CONFIG_HARDENED_USERCOPY_FALLBACK is not set
+CONFIG_HARDENED_USERCOPY_PAGESPAN=y
+CONFIG_FORTIFY_SOURCE=y
+CONFIG_SECURITY_LOCKDOWN_LSM=y
+CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
+CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY=y
+# CONFIG_INTEGRITY is not set
+CONFIG_LSM="yama,loadpin,safesetid,integrity"
+# CONFIG_CRYPTO_HW is not set
CONFIG_CRC16=y
CONFIG_CRC_ITU_T=y
CONFIG_LIBCRC32C=y
@@ -286,17 +296,20 @@ CONFIG_LIBCRC32C=y
# CONFIG_XZ_DEC_SPARC is not set
CONFIG_PRINTK_TIME=y
CONFIG_MAGIC_SYSRQ=y
+CONFIG_SLUB_DEBUG_ON=y
+CONFIG_SCHED_STACK_END_CHECK=y
CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_PANIC_ON_OOPS=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
CONFIG_HARDLOCKUP_DETECTOR=y
CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
CONFIG_WQ_WATCHDOG=y
# CONFIG_SCHED_DEBUG is not set
+CONFIG_DEBUG_SG=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_BUG_ON_DATA_CORRUPTION=y
+CONFIG_DEBUG_CREDENTIALS=y
# CONFIG_FTRACE is not set
-# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_XMON=y
-CONFIG_XMON_DEFAULT=y
-CONFIG_ENCRYPTED_KEYS=y
-# CONFIG_CRYPTO_ECHAINIV is not set
-# CONFIG_CRYPTO_HW is not set
+# CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig
index 6c39c52b8e4a..b964084e4056 100644
--- a/arch/powerpc/configs/storcenter_defconfig
+++ b/arch/powerpc/configs/storcenter_defconfig
@@ -22,9 +22,6 @@ CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
@@ -80,5 +77,4 @@ CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_CRC_T10DIF=y
-# CONFIG_ENABLE_WARN_DEPRECATED is not set
# CONFIG_ENABLE_MUST_CHECK is not set
diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig
index 7493f36dd6e9..eda8bfb2d0a3 100644
--- a/arch/powerpc/configs/tqm8xx_defconfig
+++ b/arch/powerpc/configs/tqm8xx_defconfig
@@ -14,7 +14,6 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_SRCVERSION_ALL=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
-# CONFIG_IOSCHED_CFQ is not set
CONFIG_TQM8XX=y
CONFIG_8xx_COPYBACK=y
# CONFIG_8xx_CPU15 is not set
@@ -27,9 +26,6 @@ CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
# CONFIG_FW_LOADER is not set
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index 5a04448ad6b5..379c171f3ddd 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -29,9 +29,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_RARP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
CONFIG_BT=y
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
index 3a4ca7d32477..c2b23b69d7b1 100644
--- a/arch/powerpc/crypto/aes-spe-glue.c
+++ b/arch/powerpc/crypto/aes-spe-glue.c
@@ -17,7 +17,10 @@
#include <asm/byteorder.h>
#include <asm/switch_to.h>
#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
+#include <crypto/gf128mul.h>
+#include <crypto/scatterwalk.h>
/*
* MAX_BYTES defines the number of bytes that are allowed to be processed
@@ -91,13 +94,6 @@ static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
{
struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
- if (key_len != AES_KEYSIZE_128 &&
- key_len != AES_KEYSIZE_192 &&
- key_len != AES_KEYSIZE_256) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
-
switch (key_len) {
case AES_KEYSIZE_128:
ctx->rounds = 4;
@@ -111,6 +107,8 @@ static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
ctx->rounds = 6;
ppc_expand_key_256(ctx->key_enc, in_key);
break;
+ default:
+ return -EINVAL;
}
ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
@@ -118,25 +116,24 @@ static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
return 0;
}
-static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+static int ppc_aes_setkey_skcipher(struct crypto_skcipher *tfm,
+ const u8 *in_key, unsigned int key_len)
+{
+ return ppc_aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len);
+}
+
+static int ppc_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
- struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err;
- err = xts_check_key(tfm, in_key, key_len);
+ err = xts_verify_key(tfm, in_key, key_len);
if (err)
return err;
key_len >>= 1;
- if (key_len != AES_KEYSIZE_128 &&
- key_len != AES_KEYSIZE_192 &&
- key_len != AES_KEYSIZE_256) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
-
switch (key_len) {
case AES_KEYSIZE_128:
ctx->rounds = 4;
@@ -153,6 +150,8 @@ static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
ppc_expand_key_256(ctx->key_enc, in_key);
ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
break;
+ default:
+ return -EINVAL;
}
ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
@@ -178,208 +177,229 @@ static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
spe_end();
}
-static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_ecb_crypt(struct skcipher_request *req, bool enc)
{
- struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int ubytes;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+ err = skcipher_walk_virt(&walk, req, false);
- while ((nbytes = walk.nbytes)) {
- ubytes = nbytes > MAX_BYTES ?
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
- nbytes -= ubytes;
+ while ((nbytes = walk.nbytes) != 0) {
+ nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
spe_begin();
- ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, ctx->rounds, nbytes);
+ if (enc)
+ ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, ctx->rounds, nbytes);
+ else
+ ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, ctx->rounds, nbytes);
spe_end();
- err = blkcipher_walk_done(desc, &walk, ubytes);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
}
-static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_ecb_encrypt(struct skcipher_request *req)
{
- struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int ubytes;
- int err;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
-
- while ((nbytes = walk.nbytes)) {
- ubytes = nbytes > MAX_BYTES ?
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
- nbytes -= ubytes;
-
- spe_begin();
- ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_dec, ctx->rounds, nbytes);
- spe_end();
-
- err = blkcipher_walk_done(desc, &walk, ubytes);
- }
+ return ppc_ecb_crypt(req, true);
+}
- return err;
+static int ppc_ecb_decrypt(struct skcipher_request *req)
+{
+ return ppc_ecb_crypt(req, false);
}
-static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_cbc_crypt(struct skcipher_request *req, bool enc)
{
- struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int ubytes;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+ err = skcipher_walk_virt(&walk, req, false);
- while ((nbytes = walk.nbytes)) {
- ubytes = nbytes > MAX_BYTES ?
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
- nbytes -= ubytes;
+ while ((nbytes = walk.nbytes) != 0) {
+ nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
spe_begin();
- ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, ctx->rounds, nbytes, walk.iv);
+ if (enc)
+ ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, ctx->rounds, nbytes,
+ walk.iv);
+ else
+ ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, ctx->rounds, nbytes,
+ walk.iv);
spe_end();
- err = blkcipher_walk_done(desc, &walk, ubytes);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
}
-static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_cbc_encrypt(struct skcipher_request *req)
{
- struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int ubytes;
- int err;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
-
- while ((nbytes = walk.nbytes)) {
- ubytes = nbytes > MAX_BYTES ?
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
- nbytes -= ubytes;
-
- spe_begin();
- ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_dec, ctx->rounds, nbytes, walk.iv);
- spe_end();
-
- err = blkcipher_walk_done(desc, &walk, ubytes);
- }
+ return ppc_cbc_crypt(req, true);
+}
- return err;
+static int ppc_cbc_decrypt(struct skcipher_request *req)
+{
+ return ppc_cbc_crypt(req, false);
}
-static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_ctr_crypt(struct skcipher_request *req)
{
- struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int pbytes, ubytes;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+ err = skcipher_walk_virt(&walk, req, false);
- while ((pbytes = walk.nbytes)) {
- pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes;
- pbytes = pbytes == nbytes ?
- nbytes : pbytes & ~(AES_BLOCK_SIZE - 1);
- ubytes = walk.nbytes - pbytes;
+ while ((nbytes = walk.nbytes) != 0) {
+ nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
spe_begin();
ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, ctx->rounds, pbytes , walk.iv);
+ ctx->key_enc, ctx->rounds, nbytes, walk.iv);
spe_end();
- nbytes -= pbytes;
- err = blkcipher_walk_done(desc, &walk, ubytes);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
}
-static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_xts_crypt(struct skcipher_request *req, bool enc)
{
- struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int ubytes;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
u32 *twk;
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+ err = skcipher_walk_virt(&walk, req, false);
twk = ctx->key_twk;
- while ((nbytes = walk.nbytes)) {
- ubytes = nbytes > MAX_BYTES ?
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
- nbytes -= ubytes;
+ while ((nbytes = walk.nbytes) != 0) {
+ nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
spe_begin();
- ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk);
+ if (enc)
+ ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, ctx->rounds, nbytes,
+ walk.iv, twk);
+ else
+ ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, ctx->rounds, nbytes,
+ walk.iv, twk);
spe_end();
twk = NULL;
- err = blkcipher_walk_done(desc, &walk, ubytes);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
}
-static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_xts_encrypt(struct skcipher_request *req)
{
- struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int ubytes;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int tail = req->cryptlen % AES_BLOCK_SIZE;
+ int offset = req->cryptlen - tail - AES_BLOCK_SIZE;
+ struct skcipher_request subreq;
+ u8 b[2][AES_BLOCK_SIZE];
int err;
- u32 *twk;
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- twk = ctx->key_twk;
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
- while ((nbytes = walk.nbytes)) {
- ubytes = nbytes > MAX_BYTES ?
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
- nbytes -= ubytes;
+ if (tail) {
+ subreq = *req;
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ req->cryptlen - tail, req->iv);
+ req = &subreq;
+ }
- spe_begin();
- ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk);
- spe_end();
+ err = ppc_xts_crypt(req, true);
+ if (err || !tail)
+ return err;
- twk = NULL;
- err = blkcipher_walk_done(desc, &walk, ubytes);
+ scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE, 0);
+ memcpy(b[1], b[0], tail);
+ scatterwalk_map_and_copy(b[0], req->src, offset + AES_BLOCK_SIZE, tail, 0);
+
+ spe_begin();
+ ppc_encrypt_xts(b[0], b[0], ctx->key_enc, ctx->rounds, AES_BLOCK_SIZE,
+ req->iv, NULL);
+ spe_end();
+
+ scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1);
+
+ return 0;
+}
+
+static int ppc_xts_decrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int tail = req->cryptlen % AES_BLOCK_SIZE;
+ int offset = req->cryptlen - tail - AES_BLOCK_SIZE;
+ struct skcipher_request subreq;
+ u8 b[3][AES_BLOCK_SIZE];
+ le128 twk;
+ int err;
+
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+
+ if (tail) {
+ subreq = *req;
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ offset, req->iv);
+ req = &subreq;
}
- return err;
+ err = ppc_xts_crypt(req, false);
+ if (err || !tail)
+ return err;
+
+ scatterwalk_map_and_copy(b[1], req->src, offset, AES_BLOCK_SIZE + tail, 0);
+
+ spe_begin();
+ if (!offset)
+ ppc_encrypt_ecb(req->iv, req->iv, ctx->key_twk, ctx->rounds,
+ AES_BLOCK_SIZE);
+
+ gf128mul_x_ble(&twk, (le128 *)req->iv);
+
+ ppc_decrypt_xts(b[1], b[1], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE,
+ (u8 *)&twk, NULL);
+ memcpy(b[0], b[2], tail);
+ memcpy(b[0] + tail, b[1] + tail, AES_BLOCK_SIZE - tail);
+ ppc_decrypt_xts(b[0], b[0], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE,
+ req->iv, NULL);
+ spe_end();
+
+ scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1);
+
+ return 0;
}
/*
@@ -388,9 +408,9 @@ static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
* This improves IPsec thoughput by another few percent. Additionally we assume
* that AES context is always aligned to at least 8 bytes because it is created
* with kmalloc() in the crypto infrastructure
- *
*/
-static struct crypto_alg aes_algs[] = { {
+
+static struct crypto_alg aes_cipher_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-ppc-spe",
.cra_priority = 300,
@@ -408,96 +428,84 @@ static struct crypto_alg aes_algs[] = { {
.cia_decrypt = ppc_aes_decrypt
}
}
-}, {
- .cra_name = "ecb(aes)",
- .cra_driver_name = "ecb-ppc-spe",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct ppc_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ppc_aes_setkey,
- .encrypt = ppc_ecb_encrypt,
- .decrypt = ppc_ecb_decrypt,
- }
- }
-}, {
- .cra_name = "cbc(aes)",
- .cra_driver_name = "cbc-ppc-spe",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct ppc_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ppc_aes_setkey,
- .encrypt = ppc_cbc_encrypt,
- .decrypt = ppc_cbc_decrypt,
- }
- }
-}, {
- .cra_name = "ctr(aes)",
- .cra_driver_name = "ctr-ppc-spe",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct ppc_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ppc_aes_setkey,
- .encrypt = ppc_ctr_crypt,
- .decrypt = ppc_ctr_crypt,
- }
- }
-}, {
- .cra_name = "xts(aes)",
- .cra_driver_name = "xts-ppc-spe",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct ppc_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE * 2,
- .max_keysize = AES_MAX_KEY_SIZE * 2,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ppc_xts_setkey,
- .encrypt = ppc_xts_encrypt,
- .decrypt = ppc_xts_decrypt,
- }
+};
+
+static struct skcipher_alg aes_skcipher_algs[] = {
+ {
+ .base.cra_name = "ecb(aes)",
+ .base.cra_driver_name = "ecb-ppc-spe",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct ppc_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = ppc_aes_setkey_skcipher,
+ .encrypt = ppc_ecb_encrypt,
+ .decrypt = ppc_ecb_decrypt,
+ }, {
+ .base.cra_name = "cbc(aes)",
+ .base.cra_driver_name = "cbc-ppc-spe",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct ppc_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ppc_aes_setkey_skcipher,
+ .encrypt = ppc_cbc_encrypt,
+ .decrypt = ppc_cbc_decrypt,
+ }, {
+ .base.cra_name = "ctr(aes)",
+ .base.cra_driver_name = "ctr-ppc-spe",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct ppc_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ppc_aes_setkey_skcipher,
+ .encrypt = ppc_ctr_crypt,
+ .decrypt = ppc_ctr_crypt,
+ .chunksize = AES_BLOCK_SIZE,
+ }, {
+ .base.cra_name = "xts(aes)",
+ .base.cra_driver_name = "xts-ppc-spe",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct ppc_xts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE * 2,
+ .max_keysize = AES_MAX_KEY_SIZE * 2,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ppc_xts_setkey,
+ .encrypt = ppc_xts_encrypt,
+ .decrypt = ppc_xts_decrypt,
}
-} };
+};
static int __init ppc_aes_mod_init(void)
{
- return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+ int err;
+
+ err = crypto_register_alg(&aes_cipher_alg);
+ if (err)
+ return err;
+
+ err = crypto_register_skciphers(aes_skcipher_algs,
+ ARRAY_SIZE(aes_skcipher_algs));
+ if (err)
+ crypto_unregister_alg(&aes_cipher_alg);
+ return err;
}
static void __exit ppc_aes_mod_fini(void)
{
- crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+ crypto_unregister_alg(&aes_cipher_alg);
+ crypto_unregister_skciphers(aes_skcipher_algs,
+ ARRAY_SIZE(aes_skcipher_algs));
}
module_init(ppc_aes_mod_init);
diff --git a/arch/powerpc/crypto/crc-vpmsum_test.c b/arch/powerpc/crypto/crc-vpmsum_test.c
index 47985219a68f..dce86e75f1a8 100644
--- a/arch/powerpc/crypto/crc-vpmsum_test.c
+++ b/arch/powerpc/crypto/crc-vpmsum_test.c
@@ -103,6 +103,7 @@ static int __init crc_test_init(void)
crc32, verify32, len);
break;
}
+ cond_resched();
}
pr_info("crc-vpmsum_test done, completed %lu iterations\n", i);
} while (0);
diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
index 2c232898b933..63760b7dbb76 100644
--- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c
+++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
@@ -73,10 +73,8 @@ static int crc32c_vpmsum_setkey(struct crypto_shash *hash, const u8 *key,
{
u32 *mctx = crypto_shash_ctx(hash);
- if (keylen != sizeof(u32)) {
- crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ if (keylen != sizeof(u32))
return -EINVAL;
- }
*mctx = le32_to_cpup((__le32 *)key);
return 0;
}
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 9a1d2fc6ceb7..d0a23d0db863 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -4,11 +4,11 @@ generated-y += syscall_table_64.h
generated-y += syscall_table_c32.h
generated-y += syscall_table_spu.h
generic-y += div64.h
+generic-y += dma-mapping.h
generic-y += export.h
generic-y += irq_regs.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
generic-y += vtime.h
-generic-y += msi.h
-generic-y += simd.h
+generic-y += early_ioremap.h
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index 9c63b596e6ce..9a53e29680f4 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -6,44 +6,35 @@
#include <asm/machdep.h>
-static inline int arch_get_random_long(unsigned long *v)
+static inline bool __must_check arch_get_random_long(unsigned long *v)
{
- return 0;
+ return false;
}
-static inline int arch_get_random_int(unsigned int *v)
+static inline bool __must_check arch_get_random_int(unsigned int *v)
{
- return 0;
+ return false;
}
-static inline int arch_get_random_seed_long(unsigned long *v)
+static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
{
if (ppc_md.get_random_seed)
return ppc_md.get_random_seed(v);
- return 0;
+ return false;
}
-static inline int arch_get_random_seed_int(unsigned int *v)
+
+static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
{
unsigned long val;
- int rc;
+ bool rc;
- rc = arch_get_random_long(&val);
+ rc = arch_get_random_seed_long(&val);
if (rc)
*v = val;
return rc;
}
-
-static inline int arch_has_random(void)
-{
- return 0;
-}
-
-static inline int arch_has_random_seed(void)
-{
- return !!ppc_md.get_random_seed;
-}
#endif /* CONFIG_ARCH_RANDOM */
#ifdef CONFIG_PPC_POWERNV
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index ec1c97a8e8cb..983c0084fb3f 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -15,6 +15,7 @@
#include <asm/epapr_hcalls.h>
#include <asm/dcr.h>
#include <asm/mmu_context.h>
+#include <asm/ultravisor-api.h>
#include <uapi/asm/ucontext.h>
@@ -34,6 +35,16 @@ extern struct static_key hcall_tracepoint_key;
void __trace_hcall_entry(unsigned long opcode, unsigned long *args);
void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf);
+/* Ultravisor */
+#if defined(CONFIG_PPC_POWERNV) || defined(CONFIG_PPC_SVM)
+long ucall_norets(unsigned long opcode, ...);
+#else
+static inline long ucall_norets(unsigned long opcode, ...)
+{
+ return U_NOT_AVAILABLE;
+}
+#endif
+
/* OPAL */
int64_t __opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
int64_t a4, int64_t a5, int64_t a6, int64_t a7,
@@ -81,7 +92,8 @@ long sys_swapcontext(struct ucontext __user *old_ctx,
long sys_debug_setcontext(struct ucontext __user *ctx,
int ndbg, struct sig_dbg_op __user *dbg);
int
-ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp);
+ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp,
+ struct __kernel_old_timeval __user *tvp);
unsigned long __init early_init(unsigned long dt_ptr);
void __init machine_init(u64 dt_ptr);
#endif
@@ -123,7 +135,8 @@ extern int __ucmpdi2(u64, u64);
/* tracing */
void _mcount(void);
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
+ unsigned long sp);
void pnv_power9_force_smt4_catch(void);
void pnv_power9_force_smt4_release(void);
@@ -140,9 +153,12 @@ void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
/* Patch sites */
extern s32 patch__call_flush_count_cache;
extern s32 patch__flush_count_cache_return;
+extern s32 patch__flush_link_stack_return;
+extern s32 patch__call_kvm_flush_link_stack;
extern s32 patch__memset_nocache, patch__memcpy_nocache;
extern long flush_count_cache;
+extern long kvm_flush_link_stack;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index fbe8df433019..123adcefd40f 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -18,8 +18,6 @@
* mb() prevents loads and stores being reordered across this point.
* rmb() prevents loads being reordered across this point.
* wmb() prevents stores being reordered across this point.
- * read_barrier_depends() prevents data-dependent loads being reordered
- * across this point (nop on PPC).
*
* *mb() variants without smp_ prefix must order all types of memory
* operations with one another. sync is the only instruction sufficient
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 603aed229af7..28dcf8222943 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -64,7 +64,7 @@
/* Macro for generating the ***_bits() functions */
#define DEFINE_BITOP(fn, op, prefix) \
-static __inline__ void fn(unsigned long mask, \
+static inline void fn(unsigned long mask, \
volatile unsigned long *_p) \
{ \
unsigned long old; \
@@ -86,22 +86,22 @@ DEFINE_BITOP(clear_bits, andc, "")
DEFINE_BITOP(clear_bits_unlock, andc, PPC_RELEASE_BARRIER)
DEFINE_BITOP(change_bits, xor, "")
-static __inline__ void set_bit(int nr, volatile unsigned long *addr)
+static inline void arch_set_bit(int nr, volatile unsigned long *addr)
{
set_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
}
-static __inline__ void clear_bit(int nr, volatile unsigned long *addr)
+static inline void arch_clear_bit(int nr, volatile unsigned long *addr)
{
clear_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
}
-static __inline__ void clear_bit_unlock(int nr, volatile unsigned long *addr)
+static inline void arch_clear_bit_unlock(int nr, volatile unsigned long *addr)
{
clear_bits_unlock(BIT_MASK(nr), addr + BIT_WORD(nr));
}
-static __inline__ void change_bit(int nr, volatile unsigned long *addr)
+static inline void arch_change_bit(int nr, volatile unsigned long *addr)
{
change_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
}
@@ -109,7 +109,7 @@ static __inline__ void change_bit(int nr, volatile unsigned long *addr)
/* Like DEFINE_BITOP(), with changes to the arguments to 'op' and the output
* operands. */
#define DEFINE_TESTOP(fn, op, prefix, postfix, eh) \
-static __inline__ unsigned long fn( \
+static inline unsigned long fn( \
unsigned long mask, \
volatile unsigned long *_p) \
{ \
@@ -138,34 +138,34 @@ DEFINE_TESTOP(test_and_clear_bits, andc, PPC_ATOMIC_ENTRY_BARRIER,
DEFINE_TESTOP(test_and_change_bits, xor, PPC_ATOMIC_ENTRY_BARRIER,
PPC_ATOMIC_EXIT_BARRIER, 0)
-static __inline__ int test_and_set_bit(unsigned long nr,
- volatile unsigned long *addr)
+static inline int arch_test_and_set_bit(unsigned long nr,
+ volatile unsigned long *addr)
{
return test_and_set_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
}
-static __inline__ int test_and_set_bit_lock(unsigned long nr,
- volatile unsigned long *addr)
+static inline int arch_test_and_set_bit_lock(unsigned long nr,
+ volatile unsigned long *addr)
{
return test_and_set_bits_lock(BIT_MASK(nr),
addr + BIT_WORD(nr)) != 0;
}
-static __inline__ int test_and_clear_bit(unsigned long nr,
- volatile unsigned long *addr)
+static inline int arch_test_and_clear_bit(unsigned long nr,
+ volatile unsigned long *addr)
{
return test_and_clear_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
}
-static __inline__ int test_and_change_bit(unsigned long nr,
- volatile unsigned long *addr)
+static inline int arch_test_and_change_bit(unsigned long nr,
+ volatile unsigned long *addr)
{
return test_and_change_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
}
#ifdef CONFIG_PPC64
-static __inline__ unsigned long clear_bit_unlock_return_word(int nr,
- volatile unsigned long *addr)
+static inline unsigned long
+clear_bit_unlock_return_word(int nr, volatile unsigned long *addr)
{
unsigned long old, t;
unsigned long *p = (unsigned long *)addr + BIT_WORD(nr);
@@ -185,15 +185,18 @@ static __inline__ unsigned long clear_bit_unlock_return_word(int nr,
return old;
}
-/* This is a special function for mm/filemap.c */
-#define clear_bit_unlock_is_negative_byte(nr, addr) \
- (clear_bit_unlock_return_word(nr, addr) & BIT_MASK(PG_waiters))
+/*
+ * This is a special function for mm/filemap.c
+ * Bit 7 corresponds to PG_waiters.
+ */
+#define arch_clear_bit_unlock_is_negative_byte(nr, addr) \
+ (clear_bit_unlock_return_word(nr, addr) & BIT_MASK(7))
#endif /* CONFIG_PPC64 */
#include <asm-generic/bitops/non-atomic.h>
-static __inline__ void __clear_bit_unlock(int nr, volatile unsigned long *addr)
+static inline void arch___clear_bit_unlock(int nr, volatile unsigned long *addr)
{
__asm__ __volatile__(PPC_RELEASE_BARRIER "" ::: "memory");
__clear_bit(nr, addr);
@@ -215,14 +218,14 @@ static __inline__ void __clear_bit_unlock(int nr, volatile unsigned long *addr)
* fls: find last (most-significant) bit set.
* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
-static __inline__ int fls(unsigned int x)
+static inline int fls(unsigned int x)
{
return 32 - __builtin_clz(x);
}
#include <asm-generic/bitops/builtin-__fls.h>
-static __inline__ int fls64(__u64 x)
+static inline int fls64(__u64 x)
{
return 64 - __builtin_clzll(x);
}
@@ -239,6 +242,10 @@ unsigned long __arch_hweight64(__u64 w);
#include <asm-generic/bitops/find.h>
+/* wrappers that deal with KASAN instrumentation */
+#include <asm-generic/bitops/instrumented-atomic.h>
+#include <asm-generic/bitops/instrumented-lock.h>
+
/* Little-endian versions */
#include <asm-generic/bitops/le.h>
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 677e9babef80..3c0ba22dc360 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -91,6 +91,7 @@
static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
{
+ addr &= 0xf0000000; /* align addr to start of segment */
barrier(); /* make sure thread.kuap is updated before playing with SRs */
while (addr < end) {
mtsrin(sr, addr);
@@ -101,41 +102,91 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
isync(); /* Context sync required after mtsrin() */
}
-static inline void allow_user_access(void __user *to, const void __user *from, u32 size)
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+ u32 size, unsigned long dir)
{
u32 addr, end;
- if (__builtin_constant_p(to) && to == NULL)
+ BUILD_BUG_ON(!__builtin_constant_p(dir));
+ BUILD_BUG_ON(dir == KUAP_CURRENT);
+
+ if (!(dir & KUAP_WRITE))
return;
addr = (__force u32)to;
- if (!addr || addr >= TASK_SIZE || !size)
+ if (unlikely(addr >= TASK_SIZE || !size))
return;
end = min(addr + size, TASK_SIZE);
+
current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf);
kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */
}
-static inline void prevent_user_access(void __user *to, const void __user *from, u32 size)
+static __always_inline void prevent_user_access(void __user *to, const void __user *from,
+ u32 size, unsigned long dir)
{
- u32 addr = (__force u32)to;
- u32 end = min(addr + size, TASK_SIZE);
+ u32 addr, end;
+
+ BUILD_BUG_ON(!__builtin_constant_p(dir));
+
+ if (dir == KUAP_CURRENT) {
+ u32 kuap = current->thread.kuap;
- if (!addr || addr >= TASK_SIZE || !size)
+ if (unlikely(!kuap))
+ return;
+
+ addr = kuap & 0xf0000000;
+ end = kuap << 28;
+ } else if (dir & KUAP_WRITE) {
+ addr = (__force u32)to;
+ end = min(addr + size, TASK_SIZE);
+
+ if (unlikely(addr >= TASK_SIZE || !size))
+ return;
+ } else {
return;
+ }
current->thread.kuap = 0;
kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */
}
-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+static inline unsigned long prevent_user_access_return(void)
+{
+ unsigned long flags = current->thread.kuap;
+ unsigned long addr = flags & 0xf0000000;
+ unsigned long end = flags << 28;
+ void __user *to = (__force void __user *)addr;
+
+ if (flags)
+ prevent_user_access(to, to, end - addr, KUAP_READ_WRITE);
+
+ return flags;
+}
+
+static inline void restore_user_access(unsigned long flags)
{
+ unsigned long addr = flags & 0xf0000000;
+ unsigned long end = flags << 28;
+ void __user *to = (__force void __user *)addr;
+
+ if (flags)
+ allow_user_access(to, to, end - addr, KUAP_READ_WRITE);
+}
+
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+ unsigned long begin = regs->kuap & 0xf0000000;
+ unsigned long end = regs->kuap << 28;
+
if (!is_write)
return false;
- return WARN(!regs->kuap, "Bug: write fault blocked by segment registers !");
+ return WARN(address < begin || address >= end,
+ "Bug: write fault blocked by segment registers !");
}
#endif /* CONFIG_PPC_KUAP */
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 998317702630..dc5c039eb28e 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -49,7 +49,6 @@ static inline void pgtable_free(void *table, unsigned index_size)
#define get_hugepd_cache_index(x) (x)
-#ifdef CONFIG_SMP
static inline void pgtable_free_tlb(struct mmu_gather *tlb,
void *table, int shift)
{
@@ -66,13 +65,6 @@ static inline void __tlb_remove_table(void *_table)
pgtable_free(table, shift);
}
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
unsigned long address)
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 838de59f6754..5b39c11e884a 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -148,23 +148,21 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
*/
#include <asm/fixmap.h>
-#ifdef CONFIG_HIGHMEM
-#define KVIRT_TOP PKMAP_BASE
-#else
-#define KVIRT_TOP FIXADDR_START
-#endif
-
/*
* ioremap_bot starts at that address. Early ioremaps move down from there,
* until mem_init() at which point this becomes the top of the vmalloc
* and ioremap space
*/
-#ifdef CONFIG_NOT_COHERENT_CACHE
-#define IOREMAP_TOP ((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK)
+#ifdef CONFIG_HIGHMEM
+#define IOREMAP_TOP PKMAP_BASE
#else
-#define IOREMAP_TOP KVIRT_TOP
+#define IOREMAP_TOP FIXADDR_START
#endif
+/* PPC32 shares vmalloc area with ioremap */
+#define IOREMAP_START VMALLOC_START
+#define IOREMAP_END VMALLOC_END
+
/*
* Just any arbitrary offset to the start of the vmalloc VM area: the
* current 16MB value just means that there will be a 64MB "hole" after the
@@ -195,14 +193,17 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
#else
#define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
#endif
+
+#ifdef CONFIG_KASAN_VMALLOC
+#define VMALLOC_END _ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
+#else
#define VMALLOC_END ioremap_bot
+#endif
#ifndef __ASSEMBLY__
#include <linux/sched.h>
#include <linux/threads.h>
-extern unsigned long ioremap_bot;
-
/* Bits to mask out from a PGD to get to the PUD page */
#define PGD_MASKED_BITS 0
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index f254de956d6a..90dd3a3fc8c7 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -63,6 +63,14 @@
* because that would require an expensive read/modify write of the AMR.
*/
+static inline unsigned long get_kuap(void)
+{
+ if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP))
+ return 0;
+
+ return mfspr(SPRN_AMR);
+}
+
static inline void set_kuap(unsigned long value)
{
if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP))
@@ -77,25 +85,43 @@ static inline void set_kuap(unsigned long value)
isync();
}
-static inline void allow_user_access(void __user *to, const void __user *from,
- unsigned long size)
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size, unsigned long dir)
{
// This is written so we can resolve to a single case at build time
- if (__builtin_constant_p(to) && to == NULL)
+ BUILD_BUG_ON(!__builtin_constant_p(dir));
+ if (dir == KUAP_READ)
set_kuap(AMR_KUAP_BLOCK_WRITE);
- else if (__builtin_constant_p(from) && from == NULL)
+ else if (dir == KUAP_WRITE)
set_kuap(AMR_KUAP_BLOCK_READ);
- else
+ else if (dir == KUAP_READ_WRITE)
set_kuap(0);
+ else
+ BUILD_BUG();
}
static inline void prevent_user_access(void __user *to, const void __user *from,
- unsigned long size)
+ unsigned long size, unsigned long dir)
+{
+ set_kuap(AMR_KUAP_BLOCKED);
+}
+
+static inline unsigned long prevent_user_access_return(void)
{
+ unsigned long flags = get_kuap();
+
set_kuap(AMR_KUAP_BLOCKED);
+
+ return flags;
+}
+
+static inline void restore_user_access(unsigned long flags)
+{
+ set_kuap(flags);
}
-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
(regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 15b75005bc34..3fa1b962dc27 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -600,8 +600,11 @@ extern void slb_set_size(u16 size);
*
*/
#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2)
+
+// The + 2 accounts for INVALID_REGION and 1 more to avoid overlap with kernel
#define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \
- MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT)
+ MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT + 2)
+
/*
* For platforms that support on 65bit VA we limit the context bits
*/
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 23b83d3593e2..bb3deb76c951 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -206,7 +206,6 @@ extern int mmu_io_psize;
void mmu_early_init_devtree(void);
void hash__early_init_devtree(void);
void radix__early_init_devtree(void);
-extern void radix_init_native(void);
extern void hash__early_init_mmu(void);
extern void radix__early_init_mmu(void);
static inline void early_init_mmu(void)
@@ -238,9 +237,6 @@ static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
first_memblock_size);
}
-extern int (*register_process_table)(unsigned long base, unsigned long page_size,
- unsigned long tbl_size);
-
#ifdef CONFIG_PPC_PSERIES
extern void radix_init_pseries(void);
#else
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index d5a44912902f..a41e91bd0580 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -19,9 +19,7 @@ extern struct vmemmap_backing *vmemmap_list;
extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
extern void pmd_fragment_free(unsigned long *);
extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
-#ifdef CONFIG_SMP
extern void __tlb_remove_table(void *_table);
-#endif
void pte_frag_destroy(void *pte_frag);
static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
@@ -122,11 +120,6 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
unsigned long address)
{
- /*
- * By now all the pud entries should be none entries. So go
- * ahead and flush the page walk cache
- */
- flush_tlb_pgtable(tlb, address);
pgtable_free_tlb(tlb, pud, PUD_INDEX);
}
@@ -143,11 +136,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
unsigned long address)
{
- /*
- * By now all the pud entries should be none entries. So go
- * ahead and flush the page walk cache
- */
- flush_tlb_pgtable(tlb, address);
return pgtable_free_tlb(tlb, pmd, PMD_INDEX);
}
@@ -166,11 +154,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
unsigned long address)
{
- /*
- * By now all the pud entries should be none entries. So go
- * ahead and flush the page walk cache
- */
- flush_tlb_pgtable(tlb, address);
pgtable_free_tlb(tlb, table, PTE_INDEX);
}
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
index a069dfcac9a9..4e697bc2f4cd 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
@@ -70,9 +70,6 @@ static inline int get_hugepd_cache_index(int index)
/* should not reach */
}
-#else /* !CONFIG_HUGETLB_PAGE */
-static inline int pmd_huge(pmd_t pmd) { return 0; }
-static inline int pud_huge(pud_t pud) { return 0; }
#endif /* CONFIG_HUGETLB_PAGE */
#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
index e3d4dd4ae2fa..34d1018896b3 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -59,9 +59,6 @@ static inline int get_hugepd_cache_index(int index)
BUG();
}
-#else /* !CONFIG_HUGETLB_PAGE */
-static inline int pmd_huge(pmd_t pmd) { return 0; }
-static inline int pud_huge(pud_t pud) { return 0; }
#endif /* CONFIG_HUGETLB_PAGE */
static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 8308f32e9782..201a69e6a355 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -289,7 +289,6 @@ extern unsigned long __kernel_io_end;
#define KERN_IO_END __kernel_io_end
extern struct page *vmemmap;
-extern unsigned long ioremap_bot;
extern unsigned long pci_io_base;
#endif /* __ASSEMBLY__ */
@@ -317,6 +316,7 @@ extern unsigned long pci_io_base;
#define PHB_IO_BASE (ISA_IO_END)
#define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE)
#define IOREMAP_BASE (PHB_IO_END)
+#define IOREMAP_START (ioremap_bot)
#define IOREMAP_END (KERN_IO_END)
/* Advertise special mapping type for AGP */
@@ -608,8 +608,10 @@ static inline bool pte_access_permitted(pte_t pte, bool write)
*/
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
{
- return __pte((((pte_basic_t)(pfn) << PAGE_SHIFT) & PTE_RPN_MASK) |
- pgprot_val(pgprot));
+ VM_BUG_ON(pfn >> (64 - PAGE_SHIFT));
+ VM_BUG_ON((pfn << PAGE_SHIFT) & ~PTE_RPN_MASK);
+
+ return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot));
}
static inline unsigned long pte_pfn(pte_t pte)
@@ -1353,18 +1355,21 @@ static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_va
* Like pmd_huge() and pmd_large(), but works regardless of config options
*/
#define pmd_is_leaf pmd_is_leaf
+#define pmd_leaf pmd_is_leaf
static inline bool pmd_is_leaf(pmd_t pmd)
{
return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
}
#define pud_is_leaf pud_is_leaf
+#define pud_leaf pud_is_leaf
static inline bool pud_is_leaf(pud_t pud)
{
return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
}
#define pgd_is_leaf pgd_is_leaf
+#define pgd_leaf pgd_is_leaf
static inline bool pgd_is_leaf(pgd_t pgd)
{
return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PTE));
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index e04a839cb5b9..d97db3ad9aae 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -254,7 +254,13 @@ extern void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
extern pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
extern pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp);
-extern int radix__has_transparent_hugepage(void);
+static inline int radix__has_transparent_hugepage(void)
+{
+ /* For radix 2M at PMD level means thp */
+ if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT)
+ return 1;
+ return 0;
+}
#endif
extern int __meminit radix__vmemmap_create_mapping(unsigned long start,
@@ -266,9 +272,6 @@ extern void radix__vmemmap_remove_mapping(unsigned long start,
extern int radix__map_kernel_page(unsigned long ea, unsigned long pa,
pgprot_t flags, unsigned int psz);
-extern int radix__ioremap_range(unsigned long ea, phys_addr_t pa,
- unsigned long size, pgprot_t prot, int nid);
-
static inline unsigned long radix__get_tree_size(void)
{
unsigned long rts_field;
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 05147cecb8df..ca8db193ae38 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -17,8 +17,8 @@ extern void radix__flush_tlb_lpid_page(unsigned int lpid,
unsigned long addr,
unsigned long page_size);
extern void radix__flush_pwc_lpid(unsigned int lpid);
-extern void radix__flush_tlb_lpid(unsigned int lpid);
-extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid);
+extern void radix__flush_all_lpid(unsigned int lpid);
+extern void radix__flush_all_lpid_guest(unsigned int lpid);
#else
static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); };
static inline void radix__flush_tlb_lpid_page(unsigned int lpid,
@@ -31,11 +31,11 @@ static inline void radix__flush_pwc_lpid(unsigned int lpid)
{
WARN_ON(1);
}
-static inline void radix__flush_tlb_lpid(unsigned int lpid)
+static inline void radix__flush_all_lpid(unsigned int lpid)
{
WARN_ON(1);
}
-static inline void radix__local_flush_tlb_lpid_guest(unsigned int lpid)
+static inline void radix__flush_all_lpid_guest(unsigned int lpid)
{
WARN_ON(1);
}
@@ -73,6 +73,4 @@ extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr);
extern void radix__flush_tlb_all(void);
-extern void radix__local_flush_tlb_lpid(unsigned int lpid);
-
#endif
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index ebf572ea621e..dcb5c3839d2f 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -147,19 +147,12 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
flush_tlb_page(vma, address);
}
-/*
- * flush the page walk cache for the address
- */
-static inline void flush_tlb_pgtable(struct mmu_gather *tlb, unsigned long address)
-{
- /*
- * Flush the page table walk cache on freeing a page table. We already
- * have marked the upper/higher level page table entry none by now.
- * So it is safe to flush PWC here.
- */
- if (!radix_enabled())
- return;
+extern bool tlbie_capable;
+extern bool tlbie_enabled;
- radix__flush_tlb_pwc(tlb, address);
+static inline bool cputlb_use_tlbie(void)
+{
+ return tlbie_enabled;
}
+
#endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
index 6436b65ac7bc..0e1263455d73 100644
--- a/arch/powerpc/include/asm/book3s/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -26,5 +26,16 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot);
#define __HAVE_PHYS_MEM_ACCESS_PROT
+/*
+ * This gets called at the end of handling a page fault, when
+ * the kernel has put a new PTE into the page table for the process.
+ * We use it to ensure coherency between the i-cache and d-cache
+ * for the page which has just been mapped in.
+ * On machines which use an MMU hash table, we use this to put a
+ * corresponding HPTE into the hash table ahead of time, instead of
+ * waiting for the inevitable extra hash-table miss exception.
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index fed7e6241349..338f36cd9934 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -5,14 +5,6 @@
#include <asm/asm-compat.h>
-/*
- * Define an illegal instr to trap on the bug.
- * We don't use 0 because that marks the end of a function
- * in the ELF ABI. That's "Boo Boo" in case you wonder...
- */
-#define BUG_OPCODE .long 0x00b00b00 /* For asm */
-#define BUG_ILLEGAL_INSTR "0x00b00b00" /* For BUG macro */
-
#ifdef CONFIG_BUG
#ifdef __ASSEMBLY__
@@ -57,6 +49,15 @@
".previous\n"
#endif
+#define BUG_ENTRY(insn, flags, ...) \
+ __asm__ __volatile__( \
+ "1: " insn "\n" \
+ _EMIT_BUG_ENTRY \
+ : : "i" (__FILE__), "i" (__LINE__), \
+ "i" (flags), \
+ "i" (sizeof(struct bug_entry)), \
+ ##__VA_ARGS__)
+
/*
* BUG_ON() and WARN_ON() do their best to cooperate with compile-time
* optimisations. However depending on the complexity of the condition
@@ -64,11 +65,7 @@
*/
#define BUG() do { \
- __asm__ __volatile__( \
- "1: twi 31,0,0\n" \
- _EMIT_BUG_ENTRY \
- : : "i" (__FILE__), "i" (__LINE__), \
- "i" (0), "i" (sizeof(struct bug_entry))); \
+ BUG_ENTRY("twi 31, 0, 0", 0); \
unreachable(); \
} while (0)
@@ -77,23 +74,11 @@
if (x) \
BUG(); \
} else { \
- __asm__ __volatile__( \
- "1: "PPC_TLNEI" %4,0\n" \
- _EMIT_BUG_ENTRY \
- : : "i" (__FILE__), "i" (__LINE__), "i" (0), \
- "i" (sizeof(struct bug_entry)), \
- "r" ((__force long)(x))); \
+ BUG_ENTRY(PPC_TLNEI " %4, 0", 0, "r" ((__force long)(x))); \
} \
} while (0)
-#define __WARN_FLAGS(flags) do { \
- __asm__ __volatile__( \
- "1: twi 31,0,0\n" \
- _EMIT_BUG_ENTRY \
- : : "i" (__FILE__), "i" (__LINE__), \
- "i" (BUGFLAG_WARNING|(flags)), \
- "i" (sizeof(struct bug_entry))); \
-} while (0)
+#define __WARN_FLAGS(flags) BUG_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags))
#define WARN_ON(x) ({ \
int __ret_warn_on = !!(x); \
@@ -101,13 +86,9 @@
if (__ret_warn_on) \
__WARN(); \
} else { \
- __asm__ __volatile__( \
- "1: "PPC_TLNEI" %4,0\n" \
- _EMIT_BUG_ENTRY \
- : : "i" (__FILE__), "i" (__LINE__), \
- "i" (BUGFLAG_WARNING|BUGFLAG_TAINT(TAINT_WARN)),\
- "i" (sizeof(struct bug_entry)), \
- "r" (__ret_warn_on)); \
+ BUG_ENTRY(PPC_TLNEI " %4, 0", \
+ BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), \
+ "r" (__ret_warn_on)); \
} \
unlikely(__ret_warn_on); \
})
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 45e3137ccd71..72b81015cebe 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -55,42 +55,48 @@ struct ppc64_caches {
extern struct ppc64_caches ppc64_caches;
-static inline u32 l1_cache_shift(void)
+static inline u32 l1_dcache_shift(void)
{
return ppc64_caches.l1d.log_block_size;
}
-static inline u32 l1_cache_bytes(void)
+static inline u32 l1_dcache_bytes(void)
{
return ppc64_caches.l1d.block_size;
}
+
+static inline u32 l1_icache_shift(void)
+{
+ return ppc64_caches.l1i.log_block_size;
+}
+
+static inline u32 l1_icache_bytes(void)
+{
+ return ppc64_caches.l1i.block_size;
+}
#else
-static inline u32 l1_cache_shift(void)
+static inline u32 l1_dcache_shift(void)
{
return L1_CACHE_SHIFT;
}
-static inline u32 l1_cache_bytes(void)
+static inline u32 l1_dcache_bytes(void)
{
return L1_CACHE_BYTES;
}
+
+static inline u32 l1_icache_shift(void)
+{
+ return L1_CACHE_SHIFT;
+}
+
+static inline u32 l1_icache_bytes(void)
+{
+ return L1_CACHE_BYTES;
+}
+
#endif
-#endif /* ! __ASSEMBLY__ */
-
-#if defined(__ASSEMBLY__)
-/*
- * For a snooping icache, we still need a dummy icbi to purge all the
- * prefetched instructions from the ifetch buffers. We also need a sync
- * before the icbi to order the the actual stores to memory that might
- * have modified instructions with the icbi.
- */
-#define PURGE_PREFETCHED_INS \
- sync; \
- icbi 0,r3; \
- sync; \
- isync
-#else
#define __read_mostly __attribute__((__section__(".data..read_mostly")))
#ifdef CONFIG_PPC_BOOK3S_32
@@ -124,6 +130,17 @@ static inline void dcbst(void *addr)
{
__asm__ __volatile__ ("dcbst 0, %0" : : "r"(addr) : "memory");
}
+
+static inline void icbi(void *addr)
+{
+ asm volatile ("icbi 0, %0" : : "r"(addr) : "memory");
+}
+
+static inline void iccci(void *addr)
+{
+ asm volatile ("iccci 0, %0" : : "r"(addr) : "memory");
+}
+
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_CACHE_H */
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index eef388f2659f..4a1c9f0200e1 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -42,29 +42,25 @@ extern void flush_dcache_page(struct page *page);
#define flush_dcache_mmap_lock(mapping) do { } while (0)
#define flush_dcache_mmap_unlock(mapping) do { } while (0)
-extern void flush_icache_range(unsigned long, unsigned long);
+void flush_icache_range(unsigned long start, unsigned long stop);
extern void flush_icache_user_range(struct vm_area_struct *vma,
struct page *page, unsigned long addr,
int len);
-extern void __flush_dcache_icache(void *page_va);
extern void flush_dcache_icache_page(struct page *page);
-#if defined(CONFIG_PPC32) && !defined(CONFIG_BOOKE)
-extern void __flush_dcache_icache_phys(unsigned long physaddr);
-#else
-static inline void __flush_dcache_icache_phys(unsigned long physaddr)
-{
- BUG();
-}
-#endif
-
-/*
- * Write any modified data cache blocks out to memory and invalidate them.
- * Does not invalidate the corresponding instruction cache blocks.
+void __flush_dcache_icache(void *page);
+
+/**
+ * flush_dcache_range(): Write any modified data cache blocks out to memory and
+ * invalidate them. Does not invalidate the corresponding instruction cache
+ * blocks.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
*/
static inline void flush_dcache_range(unsigned long start, unsigned long stop)
{
- unsigned long shift = l1_cache_shift();
- unsigned long bytes = l1_cache_bytes();
+ unsigned long shift = l1_dcache_shift();
+ unsigned long bytes = l1_dcache_bytes();
void *addr = (void *)(start & ~(bytes - 1));
unsigned long size = stop - (unsigned long)addr + (bytes - 1);
unsigned long i;
@@ -89,8 +85,8 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop)
*/
static inline void clean_dcache_range(unsigned long start, unsigned long stop)
{
- unsigned long shift = l1_cache_shift();
- unsigned long bytes = l1_cache_bytes();
+ unsigned long shift = l1_dcache_shift();
+ unsigned long bytes = l1_dcache_bytes();
void *addr = (void *)(start & ~(bytes - 1));
unsigned long size = stop - (unsigned long)addr + (bytes - 1);
unsigned long i;
@@ -108,8 +104,8 @@ static inline void clean_dcache_range(unsigned long start, unsigned long stop)
static inline void invalidate_dcache_range(unsigned long start,
unsigned long stop)
{
- unsigned long shift = l1_cache_shift();
- unsigned long bytes = l1_cache_bytes();
+ unsigned long shift = l1_dcache_shift();
+ unsigned long bytes = l1_dcache_bytes();
void *addr = (void *)(start & ~(bytes - 1));
unsigned long size = stop - (unsigned long)addr + (bytes - 1);
unsigned long i;
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 74d0db511099..3e3cdfaa76c6 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -96,23 +96,6 @@ typedef u32 compat_sigset_word;
#define COMPAT_OFF_T_MAX 0x7fffffff
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
- return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
- return (u32)(unsigned long)uptr;
-}
-
static inline void __user *arch_compat_alloc_user_space(long len)
{
struct pt_regs *regs = current->thread.regs;
diff --git a/arch/powerpc/include/asm/cpm.h b/arch/powerpc/include/asm/cpm.h
index 4c24ea8209bb..ce483b0f8a4d 100644
--- a/arch/powerpc/include/asm/cpm.h
+++ b/arch/powerpc/include/asm/cpm.h
@@ -1,171 +1 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __CPM_H
-#define __CPM_H
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/of.h>
-#include <soc/fsl/qe/qe.h>
-
-/*
- * SPI Parameter RAM common to QE and CPM.
- */
-struct spi_pram {
- __be16 rbase; /* Rx Buffer descriptor base address */
- __be16 tbase; /* Tx Buffer descriptor base address */
- u8 rfcr; /* Rx function code */
- u8 tfcr; /* Tx function code */
- __be16 mrblr; /* Max receive buffer length */
- __be32 rstate; /* Internal */
- __be32 rdp; /* Internal */
- __be16 rbptr; /* Internal */
- __be16 rbc; /* Internal */
- __be32 rxtmp; /* Internal */
- __be32 tstate; /* Internal */
- __be32 tdp; /* Internal */
- __be16 tbptr; /* Internal */
- __be16 tbc; /* Internal */
- __be32 txtmp; /* Internal */
- __be32 res; /* Tx temp. */
- __be16 rpbase; /* Relocation pointer (CPM1 only) */
- __be16 res1; /* Reserved */
-};
-
-/*
- * USB Controller pram common to QE and CPM.
- */
-struct usb_ctlr {
- u8 usb_usmod;
- u8 usb_usadr;
- u8 usb_uscom;
- u8 res1[1];
- __be16 usb_usep[4];
- u8 res2[4];
- __be16 usb_usber;
- u8 res3[2];
- __be16 usb_usbmr;
- u8 res4[1];
- u8 usb_usbs;
- /* Fields down below are QE-only */
- __be16 usb_ussft;
- u8 res5[2];
- __be16 usb_usfrn;
- u8 res6[0x22];
-} __attribute__ ((packed));
-
-/*
- * Function code bits, usually generic to devices.
- */
-#ifdef CONFIG_CPM1
-#define CPMFCR_GBL ((u_char)0x00) /* Flag doesn't exist in CPM1 */
-#define CPMFCR_TC2 ((u_char)0x00) /* Flag doesn't exist in CPM1 */
-#define CPMFCR_DTB ((u_char)0x00) /* Flag doesn't exist in CPM1 */
-#define CPMFCR_BDB ((u_char)0x00) /* Flag doesn't exist in CPM1 */
-#else
-#define CPMFCR_GBL ((u_char)0x20) /* Set memory snooping */
-#define CPMFCR_TC2 ((u_char)0x04) /* Transfer code 2 value */
-#define CPMFCR_DTB ((u_char)0x02) /* Use local bus for data when set */
-#define CPMFCR_BDB ((u_char)0x01) /* Use local bus for BD when set */
-#endif
-#define CPMFCR_EB ((u_char)0x10) /* Set big endian byte order */
-
-/* Opcodes common to CPM1 and CPM2
-*/
-#define CPM_CR_INIT_TRX ((ushort)0x0000)
-#define CPM_CR_INIT_RX ((ushort)0x0001)
-#define CPM_CR_INIT_TX ((ushort)0x0002)
-#define CPM_CR_HUNT_MODE ((ushort)0x0003)
-#define CPM_CR_STOP_TX ((ushort)0x0004)
-#define CPM_CR_GRA_STOP_TX ((ushort)0x0005)
-#define CPM_CR_RESTART_TX ((ushort)0x0006)
-#define CPM_CR_CLOSE_RX_BD ((ushort)0x0007)
-#define CPM_CR_SET_GADDR ((ushort)0x0008)
-#define CPM_CR_SET_TIMER ((ushort)0x0008)
-#define CPM_CR_STOP_IDMA ((ushort)0x000b)
-
-/* Buffer descriptors used by many of the CPM protocols. */
-typedef struct cpm_buf_desc {
- ushort cbd_sc; /* Status and Control */
- ushort cbd_datlen; /* Data length in buffer */
- uint cbd_bufaddr; /* Buffer address in host memory */
-} cbd_t;
-
-/* Buffer descriptor control/status used by serial
- */
-
-#define BD_SC_EMPTY (0x8000) /* Receive is empty */
-#define BD_SC_READY (0x8000) /* Transmit is ready */
-#define BD_SC_WRAP (0x2000) /* Last buffer descriptor */
-#define BD_SC_INTRPT (0x1000) /* Interrupt on change */
-#define BD_SC_LAST (0x0800) /* Last buffer in frame */
-#define BD_SC_TC (0x0400) /* Transmit CRC */
-#define BD_SC_CM (0x0200) /* Continuous mode */
-#define BD_SC_ID (0x0100) /* Rec'd too many idles */
-#define BD_SC_P (0x0100) /* xmt preamble */
-#define BD_SC_BR (0x0020) /* Break received */
-#define BD_SC_FR (0x0010) /* Framing error */
-#define BD_SC_PR (0x0008) /* Parity error */
-#define BD_SC_NAK (0x0004) /* NAK - did not respond */
-#define BD_SC_OV (0x0002) /* Overrun */
-#define BD_SC_UN (0x0002) /* Underrun */
-#define BD_SC_CD (0x0001) /* */
-#define BD_SC_CL (0x0001) /* Collision */
-
-/* Buffer descriptor control/status used by Ethernet receive.
- * Common to SCC and FCC.
- */
-#define BD_ENET_RX_EMPTY (0x8000)
-#define BD_ENET_RX_WRAP (0x2000)
-#define BD_ENET_RX_INTR (0x1000)
-#define BD_ENET_RX_LAST (0x0800)
-#define BD_ENET_RX_FIRST (0x0400)
-#define BD_ENET_RX_MISS (0x0100)
-#define BD_ENET_RX_BC (0x0080) /* FCC Only */
-#define BD_ENET_RX_MC (0x0040) /* FCC Only */
-#define BD_ENET_RX_LG (0x0020)
-#define BD_ENET_RX_NO (0x0010)
-#define BD_ENET_RX_SH (0x0008)
-#define BD_ENET_RX_CR (0x0004)
-#define BD_ENET_RX_OV (0x0002)
-#define BD_ENET_RX_CL (0x0001)
-#define BD_ENET_RX_STATS (0x01ff) /* All status bits */
-
-/* Buffer descriptor control/status used by Ethernet transmit.
- * Common to SCC and FCC.
- */
-#define BD_ENET_TX_READY (0x8000)
-#define BD_ENET_TX_PAD (0x4000)
-#define BD_ENET_TX_WRAP (0x2000)
-#define BD_ENET_TX_INTR (0x1000)
-#define BD_ENET_TX_LAST (0x0800)
-#define BD_ENET_TX_TC (0x0400)
-#define BD_ENET_TX_DEF (0x0200)
-#define BD_ENET_TX_HB (0x0100)
-#define BD_ENET_TX_LC (0x0080)
-#define BD_ENET_TX_RL (0x0040)
-#define BD_ENET_TX_RCMASK (0x003c)
-#define BD_ENET_TX_UN (0x0002)
-#define BD_ENET_TX_CSL (0x0001)
-#define BD_ENET_TX_STATS (0x03ff) /* All status bits */
-
-/* Buffer descriptor control/status used by Transparent mode SCC.
- */
-#define BD_SCC_TX_LAST (0x0800)
-
-/* Buffer descriptor control/status used by I2C.
- */
-#define BD_I2C_START (0x0400)
-
-#ifdef CONFIG_CPM
-int cpm_command(u32 command, u8 opcode);
-#else
-static inline int cpm_command(u32 command, u8 opcode)
-{
- return -ENOSYS;
-}
-#endif /* CONFIG_CPM */
-
-int cpm2_gpiochip_add32(struct device *dev);
-
-#endif
+#include <soc/fsl/cpm.h>
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index d05f0c28e515..40a4d3c6fd99 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -145,12 +145,10 @@ static inline void cpu_feature_keys_init(void) { }
/* Definitions for features that only exist on 32-bit chips */
#ifdef CONFIG_PPC32
-#define CPU_FTR_601 ASM_CONST(0x00001000)
#define CPU_FTR_L2CR ASM_CONST(0x00002000)
#define CPU_FTR_SPEC7450 ASM_CONST(0x00004000)
#define CPU_FTR_TAU ASM_CONST(0x00008000)
#define CPU_FTR_CAN_DOZE ASM_CONST(0x00010000)
-#define CPU_FTR_USE_RTC ASM_CONST(0x00020000)
#define CPU_FTR_L3CR ASM_CONST(0x00040000)
#define CPU_FTR_L3_DISABLE_NAP ASM_CONST(0x00080000)
#define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x00100000)
@@ -160,14 +158,12 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTR_NEED_COHERENT ASM_CONST(0x01000000)
#define CPU_FTR_NO_BTIC ASM_CONST(0x02000000)
#define CPU_FTR_PPC_LE ASM_CONST(0x04000000)
-#define CPU_FTR_UNIFIED_ID_CACHE ASM_CONST(0x08000000)
#define CPU_FTR_SPE ASM_CONST(0x10000000)
#define CPU_FTR_NEED_PAIRED_STWCX ASM_CONST(0x20000000)
#define CPU_FTR_INDEXED_DCR ASM_CONST(0x40000000)
#else /* CONFIG_PPC32 */
/* Define these to 0 for the sake of tests in common code */
-#define CPU_FTR_601 (0)
#define CPU_FTR_PPC_LE (0)
#endif
@@ -213,8 +209,10 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x0000080000000000)
#define CPU_FTR_P9_TM_HV_ASSIST LONG_ASM_CONST(0x0000100000000000)
#define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000)
-#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x0000400000000000)
+#define CPU_FTR_P9_TLBIE_STQ_BUG LONG_ASM_CONST(0x0000400000000000)
#define CPU_FTR_P9_TIDR LONG_ASM_CONST(0x0000800000000000)
+#define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000)
+#define CPU_FTR_P9_RADIX_PREFETCH_BUG LONG_ASM_CONST(0x0002000000000000)
#ifndef __ASSEMBLY__
@@ -294,8 +292,8 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTR_MAYBE_CAN_NAP 0
#endif
-#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | \
- CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_USE_RTC)
+#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | \
+ CPU_FTR_COHERENT_ICACHE)
#define CPU_FTRS_603 (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE | CPU_FTR_NOEXECUTE)
#define CPU_FTRS_604 (CPU_FTR_COMMON | CPU_FTR_PPC_LE)
@@ -386,7 +384,7 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTRS_47X (CPU_FTRS_440x6)
#define CPU_FTRS_E200 (CPU_FTR_SPE_COMP | \
CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \
- CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE | \
+ CPU_FTR_NOEXECUTE | \
CPU_FTR_DEBUG_LVL_EXC)
#define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
@@ -461,9 +459,11 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
- CPU_FTR_P9_TLBIE_BUG | CPU_FTR_P9_TIDR)
-#define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
-#define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
+ CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR)
+#define CPU_FTRS_POWER9_DD2_0 (CPU_FTRS_POWER9 | CPU_FTR_P9_RADIX_PREFETCH_BUG)
+#define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | \
+ CPU_FTR_P9_RADIX_PREFETCH_BUG | \
+ CPU_FTR_POWER9_DD2_1)
#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
CPU_FTR_P9_TM_HV_ASSIST | \
CPU_FTR_P9_TM_XER_SO_BUG)
@@ -498,7 +498,9 @@ static inline void cpu_feature_keys_init(void) { }
#else
enum {
CPU_FTRS_POSSIBLE =
-#ifdef CONFIG_PPC_BOOK3S_32
+#ifdef CONFIG_PPC_BOOK3S_601
+ CPU_FTRS_PPC601 |
+#elif defined(CONFIG_PPC_BOOK3S_32)
CPU_FTRS_PPC601 | CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU |
CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 |
CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX |
@@ -574,8 +576,10 @@ enum {
#else
enum {
CPU_FTRS_ALWAYS =
-#ifdef CONFIG_PPC_BOOK3S_32
- CPU_FTRS_PPC601 & CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU &
+#ifdef CONFIG_PPC_BOOK3S_601
+ CPU_FTRS_PPC601 &
+#elif defined(CONFIG_PPC_BOOK3S_32)
+ CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU &
CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 &
CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX &
CPU_FTRS_7400_NOTAU & CPU_FTRS_7400 & CPU_FTRS_7450_20 &
diff --git a/arch/powerpc/include/asm/current.h b/arch/powerpc/include/asm/current.h
index 297827b76169..bbfb94800415 100644
--- a/arch/powerpc/include/asm/current.h
+++ b/arch/powerpc/include/asm/current.h
@@ -16,7 +16,8 @@ static inline struct task_struct *get_current(void)
{
struct task_struct *task;
- __asm__ __volatile__("ld %0,%1(13)"
+ /* get_current can be cached by the compiler, so no volatile */
+ asm ("ld %0,%1(13)"
: "=r" (task)
: "i" (offsetof(struct paca_struct, __current)));
diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h
index a2912b47102c..abc154d784b0 100644
--- a/arch/powerpc/include/asm/dma-direct.h
+++ b/arch/powerpc/include/asm/dma-direct.h
@@ -2,26 +2,13 @@
#ifndef ASM_POWERPC_DMA_DIRECT_H
#define ASM_POWERPC_DMA_DIRECT_H 1
-static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
-{
- if (!dev->dma_mask)
- return false;
-
- return addr + size - 1 <=
- min_not_zero(*dev->dma_mask, dev->bus_dma_mask);
-}
-
static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
- if (!dev)
- return paddr + PCI_DRAM_OFFSET;
return paddr + dev->archdata.dma_offset;
}
static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
{
- if (!dev)
- return daddr - PCI_DRAM_OFFSET;
return daddr - dev->archdata.dma_offset;
}
#endif /* ASM_POWERPC_DMA_DIRECT_H */
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
deleted file mode 100644
index 565d6f74b189..000000000000
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2004 IBM
- */
-#ifndef _ASM_DMA_MAPPING_H
-#define _ASM_DMA_MAPPING_H
-
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
- /* We don't handle the NULL dev case for ISA for now. We could
- * do it via an out of line call but it is not needed for now. The
- * only ISA DMA device we support is the floppy and we have a hack
- * in the floppy driver directly to get a device for us.
- */
- return NULL;
-}
-
-#endif /* _ASM_DMA_MAPPING_H */
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 8aa7c76c2130..6f9b2a12540a 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -88,6 +88,19 @@ struct eeh_pe {
struct list_head child_list; /* List of PEs below this PE */
struct list_head child; /* Memb. child_list/eeh_phb_pe */
struct list_head edevs; /* List of eeh_dev in this PE */
+
+#ifdef CONFIG_STACKTRACE
+ /*
+ * Saved stack trace. When we find a PE freeze in eeh_dev_check_failure
+ * the stack trace is saved here so we can print it in the recovery
+ * thread if it turns out to due to a real problem rather than
+ * a hot-remove.
+ *
+ * A max of 64 entries might be overkill, but it also might not be.
+ */
+ unsigned long stack_trace[64];
+ int trace_entries;
+#endif /* CONFIG_STACKTRACE */
};
#define eeh_pe_for_each_dev(pe, edev, tmp) \
@@ -121,6 +134,8 @@ static inline bool eeh_pe_passed(struct eeh_pe *pe)
struct eeh_dev {
int mode; /* EEH mode */
int class_code; /* Class code of the device */
+ int bdfn; /* bdfn of device (for cfg ops) */
+ struct pci_controller *controller;
int pe_config_addr; /* PE config address */
u32 config_space[16]; /* Saved PCI config space */
int pcix_cap; /* Saved PCIx capability */
@@ -136,6 +151,17 @@ struct eeh_dev {
struct pci_dev *physfn; /* Associated SRIOV PF */
};
+/* "fmt" must be a simple literal string */
+#define EEH_EDEV_PRINT(level, edev, fmt, ...) \
+ pr_##level("PCI %04x:%02x:%02x.%x#%04x: EEH: " fmt, \
+ (edev)->controller->global_number, PCI_BUSNO((edev)->bdfn), \
+ PCI_SLOT((edev)->bdfn), PCI_FUNC((edev)->bdfn), \
+ ((edev)->pe ? (edev)->pe_config_addr : 0xffff), ##__VA_ARGS__)
+#define eeh_edev_dbg(edev, fmt, ...) EEH_EDEV_PRINT(debug, (edev), fmt, ##__VA_ARGS__)
+#define eeh_edev_info(edev, fmt, ...) EEH_EDEV_PRINT(info, (edev), fmt, ##__VA_ARGS__)
+#define eeh_edev_warn(edev, fmt, ...) EEH_EDEV_PRINT(warn, (edev), fmt, ##__VA_ARGS__)
+#define eeh_edev_err(edev, fmt, ...) EEH_EDEV_PRINT(err, (edev), fmt, ##__VA_ARGS__)
+
static inline struct pci_dn *eeh_dev_to_pdn(struct eeh_dev *edev)
{
return edev ? edev->pdn : NULL;
@@ -247,7 +273,7 @@ static inline bool eeh_state_active(int state)
== (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
}
-typedef void *(*eeh_edev_traverse_func)(struct eeh_dev *edev, void *flag);
+typedef void (*eeh_edev_traverse_func)(struct eeh_dev *edev, void *flag);
typedef void *(*eeh_pe_traverse_func)(struct eeh_pe *pe, void *flag);
void eeh_set_pe_aux_size(int size);
int eeh_phb_pe_create(struct pci_controller *phb);
@@ -261,20 +287,20 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev);
void eeh_pe_update_time_stamp(struct eeh_pe *pe);
void *eeh_pe_traverse(struct eeh_pe *root,
eeh_pe_traverse_func fn, void *flag);
-void *eeh_pe_dev_traverse(struct eeh_pe *root,
- eeh_edev_traverse_func fn, void *flag);
+void eeh_pe_dev_traverse(struct eeh_pe *root,
+ eeh_edev_traverse_func fn, void *flag);
void eeh_pe_restore_bars(struct eeh_pe *pe);
const char *eeh_pe_loc_get(struct eeh_pe *pe);
struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
struct eeh_dev *eeh_dev_init(struct pci_dn *pdn);
void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
-void eeh_probe_devices(void);
+void eeh_show_enabled(void);
int __init eeh_ops_register(struct eeh_ops *ops);
int __exit eeh_ops_unregister(const char *name);
int eeh_check_failure(const volatile void __iomem *token);
int eeh_dev_check_failure(struct eeh_dev *edev);
-void eeh_addr_cache_build(void);
+void eeh_addr_cache_init(void);
void eeh_add_device_early(struct pci_dn *);
void eeh_add_device_tree_early(struct pci_dn *);
void eeh_add_device_late(struct pci_dev *);
@@ -316,7 +342,7 @@ static inline bool eeh_enabled(void)
return false;
}
-static inline void eeh_probe_devices(void) { }
+static inline void eeh_show_enabled(void) { }
static inline void *eeh_dev_init(struct pci_dn *pdn, void *data)
{
@@ -332,7 +358,7 @@ static inline int eeh_check_failure(const volatile void __iomem *token)
#define eeh_dev_check_failure(x) (0)
-static inline void eeh_addr_cache_build(void) { }
+static inline void eeh_addr_cache_init(void) { }
static inline void eeh_add_device_early(struct pci_dn *pdn) { }
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index 409c9bfb43d9..57c229a86f08 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -175,4 +175,7 @@ do { \
ARCH_DLINFO_CACHE_GEOMETRY; \
} while (0)
+/* Relocate the kernel image to @final_address */
+void relocate(unsigned long final_address);
+
#endif /* _ASM_POWERPC_ELF_H */
diff --git a/arch/powerpc/include/asm/elfnote.h b/arch/powerpc/include/asm/elfnote.h
new file mode 100644
index 000000000000..a201b6e9ae44
--- /dev/null
+++ b/arch/powerpc/include/asm/elfnote.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PowerPC ELF notes.
+ *
+ * Copyright 2019, IBM Corporation
+ */
+
+#ifndef __ASM_POWERPC_ELFNOTE_H__
+#define __ASM_POWERPC_ELFNOTE_H__
+
+/*
+ * These note types should live in a SHT_NOTE segment and have
+ * "PowerPC" in the name field.
+ */
+
+/*
+ * The capabilities supported/required by this kernel (bitmap).
+ *
+ * This type uses a bitmap as "desc" field. Each bit is described
+ * in arch/powerpc/kernel/note.S
+ */
+#define PPC_ELFNOTE_CAPABILITIES 1
+
+#endif /* __ASM_POWERPC_ELFNOTE_H__ */
diff --git a/arch/powerpc/include/asm/error-injection.h b/arch/powerpc/include/asm/error-injection.h
deleted file mode 100644
index 62fd24739852..000000000000
--- a/arch/powerpc/include/asm/error-injection.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-
-#ifndef _ASM_ERROR_INJECTION_H
-#define _ASM_ERROR_INJECTION_H
-
-#include <linux/compiler.h>
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-#include <asm-generic/error-injection.h>
-
-void override_function_with_return(struct pt_regs *regs);
-
-#endif /* _ASM_ERROR_INJECTION_H */
diff --git a/arch/powerpc/include/asm/fadump-internal.h b/arch/powerpc/include/asm/fadump-internal.h
new file mode 100644
index 000000000000..c814a2b55389
--- /dev/null
+++ b/arch/powerpc/include/asm/fadump-internal.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Firmware-Assisted Dump internal code.
+ *
+ * Copyright 2011, Mahesh Salgaonkar, IBM Corporation.
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_FADUMP_INTERNAL_H
+#define _ASM_POWERPC_FADUMP_INTERNAL_H
+
+/* Maximum number of memory regions kernel supports */
+#define FADUMP_MAX_MEM_REGS 128
+
+#ifndef CONFIG_PRESERVE_FA_DUMP
+
+/* The upper limit percentage for user specified boot memory size (25%) */
+#define MAX_BOOT_MEM_RATIO 4
+
+#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
+
+/* Alignment per CMA requirement. */
+#define FADUMP_CMA_ALIGNMENT (PAGE_SIZE << \
+ max_t(unsigned long, MAX_ORDER - 1, \
+ pageblock_order))
+
+/* FAD commands */
+#define FADUMP_REGISTER 1
+#define FADUMP_UNREGISTER 2
+#define FADUMP_INVALIDATE 3
+
+/*
+ * Copy the ascii values for first 8 characters from a string into u64
+ * variable at their respective indexes.
+ * e.g.
+ * The string "FADMPINF" will be converted into 0x4641444d50494e46
+ */
+static inline u64 fadump_str_to_u64(const char *str)
+{
+ u64 val = 0;
+ int i;
+
+ for (i = 0; i < sizeof(val); i++)
+ val = (*str) ? (val << 8) | *str++ : val << 8;
+ return val;
+}
+
+#define FADUMP_CPU_UNKNOWN (~((u32)0))
+
+#define FADUMP_CRASH_INFO_MAGIC fadump_str_to_u64("FADMPINF")
+
+/* fadump crash info structure */
+struct fadump_crash_info_header {
+ u64 magic_number;
+ u64 elfcorehdr_addr;
+ u32 crashing_cpu;
+ struct pt_regs regs;
+ struct cpumask online_mask;
+};
+
+struct fadump_memory_range {
+ u64 base;
+ u64 size;
+};
+
+/* fadump memory ranges info */
+struct fadump_mrange_info {
+ char name[16];
+ struct fadump_memory_range *mem_ranges;
+ u32 mem_ranges_sz;
+ u32 mem_range_cnt;
+ u32 max_mem_ranges;
+};
+
+/* Platform specific callback functions */
+struct fadump_ops;
+
+/* Firmware-assisted dump configuration details. */
+struct fw_dump {
+ unsigned long reserve_dump_area_start;
+ unsigned long reserve_dump_area_size;
+ /* cmd line option during boot */
+ unsigned long reserve_bootvar;
+
+ unsigned long cpu_state_data_size;
+ u64 cpu_state_dest_vaddr;
+ u32 cpu_state_data_version;
+ u32 cpu_state_entry_size;
+
+ unsigned long hpte_region_size;
+
+ unsigned long boot_memory_size;
+ u64 boot_mem_dest_addr;
+ u64 boot_mem_addr[FADUMP_MAX_MEM_REGS];
+ u64 boot_mem_sz[FADUMP_MAX_MEM_REGS];
+ u64 boot_mem_top;
+ u64 boot_mem_regs_cnt;
+
+ unsigned long fadumphdr_addr;
+ unsigned long cpu_notes_buf_vaddr;
+ unsigned long cpu_notes_buf_size;
+
+ /*
+ * Maximum size supported by firmware to copy from source to
+ * destination address per entry.
+ */
+ u64 max_copy_size;
+ u64 kernel_metadata;
+
+ int ibm_configure_kernel_dump;
+
+ unsigned long fadump_enabled:1;
+ unsigned long fadump_supported:1;
+ unsigned long dump_active:1;
+ unsigned long dump_registered:1;
+ unsigned long nocma:1;
+
+ struct fadump_ops *ops;
+};
+
+struct fadump_ops {
+ u64 (*fadump_init_mem_struct)(struct fw_dump *fadump_conf);
+ u64 (*fadump_get_metadata_size)(void);
+ int (*fadump_setup_metadata)(struct fw_dump *fadump_conf);
+ u64 (*fadump_get_bootmem_min)(void);
+ int (*fadump_register)(struct fw_dump *fadump_conf);
+ int (*fadump_unregister)(struct fw_dump *fadump_conf);
+ int (*fadump_invalidate)(struct fw_dump *fadump_conf);
+ void (*fadump_cleanup)(struct fw_dump *fadump_conf);
+ int (*fadump_process)(struct fw_dump *fadump_conf);
+ void (*fadump_region_show)(struct fw_dump *fadump_conf,
+ struct seq_file *m);
+ void (*fadump_trigger)(struct fadump_crash_info_header *fdh,
+ const char *msg);
+};
+
+/* Helper functions */
+s32 fadump_setup_cpu_notes_buf(u32 num_cpus);
+void fadump_free_cpu_notes_buf(void);
+u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs);
+void fadump_update_elfcore_header(char *bufp);
+bool is_fadump_boot_mem_contiguous(void);
+bool is_fadump_reserved_mem_contiguous(void);
+
+#else /* !CONFIG_PRESERVE_FA_DUMP */
+
+/* Firmware-assisted dump configuration details. */
+struct fw_dump {
+ u64 boot_mem_top;
+ u64 dump_active;
+};
+
+#endif /* CONFIG_PRESERVE_FA_DUMP */
+
+#ifdef CONFIG_PPC_PSERIES
+extern void rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node);
+#else
+static inline void
+rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) { }
+#endif
+
+#ifdef CONFIG_PPC_POWERNV
+extern void opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node);
+#else
+static inline void
+opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) { }
+#endif
+
+#endif /* _ASM_POWERPC_FADUMP_INTERNAL_H */
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 17d9b6acaf63..526a6a647312 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -6,196 +6,14 @@
* Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
*/
-#ifndef __PPC64_FA_DUMP_H__
-#define __PPC64_FA_DUMP_H__
+#ifndef _ASM_POWERPC_FADUMP_H
+#define _ASM_POWERPC_FADUMP_H
#ifdef CONFIG_FA_DUMP
-/*
- * The RMA region will be saved for later dumping when kernel crashes.
- * RMA is Real Mode Area, the first block of logical memory address owned
- * by logical partition, containing the storage that may be accessed with
- * translate off.
- */
-#define RMA_START 0x0
-#define RMA_END (ppc64_rma_size)
-
-/*
- * On some Power systems where RMO is 128MB, it still requires minimum of
- * 256MB for kernel to boot successfully. When kdump infrastructure is
- * configured to save vmcore over network, we run into OOM issue while
- * loading modules related to network setup. Hence we need aditional 64M
- * of memory to avoid OOM issue.
- */
-#define MIN_BOOT_MEM (((RMA_END < (0x1UL << 28)) ? (0x1UL << 28) : RMA_END) \
- + (0x1UL << 26))
-
-/* The upper limit percentage for user specified boot memory size (25%) */
-#define MAX_BOOT_MEM_RATIO 4
-
-#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
-
-/* Alignement per CMA requirement. */
-#define FADUMP_CMA_ALIGNMENT (PAGE_SIZE << \
- max_t(unsigned long, MAX_ORDER - 1, pageblock_order))
-
-/* Firmware provided dump sections */
-#define FADUMP_CPU_STATE_DATA 0x0001
-#define FADUMP_HPTE_REGION 0x0002
-#define FADUMP_REAL_MODE_REGION 0x0011
-
-/* Dump request flag */
-#define FADUMP_REQUEST_FLAG 0x00000001
-
-/* FAD commands */
-#define FADUMP_REGISTER 1
-#define FADUMP_UNREGISTER 2
-#define FADUMP_INVALIDATE 3
-
-/* Dump status flag */
-#define FADUMP_ERROR_FLAG 0x2000
-
-#define FADUMP_CPU_ID_MASK ((1UL << 32) - 1)
-
-#define CPU_UNKNOWN (~((u32)0))
-
-/* Utility macros */
-#define SKIP_TO_NEXT_CPU(reg_entry) \
-({ \
- while (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUEND")) \
- reg_entry++; \
- reg_entry++; \
-})
-
extern int crashing_cpu;
-/* Kernel Dump section info */
-struct fadump_section {
- __be32 request_flag;
- __be16 source_data_type;
- __be16 error_flags;
- __be64 source_address;
- __be64 source_len;
- __be64 bytes_dumped;
- __be64 destination_address;
-};
-
-/* ibm,configure-kernel-dump header. */
-struct fadump_section_header {
- __be32 dump_format_version;
- __be16 dump_num_sections;
- __be16 dump_status_flag;
- __be32 offset_first_dump_section;
-
- /* Fields for disk dump option. */
- __be32 dd_block_size;
- __be64 dd_block_offset;
- __be64 dd_num_blocks;
- __be32 dd_offset_disk_path;
-
- /* Maximum time allowed to prevent an automatic dump-reboot. */
- __be32 max_time_auto;
-};
-
-/*
- * Firmware Assisted dump memory structure. This structure is required for
- * registering future kernel dump with power firmware through rtas call.
- *
- * No disk dump option. Hence disk dump path string section is not included.
- */
-struct fadump_mem_struct {
- struct fadump_section_header header;
-
- /* Kernel dump sections */
- struct fadump_section cpu_state_data;
- struct fadump_section hpte_region;
- struct fadump_section rmr_region;
-};
-
-/* Firmware-assisted dump configuration details. */
-struct fw_dump {
- unsigned long cpu_state_data_size;
- unsigned long hpte_region_size;
- unsigned long boot_memory_size;
- unsigned long reserve_dump_area_start;
- unsigned long reserve_dump_area_size;
- /* cmd line option during boot */
- unsigned long reserve_bootvar;
-
- unsigned long fadumphdr_addr;
- unsigned long cpu_notes_buf;
- unsigned long cpu_notes_buf_size;
-
- int ibm_configure_kernel_dump;
-
- unsigned long fadump_enabled:1;
- unsigned long fadump_supported:1;
- unsigned long dump_active:1;
- unsigned long dump_registered:1;
- unsigned long nocma:1;
-};
-
-/*
- * Copy the ascii values for first 8 characters from a string into u64
- * variable at their respective indexes.
- * e.g.
- * The string "FADMPINF" will be converted into 0x4641444d50494e46
- */
-static inline u64 str_to_u64(const char *str)
-{
- u64 val = 0;
- int i;
-
- for (i = 0; i < sizeof(val); i++)
- val = (*str) ? (val << 8) | *str++ : val << 8;
- return val;
-}
-#define STR_TO_HEX(x) str_to_u64(x)
-#define REG_ID(x) str_to_u64(x)
-
-#define FADUMP_CRASH_INFO_MAGIC STR_TO_HEX("FADMPINF")
-#define REGSAVE_AREA_MAGIC STR_TO_HEX("REGSAVE")
-
-/* The firmware-assisted dump format.
- *
- * The register save area is an area in the partition's memory used to preserve
- * the register contents (CPU state data) for the active CPUs during a firmware
- * assisted dump. The dump format contains register save area header followed
- * by register entries. Each list of registers for a CPU starts with
- * "CPUSTRT" and ends with "CPUEND".
- */
-
-/* Register save area header. */
-struct fadump_reg_save_area_header {
- __be64 magic_number;
- __be32 version;
- __be32 num_cpu_offset;
-};
-
-/* Register entry. */
-struct fadump_reg_entry {
- __be64 reg_id;
- __be64 reg_value;
-};
-
-/* fadump crash info structure */
-struct fadump_crash_info_header {
- u64 magic_number;
- u64 elfcorehdr_addr;
- u32 crashing_cpu;
- struct pt_regs regs;
- struct cpumask online_mask;
-};
-
-struct fad_crash_memory_ranges {
- unsigned long long base;
- unsigned long long size;
-};
-
extern int is_fadump_memory_area(u64 addr, ulong size);
-extern int early_init_dt_scan_fw_dump(unsigned long node,
- const char *uname, int depth, void *data);
-extern int fadump_reserve_mem(void);
extern int setup_fadump(void);
extern int is_fadump_active(void);
extern int should_fadump_crash(void);
@@ -207,5 +25,11 @@ static inline int is_fadump_active(void) { return 0; }
static inline int should_fadump_crash(void) { return 0; }
static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
static inline void fadump_cleanup(void) { }
+#endif /* !CONFIG_FA_DUMP */
+
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
+extern int early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+ int depth, void *data);
+extern int fadump_reserve_mem(void);
#endif
-#endif
+#endif /* _ASM_POWERPC_FADUMP_H */
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index faeca8b76c8c..ca33f4ef6cb4 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -33,7 +33,7 @@
#define FW_FEATURE_LLAN ASM_CONST(0x0000000000010000)
#define FW_FEATURE_BULK_REMOVE ASM_CONST(0x0000000000020000)
#define FW_FEATURE_XDABR ASM_CONST(0x0000000000040000)
-#define FW_FEATURE_MULTITCE ASM_CONST(0x0000000000080000)
+#define FW_FEATURE_PUT_TCE_IND ASM_CONST(0x0000000000080000)
#define FW_FEATURE_SPLPAR ASM_CONST(0x0000000000100000)
#define FW_FEATURE_LPAR ASM_CONST(0x0000000000400000)
#define FW_FEATURE_PS3_LV1 ASM_CONST(0x0000000000800000)
@@ -50,6 +50,8 @@
#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000800000000)
#define FW_FEATURE_BLOCK_REMOVE ASM_CONST(0x0000001000000000)
#define FW_FEATURE_PAPR_SCM ASM_CONST(0x0000002000000000)
+#define FW_FEATURE_ULTRAVISOR ASM_CONST(0x0000004000000000)
+#define FW_FEATURE_STUFF_TCE ASM_CONST(0x0000008000000000)
#ifndef __ASSEMBLY__
@@ -62,15 +64,16 @@ enum {
FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ |
FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN |
FW_FEATURE_BULK_REMOVE | FW_FEATURE_XDABR |
- FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR |
+ FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE |
+ FW_FEATURE_SPLPAR | FW_FEATURE_LPAR |
FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO |
FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE |
- FW_FEATURE_PAPR_SCM,
+ FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR,
FW_FEATURE_PSERIES_ALWAYS = 0,
- FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
+ FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR,
FW_FEATURE_POWERNV_ALWAYS = 0,
FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
index 0cfc365d814b..2ef155a3c821 100644
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -15,6 +15,7 @@
#define _ASM_FIXMAP_H
#ifndef __ASSEMBLY__
+#include <linux/sizes.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#ifdef CONFIG_HIGHMEM
@@ -63,7 +64,22 @@ enum fixed_addresses {
FIX_IMMR_BASE = __ALIGN_MASK(FIX_IMMR_START, FIX_IMMR_SIZE - 1) - 1 +
FIX_IMMR_SIZE,
#endif
+#ifdef CONFIG_PPC_83xx
+ /* For IMMR we need an aligned 2M area */
+#define FIX_IMMR_SIZE (SZ_2M / PAGE_SIZE)
+ FIX_IMMR_START,
+ FIX_IMMR_BASE = __ALIGN_MASK(FIX_IMMR_START, FIX_IMMR_SIZE - 1) - 1 +
+ FIX_IMMR_SIZE,
+#endif
/* FIX_PCIE_MCFG, */
+ __end_of_permanent_fixed_addresses,
+
+#define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE)
+#define FIX_BTMAPS_SLOTS 16
+#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
+
+ FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
__end_of_fixed_addresses
};
@@ -71,14 +87,22 @@ enum fixed_addresses {
#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE)
#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NCG
+#define FIXMAP_PAGE_IO PAGE_KERNEL_NCG
#include <asm-generic/fixmap.h>
static inline void __set_fixmap(enum fixed_addresses idx,
phys_addr_t phys, pgprot_t flags)
{
- map_kernel_page(fix_to_virt(idx), phys, flags);
+ if (__builtin_constant_p(idx))
+ BUILD_BUG_ON(idx >= __end_of_fixed_addresses);
+ else if (WARN_ON(idx >= __end_of_fixed_addresses))
+ return;
+
+ map_kernel_page(__fix_to_virt(idx), phys, flags);
}
+#define __early_set_fixmap __set_fixmap
+
#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index 3dfb80b86561..f54a08a2cd70 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -8,6 +8,8 @@
#define MCOUNT_ADDR ((unsigned long)(_mcount))
#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+
#ifdef __ASSEMBLY__
/* Based off of objdump optput from glibc */
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index 3a6aa57b9d90..bc7d9d06a6d9 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -35,7 +35,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
{
int oldval = 0, ret;
- allow_write_to_user(uaddr, sizeof(*uaddr));
+ allow_read_write_user(uaddr, uaddr, sizeof(*uaddr));
pagefault_disable();
switch (op) {
@@ -60,10 +60,9 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
pagefault_enable();
- if (!ret)
- *oval = oldval;
+ *oval = oldval;
- prevent_write_to_user(uaddr, sizeof(*uaddr));
+ prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr));
return ret;
}
@@ -77,7 +76,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
- allow_write_to_user(uaddr, sizeof(*uaddr));
+ allow_read_write_user(uaddr, uaddr, sizeof(*uaddr));
+
__asm__ __volatile__ (
PPC_ATOMIC_ENTRY_BARRIER
"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\
@@ -98,7 +98,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
: "cc", "memory");
*uval = prev;
- prevent_write_to_user(uaddr, sizeof(*uaddr));
+ prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr));
+
return ret;
}
diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h
index a466765709a9..2dabcf668292 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -169,47 +169,6 @@ name:
#define ABS_ADDR(label) (label - fs_label + fs_start)
-#define EXC_REAL_BEGIN(name, start, size) \
- FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
-
-#define EXC_REAL_END(name, start, size) \
- FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
-
-#define EXC_VIRT_BEGIN(name, start, size) \
- FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
-
-#define EXC_VIRT_END(name, start, size) \
- FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
-
-#define EXC_COMMON_BEGIN(name) \
- USE_TEXT_SECTION(); \
- .balign IFETCH_ALIGN_BYTES; \
- .global name; \
- _ASM_NOKPROBE_SYMBOL(name); \
- DEFINE_FIXED_SYMBOL(name); \
-name:
-
-#define TRAMP_REAL_BEGIN(name) \
- FIXED_SECTION_ENTRY_BEGIN(real_trampolines, name)
-
-#define TRAMP_VIRT_BEGIN(name) \
- FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name)
-
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#define TRAMP_KVM_BEGIN(name) \
- TRAMP_VIRT_BEGIN(name)
-#else
-#define TRAMP_KVM_BEGIN(name)
-#endif
-
-#define EXC_REAL_NONE(start, size) \
- FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \
- FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size)
-
-#define EXC_VIRT_NONE(start, size) \
- FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size); \
- FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size)
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_HEAD_64_H */
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 20a101046cff..bd6504c28c2f 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -31,9 +31,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
return 0;
}
-void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
- pte_t pte);
-
#define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE
void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long end, unsigned long floor,
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 11112023e327..e90c073e437e 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -342,6 +342,16 @@
#define H_TLB_INVALIDATE 0xF808
#define H_COPY_TOFROM_GUEST 0xF80C
+/* Flags for H_SVM_PAGE_IN */
+#define H_PAGE_IN_SHARED 0x1
+
+/* Platform-specific hcalls used by the Ultravisor */
+#define H_SVM_PAGE_IN 0xEF00
+#define H_SVM_PAGE_OUT 0xEF04
+#define H_SVM_INIT_START 0xEF08
+#define H_SVM_INIT_DONE 0xEF0C
+#define H_SVM_INIT_ABORT 0xEF14
+
/* Values for 2nd argument to H_SET_MODE */
#define H_SET_MODE_RESOURCE_SET_CIABR 1
#define H_SET_MODE_RESOURCE_SET_DAWR 2
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index 67e2da195eae..f2f8d8aa8e3b 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -14,6 +14,7 @@ struct arch_hw_breakpoint {
unsigned long address;
u16 type;
u16 len; /* length of the target data symbol */
+ u16 hw_len; /* length programmed in hw */
};
/* Note: Don't change the the first 6 bits below as they are in the same order
@@ -33,6 +34,15 @@ struct arch_hw_breakpoint {
#define HW_BRK_TYPE_PRIV_ALL (HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \
HW_BRK_TYPE_HYP)
+#ifdef CONFIG_PPC_8xx
+#define HW_BREAKPOINT_ALIGN 0x3
+#else
+#define HW_BREAKPOINT_ALIGN 0x7
+#endif
+
+#define DABR_MAX_LEN 8
+#define DAWR_MAX_LEN 512
+
#ifdef CONFIG_HAVE_HW_BREAKPOINT
#include <linux/kdebug.h>
#include <asm/reg.h>
@@ -44,8 +54,6 @@ struct pmu;
struct perf_sample_data;
struct task_struct;
-#define HW_BREAKPOINT_ALIGN 0x7
-
extern int hw_breakpoint_slots(int type);
extern int arch_bp_generic_fields(int type, int *gen_bp_type);
extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
@@ -70,6 +78,7 @@ static inline void hw_breakpoint_disable(void)
brk.address = 0;
brk.type = 0;
brk.len = 0;
+ brk.hw_len = 0;
if (ppc_breakpoint_available())
__set_breakpoint(&brk);
}
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 32a18f2f49bc..e3a905e3d573 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -226,8 +226,8 @@ static inline bool arch_irqs_disabled(void)
#endif /* CONFIG_PPC_BOOK3S */
#ifdef CONFIG_PPC_BOOK3E
-#define __hard_irq_enable() asm volatile("wrteei 1" : : : "memory")
-#define __hard_irq_disable() asm volatile("wrteei 0" : : : "memory")
+#define __hard_irq_enable() wrtee(MSR_EE)
+#define __hard_irq_disable() wrtee(0)
#else
#define __hard_irq_enable() __mtmsrd(MSR_EE|MSR_RI, 1)
#define __hard_irq_disable() __mtmsrd(MSR_RI, 1)
@@ -280,8 +280,6 @@ extern void force_external_irq_replay(void);
#else /* CONFIG_PPC64 */
-#define SET_MSR_EE(x) mtmsr(x)
-
static inline unsigned long arch_local_save_flags(void)
{
return mfmsr();
@@ -289,47 +287,44 @@ static inline unsigned long arch_local_save_flags(void)
static inline void arch_local_irq_restore(unsigned long flags)
{
-#if defined(CONFIG_BOOKE)
- asm volatile("wrtee %0" : : "r" (flags) : "memory");
-#else
- mtmsr(flags);
-#endif
+ if (IS_ENABLED(CONFIG_BOOKE))
+ wrtee(flags);
+ else
+ mtmsr(flags);
}
static inline unsigned long arch_local_irq_save(void)
{
unsigned long flags = arch_local_save_flags();
-#ifdef CONFIG_BOOKE
- asm volatile("wrteei 0" : : : "memory");
-#elif defined(CONFIG_PPC_8xx)
- wrtspr(SPRN_EID);
-#else
- SET_MSR_EE(flags & ~MSR_EE);
-#endif
+
+ if (IS_ENABLED(CONFIG_BOOKE))
+ wrtee(0);
+ else if (IS_ENABLED(CONFIG_PPC_8xx))
+ wrtspr(SPRN_EID);
+ else
+ mtmsr(flags & ~MSR_EE);
+
return flags;
}
static inline void arch_local_irq_disable(void)
{
-#ifdef CONFIG_BOOKE
- asm volatile("wrteei 0" : : : "memory");
-#elif defined(CONFIG_PPC_8xx)
- wrtspr(SPRN_EID);
-#else
- arch_local_irq_save();
-#endif
+ if (IS_ENABLED(CONFIG_BOOKE))
+ wrtee(0);
+ else if (IS_ENABLED(CONFIG_PPC_8xx))
+ wrtspr(SPRN_EID);
+ else
+ mtmsr(mfmsr() & ~MSR_EE);
}
static inline void arch_local_irq_enable(void)
{
-#ifdef CONFIG_BOOKE
- asm volatile("wrteei 1" : : : "memory");
-#elif defined(CONFIG_PPC_8xx)
- wrtspr(SPRN_EIE);
-#else
- unsigned long msr = mfmsr();
- SET_MSR_EE(msr | MSR_EE);
-#endif
+ if (IS_ENABLED(CONFIG_BOOKE))
+ wrtee(MSR_EE);
+ else if (IS_ENABLED(CONFIG_PPC_8xx))
+ wrtspr(SPRN_EIE);
+ else
+ mtmsr(mfmsr() | MSR_EE);
}
static inline bool arch_irqs_disabled_flags(unsigned long flags)
diff --git a/arch/powerpc/include/asm/io-workarounds.h b/arch/powerpc/include/asm/io-workarounds.h
index 01567ea4ceaf..3cce499fbe27 100644
--- a/arch/powerpc/include/asm/io-workarounds.h
+++ b/arch/powerpc/include/asm/io-workarounds.h
@@ -8,6 +8,7 @@
#ifndef _IO_WORKAROUNDS_H
#define _IO_WORKAROUNDS_H
+#ifdef CONFIG_PPC_IO_WORKAROUNDS
#include <linux/io.h>
#include <asm/pci-bridge.h>
@@ -32,4 +33,23 @@ extern int spiderpci_iowa_init(struct iowa_bus *, void *);
#define SPIDER_PCI_DUMMY_READ 0x0810
#define SPIDER_PCI_DUMMY_READ_BASE 0x0814
+#endif
+
+#if defined(CONFIG_PPC_IO_WORKAROUNDS) && defined(CONFIG_PPC_INDIRECT_MMIO)
+extern bool io_workaround_inited;
+
+static inline bool iowa_is_active(void)
+{
+ return unlikely(io_workaround_inited);
+}
+#else
+static inline bool iowa_is_active(void)
+{
+ return false;
+}
+#endif
+
+void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
+ pgprot_t prot, void *caller);
+
#endif /* _IO_WORKAROUNDS_H */
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 23e5d5d16c7e..635969b5b58e 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -691,8 +691,6 @@ static inline void iosync(void)
* * ioremap_prot allows to specify the page flags as an argument and can
* also be hooked by the platform via ppc_md.
*
- * * ioremap_nocache is identical to ioremap
- *
* * ioremap_wc enables write combining
*
* * ioremap_wt enables write through
@@ -705,16 +703,9 @@ static inline void iosync(void)
* create hand-made mappings for use only by the PCI code and cannot
* currently be hooked. Must be page aligned.
*
- * * __ioremap is the low level implementation used by ioremap and
- * ioremap_prot and cannot be hooked (but can be used by a hook on one
- * of the previous ones)
- *
* * __ioremap_caller is the same as above but takes an explicit caller
* reference rather than using __builtin_return_address(0)
*
- * * __iounmap, is the low level implementation used by iounmap and cannot
- * be hooked (but can be used by a hook on iounmap)
- *
*/
extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size,
@@ -722,20 +713,20 @@ extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size,
extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size);
void __iomem *ioremap_wt(phys_addr_t address, unsigned long size);
void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size);
-#define ioremap_nocache(addr, size) ioremap((addr), (size))
#define ioremap_uc(addr, size) ioremap((addr), (size))
#define ioremap_cache(addr, size) \
ioremap_prot((addr), (size), pgprot_val(PAGE_KERNEL))
extern void iounmap(volatile void __iomem *addr);
-extern void __iomem *__ioremap(phys_addr_t, unsigned long size,
- unsigned long flags);
+int early_ioremap_range(unsigned long ea, phys_addr_t pa,
+ unsigned long size, pgprot_t prot);
+void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
+ pgprot_t prot, void *caller);
+
extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size,
pgprot_t prot, void *caller);
-extern void __iounmap(volatile void __iomem *addr);
-
extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea,
unsigned long size, pgprot_t prot);
extern void __iounmap_at(void *ea, unsigned long size);
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 18d342b815e4..350101e11ddb 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -48,15 +48,16 @@ struct iommu_table_ops {
* returns old TCE and DMA direction mask.
* @tce is a physical address.
*/
- int (*exchange)(struct iommu_table *tbl,
+ int (*xchg_no_kill)(struct iommu_table *tbl,
long index,
unsigned long *hpa,
- enum dma_data_direction *direction);
- /* Real mode */
- int (*exchange_rm)(struct iommu_table *tbl,
- long index,
- unsigned long *hpa,
- enum dma_data_direction *direction);
+ enum dma_data_direction *direction,
+ bool realmode);
+
+ void (*tce_kill)(struct iommu_table *tbl,
+ unsigned long index,
+ unsigned long pages,
+ bool realmode);
__be64 *(*useraddrptr)(struct iommu_table *tbl, long index, bool alloc);
#endif
@@ -111,6 +112,8 @@ struct iommu_table {
struct iommu_table_ops *it_ops;
struct kref it_kref;
int it_nid;
+ unsigned long it_reserved_start; /* Start of not-DMA-able (MMIO) area */
+ unsigned long it_reserved_end;
};
#define IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry) \
@@ -149,8 +152,9 @@ extern int iommu_tce_table_put(struct iommu_table *tbl);
/* Initializes an iommu_table based in values set in the passed-in
* structure
*/
-extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
- int nid);
+extern struct iommu_table *iommu_init_table(struct iommu_table *tbl,
+ int nid, unsigned long res_start, unsigned long res_end);
+
#define IOMMU_TABLE_GROUP_MAX_TABLES 2
struct iommu_table_group;
@@ -206,6 +210,12 @@ extern void iommu_del_device(struct device *dev);
extern long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction);
+extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
+ struct iommu_table *tbl,
+ unsigned long entry, unsigned long *hpa,
+ enum dma_data_direction *direction);
+extern void iommu_tce_kill(struct iommu_table *tbl,
+ unsigned long entry, unsigned long pages);
#else
static inline void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number,
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
index 296e51c2f066..fbff9ff9032e 100644
--- a/arch/powerpc/include/asm/kasan.h
+++ b/arch/powerpc/include/asm/kasan.h
@@ -31,9 +31,11 @@
void kasan_early_init(void);
void kasan_mmu_init(void);
void kasan_init(void);
+void kasan_late_init(void);
#else
static inline void kasan_init(void) { }
static inline void kasan_mmu_init(void) { }
+static inline void kasan_late_init(void) { }
#endif
#endif /* __ASSEMBLY */
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 5b5e39643a27..92bcd1a26d73 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -2,6 +2,16 @@
#ifndef _ASM_POWERPC_KUP_H_
#define _ASM_POWERPC_KUP_H_
+#define KUAP_READ 1
+#define KUAP_WRITE 2
+#define KUAP_READ_WRITE (KUAP_READ | KUAP_WRITE)
+/*
+ * For prevent_user_access() only.
+ * Use the current saved situation instead of the to/from/size params.
+ * Used on book3s/32
+ */
+#define KUAP_CURRENT 4
+
#ifdef CONFIG_PPC64
#include <asm/book3s/64/kup-radix.h>
#endif
@@ -42,32 +52,55 @@ void setup_kuap(bool disabled);
#else
static inline void setup_kuap(bool disabled) { }
static inline void allow_user_access(void __user *to, const void __user *from,
- unsigned long size) { }
+ unsigned long size, unsigned long dir) { }
static inline void prevent_user_access(void __user *to, const void __user *from,
- unsigned long size) { }
-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) { return false; }
+ unsigned long size, unsigned long dir) { }
+static inline unsigned long prevent_user_access_return(void) { return 0UL; }
+static inline void restore_user_access(unsigned long flags) { }
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+ return false;
+}
#endif /* CONFIG_PPC_KUAP */
static inline void allow_read_from_user(const void __user *from, unsigned long size)
{
- allow_user_access(NULL, from, size);
+ allow_user_access(NULL, from, size, KUAP_READ);
}
static inline void allow_write_to_user(void __user *to, unsigned long size)
{
- allow_user_access(to, NULL, size);
+ allow_user_access(to, NULL, size, KUAP_WRITE);
+}
+
+static inline void allow_read_write_user(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ allow_user_access(to, from, size, KUAP_READ_WRITE);
}
static inline void prevent_read_from_user(const void __user *from, unsigned long size)
{
- prevent_user_access(NULL, from, size);
+ prevent_user_access(NULL, from, size, KUAP_READ);
}
static inline void prevent_write_to_user(void __user *to, unsigned long size)
{
- prevent_user_access(to, NULL, size);
+ prevent_user_access(to, NULL, size, KUAP_WRITE);
+}
+
+static inline void prevent_read_write_user(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ prevent_user_access(to, from, size, KUAP_READ_WRITE);
+}
+
+static inline void prevent_current_access_user(void)
+{
+ prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT);
}
#endif /* !__ASSEMBLY__ */
-#endif /* _ASM_POWERPC_KUP_H_ */
+#endif /* _ASM_POWERPC_KUAP_H_ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index bb7c8cc77f1a..04b2b927bb5a 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -535,7 +535,7 @@ static inline void note_hpte_modification(struct kvm *kvm,
*/
static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
{
- return rcu_dereference_raw_notrace(kvm->memslots[0]);
+ return rcu_dereference_raw_check(kvm->memslots[0]);
}
extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
new file mode 100644
index 000000000000..5a9834e0e2d1
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KVM_BOOK3S_UVMEM_H__
+#define __ASM_KVM_BOOK3S_UVMEM_H__
+
+#ifdef CONFIG_PPC_UV
+int kvmppc_uvmem_init(void);
+void kvmppc_uvmem_free(void);
+int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot);
+void kvmppc_uvmem_slot_free(struct kvm *kvm,
+ const struct kvm_memory_slot *slot);
+unsigned long kvmppc_h_svm_page_in(struct kvm *kvm,
+ unsigned long gra,
+ unsigned long flags,
+ unsigned long page_shift);
+unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
+ unsigned long gra,
+ unsigned long flags,
+ unsigned long page_shift);
+unsigned long kvmppc_h_svm_init_start(struct kvm *kvm);
+unsigned long kvmppc_h_svm_init_done(struct kvm *kvm);
+int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn);
+unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm);
+void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
+ struct kvm *kvm, bool skip_page_out);
+#else
+static inline int kvmppc_uvmem_init(void)
+{
+ return 0;
+}
+
+static inline void kvmppc_uvmem_free(void) { }
+
+static inline int
+kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
+{
+ return 0;
+}
+
+static inline void
+kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot) { }
+
+static inline unsigned long
+kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra,
+ unsigned long flags, unsigned long page_shift)
+{
+ return H_UNSUPPORTED;
+}
+
+static inline unsigned long
+kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gra,
+ unsigned long flags, unsigned long page_shift)
+{
+ return H_UNSUPPORTED;
+}
+
+static inline unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
+{
+ return H_UNSUPPORTED;
+}
+
+static inline unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+{
+ return H_UNSUPPORTED;
+}
+
+static inline unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
+{
+ return H_UNSUPPORTED;
+}
+
+static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
+{
+ return -EFAULT;
+}
+
+static inline void
+kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
+ struct kvm *kvm, bool skip_page_out) { }
+#endif /* CONFIG_PPC_UV */
+#endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e6e5f59aaa97..6e8b8ffd06ad 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -232,11 +232,25 @@ struct revmap_entry {
};
/*
- * We use the top bit of each memslot->arch.rmap entry as a lock bit,
- * and bit 32 as a present flag. The bottom 32 bits are the
- * index in the guest HPT of a HPTE that points to the page.
+ * The rmap array of size number of guest pages is allocated for each memslot.
+ * This array is used to store usage specific information about the guest page.
+ * Below are the encodings of the various possible usage types.
*/
-#define KVMPPC_RMAP_LOCK_BIT 63
+/* Free bits which can be used to define a new usage */
+#define KVMPPC_RMAP_TYPE_MASK 0xff00000000000000
+#define KVMPPC_RMAP_NESTED 0xc000000000000000 /* Nested rmap array */
+#define KVMPPC_RMAP_HPT 0x0100000000000000 /* HPT guest */
+
+/*
+ * rmap usage definition for a hash page table (hpt) guest:
+ * 0x0000080000000000 Lock bit
+ * 0x0000018000000000 RC bits
+ * 0x0000000100000000 Present bit
+ * 0x00000000ffffffff HPT index bits
+ * The bottom 32 bits are the index in the guest HPT of a HPTE that points to
+ * the page.
+ */
+#define KVMPPC_RMAP_LOCK_BIT 43
#define KVMPPC_RMAP_RC_SHIFT 32
#define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_PRESENT 0x100000000ul
@@ -261,6 +275,11 @@ struct kvm_hpt_info {
struct kvm_resize_hpt;
+/* Flag values for kvm_arch.secure_guest */
+#define KVMPPC_SECURE_INIT_START 0x1 /* H_SVM_INIT_START has been called */
+#define KVMPPC_SECURE_INIT_DONE 0x2 /* H_SVM_INIT_DONE completed */
+#define KVMPPC_SECURE_INIT_ABORT 0x4 /* H_SVM_INIT_ABORT issued */
+
struct kvm_arch {
unsigned int lpid;
unsigned int smt_mode; /* # vcpus per virtual core */
@@ -283,6 +302,7 @@ struct kvm_arch {
cpumask_t cpu_in_guest;
u8 radix;
u8 fwnmi_enabled;
+ u8 secure_guest;
bool threads_indep;
bool nested_enable;
pgd_t *pgtable;
@@ -315,6 +335,8 @@ struct kvm_arch {
#endif
struct kvmppc_ops *kvm_ops;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ struct mutex uvmem_lock;
+ struct list_head uvmem_pfns;
struct mutex mmu_setup_lock; /* nests inside vcpu mutexes */
u64 l1_ptcr;
int max_nested_lpid;
@@ -386,7 +408,6 @@ struct kvmppc_mmu {
u32 (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum);
int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *pte, bool data, bool iswrite);
- void (*reset_msr)(struct kvm_vcpu *vcpu);
void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large);
int (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid);
u64 (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data);
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 2484e6a8f5ca..bc2494e5710a 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -119,8 +119,7 @@ extern int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr,
enum xlate_instdata xlid, enum xlate_readwrite xlrw,
struct kvmppc_pte *pte);
-extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
- unsigned int id);
+extern int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu);
extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu);
extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu);
extern int kvmppc_core_check_processor_compat(void);
@@ -271,9 +270,10 @@ struct kvmppc_ops {
union kvmppc_one_reg *val);
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
+ void (*inject_interrupt)(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);
void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr);
int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
- struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned int id);
+ int (*vcpu_create)(struct kvm_vcpu *vcpu);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
int (*check_requests)(struct kvm_vcpu *vcpu);
int (*get_dirty_log)(struct kvm *kvm, struct kvm_dirty_log *log);
@@ -321,6 +321,7 @@ struct kvmppc_ops {
int size);
int (*store_to_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
int size);
+ int (*svm_off)(struct kvm *kvm);
};
extern struct kvmppc_ops *kvmppc_hv_ops;
@@ -452,9 +453,100 @@ static inline u32 kvmppc_get_xics_latch(void)
return xirr;
}
-static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+/*
+ * To avoid the need to unnecessarily exit fully to the host kernel, an IPI to
+ * a CPU thread that's running/napping inside of a guest is by default regarded
+ * as a request to wake the CPU (if needed) and continue execution within the
+ * guest, potentially to process new state like externally-generated
+ * interrupts or IPIs sent from within the guest itself (e.g. H_PROD/H_IPI).
+ *
+ * To force an exit to the host kernel, kvmppc_set_host_ipi() must be called
+ * prior to issuing the IPI to set the corresponding 'host_ipi' flag in the
+ * target CPU's PACA. To avoid unnecessary exits to the host, this flag should
+ * be immediately cleared via kvmppc_clear_host_ipi() by the IPI handler on
+ * the receiving side prior to processing the IPI work.
+ *
+ * NOTE:
+ *
+ * We currently issue an smp_mb() at the beginning of kvmppc_set_host_ipi().
+ * This is to guard against sequences such as the following:
+ *
+ * CPU
+ * X: smp_muxed_ipi_set_message():
+ * X: smp_mb()
+ * X: message[RESCHEDULE] = 1
+ * X: doorbell_global_ipi(42):
+ * X: kvmppc_set_host_ipi(42)
+ * X: ppc_msgsnd_sync()/smp_mb()
+ * X: ppc_msgsnd() -> 42
+ * 42: doorbell_exception(): // from CPU X
+ * 42: ppc_msgsync()
+ * 105: smp_muxed_ipi_set_message():
+ * 105: smb_mb()
+ * // STORE DEFERRED DUE TO RE-ORDERING
+ * --105: message[CALL_FUNCTION] = 1
+ * | 105: doorbell_global_ipi(42):
+ * | 105: kvmppc_set_host_ipi(42)
+ * | 42: kvmppc_clear_host_ipi(42)
+ * | 42: smp_ipi_demux_relaxed()
+ * | 42: // returns to executing guest
+ * | // RE-ORDERED STORE COMPLETES
+ * ->105: message[CALL_FUNCTION] = 1
+ * 105: ppc_msgsnd_sync()/smp_mb()
+ * 105: ppc_msgsnd() -> 42
+ * 42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
+ * 105: // hangs waiting on 42 to process messages/call_single_queue
+ *
+ * We also issue an smp_mb() at the end of kvmppc_clear_host_ipi(). This is
+ * to guard against sequences such as the following (as well as to create
+ * a read-side pairing with the barrier in kvmppc_set_host_ipi()):
+ *
+ * CPU
+ * X: smp_muxed_ipi_set_message():
+ * X: smp_mb()
+ * X: message[RESCHEDULE] = 1
+ * X: doorbell_global_ipi(42):
+ * X: kvmppc_set_host_ipi(42)
+ * X: ppc_msgsnd_sync()/smp_mb()
+ * X: ppc_msgsnd() -> 42
+ * 42: doorbell_exception(): // from CPU X
+ * 42: ppc_msgsync()
+ * // STORE DEFERRED DUE TO RE-ORDERING
+ * -- 42: kvmppc_clear_host_ipi(42)
+ * | 42: smp_ipi_demux_relaxed()
+ * | 105: smp_muxed_ipi_set_message():
+ * | 105: smb_mb()
+ * | 105: message[CALL_FUNCTION] = 1
+ * | 105: doorbell_global_ipi(42):
+ * | 105: kvmppc_set_host_ipi(42)
+ * | // RE-ORDERED STORE COMPLETES
+ * -> 42: kvmppc_clear_host_ipi(42)
+ * 42: // returns to executing guest
+ * 105: ppc_msgsnd_sync()/smp_mb()
+ * 105: ppc_msgsnd() -> 42
+ * 42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
+ * 105: // hangs waiting on 42 to process messages/call_single_queue
+ */
+static inline void kvmppc_set_host_ipi(int cpu)
{
- paca_ptrs[cpu]->kvm_hstate.host_ipi = host_ipi;
+ /*
+ * order stores of IPI messages vs. setting of host_ipi flag
+ *
+ * pairs with the barrier in kvmppc_clear_host_ipi()
+ */
+ smp_mb();
+ paca_ptrs[cpu]->kvm_hstate.host_ipi = 1;
+}
+
+static inline void kvmppc_clear_host_ipi(int cpu)
+{
+ paca_ptrs[cpu]->kvm_hstate.host_ipi = 0;
+ /*
+ * order clearing of host_ipi flag vs. processing of IPI messages
+ *
+ * pairs with the barrier in kvmppc_set_host_ipi()
+ */
+ smp_mb();
}
static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
@@ -486,7 +578,10 @@ static inline u32 kvmppc_get_xics_latch(void)
return 0;
}
-static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+static inline void kvmppc_set_host_ipi(int cpu)
+{}
+
+static inline void kvmppc_clear_host_ipi(int cpu)
{}
static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
@@ -598,6 +693,7 @@ extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
union kvmppc_one_reg *val);
extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
union kvmppc_one_reg *val);
+extern bool kvmppc_xive_native_supported(void);
#else
static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h
index fdd00939270b..bc4bd19b7fc2 100644
--- a/arch/powerpc/include/asm/local.h
+++ b/arch/powerpc/include/asm/local.h
@@ -17,7 +17,7 @@ typedef struct
#define LOCAL_INIT(i) { (i) }
-static __inline__ long local_read(local_t *l)
+static __inline__ long local_read(const local_t *l)
{
return READ_ONCE(l->v);
}
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index c43d6eca9edd..7bcb64444a39 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -3,9 +3,6 @@
#define _ASM_POWERPC_MACHDEP_H
#ifdef __KERNEL__
-/*
- */
-
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/dma-mapping.h>
@@ -31,10 +28,6 @@ struct pci_host_bridge;
struct machdep_calls {
char *name;
#ifdef CONFIG_PPC64
- void __iomem * (*ioremap)(phys_addr_t addr, unsigned long size,
- pgprot_t prot, void *caller);
- void (*iounmap)(volatile void __iomem *token);
-
#ifdef CONFIG_PM
void (*iommu_save)(void);
void (*iommu_restore)(void);
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index a4c6a74ad2fb..6a6ddaabdb34 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -30,6 +30,10 @@ enum MCE_Disposition {
enum MCE_Initiator {
MCE_INITIATOR_UNKNOWN = 0,
MCE_INITIATOR_CPU = 1,
+ MCE_INITIATOR_PCI = 2,
+ MCE_INITIATOR_ISA = 3,
+ MCE_INITIATOR_MEMORY= 4,
+ MCE_INITIATOR_POWERMGM = 5,
};
enum MCE_ErrorType {
@@ -41,6 +45,8 @@ enum MCE_ErrorType {
MCE_ERROR_TYPE_USER = 5,
MCE_ERROR_TYPE_RA = 6,
MCE_ERROR_TYPE_LINK = 7,
+ MCE_ERROR_TYPE_DCACHE = 8,
+ MCE_ERROR_TYPE_ICACHE = 9,
};
enum MCE_ErrorClass {
@@ -122,7 +128,8 @@ struct machine_check_event {
enum MCE_UeErrorType ue_error_type:8;
u8 effective_address_provided;
u8 physical_address_provided;
- u8 reserved_1[5];
+ u8 ignore_event;
+ u8 reserved_1[4];
u64 effective_address;
u64 physical_address;
u8 reserved_2[8];
@@ -193,6 +200,7 @@ struct mce_error_info {
enum MCE_Initiator initiator:8;
enum MCE_ErrorClass error_class:8;
bool sync_error;
+ bool ignore_event;
};
#define MAX_MC_EVT 100
diff --git a/arch/powerpc/include/asm/mem_encrypt.h b/arch/powerpc/include/asm/mem_encrypt.h
new file mode 100644
index 000000000000..ba9dab07c1be
--- /dev/null
+++ b/arch/powerpc/include/asm/mem_encrypt.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * SVM helper functions
+ *
+ * Copyright 2018 IBM Corporation
+ */
+
+#ifndef _ASM_POWERPC_MEM_ENCRYPT_H
+#define _ASM_POWERPC_MEM_ENCRYPT_H
+
+#include <asm/svm.h>
+
+static inline bool mem_encrypt_active(void)
+{
+ return is_secure_guest();
+}
+
+static inline bool force_dma_unencrypted(struct device *dev)
+{
+ return is_secure_guest();
+}
+
+int set_memory_encrypted(unsigned long addr, int numpages);
+int set_memory_decrypted(unsigned long addr, int numpages);
+
+#endif /* _ASM_POWERPC_MEM_ENCRYPT_H */
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index ba94ce8c22d7..0699cfeeb8c9 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -257,7 +257,7 @@ extern void radix__mmu_cleanup_all(void);
/* Functions for creating and updating partition table on POWER9 */
extern void mmu_partition_table_init(void);
extern void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
- unsigned long dw1);
+ unsigned long dw1, bool flush);
#endif /* CONFIG_PPC64 */
struct mm_struct;
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 58efca934311..360367c579de 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -238,11 +238,6 @@ static inline void arch_unmap(struct mm_struct *mm,
mm->context.vdso_base = 0;
}
-static inline void arch_bprm_mm_init(struct mm_struct *mm,
- struct vm_area_struct *vma)
-{
-}
-
#ifdef CONFIG_PPC_MEM_KEYS
bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
bool execute, bool foreign);
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 1c3133b5f86a..85ed2390fb99 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -3,6 +3,7 @@
#define _ASM_POWERPC_KUP_8XX_H_
#include <asm/bug.h>
+#include <asm/mmu.h>
#ifdef CONFIG_PPC_KUAP
@@ -34,18 +35,33 @@
#include <asm/reg.h>
static inline void allow_user_access(void __user *to, const void __user *from,
- unsigned long size)
+ unsigned long size, unsigned long dir)
{
mtspr(SPRN_MD_AP, MD_APG_INIT);
}
static inline void prevent_user_access(void __user *to, const void __user *from,
- unsigned long size)
+ unsigned long size, unsigned long dir)
{
mtspr(SPRN_MD_AP, MD_APG_KUAP);
}
-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+static inline unsigned long prevent_user_access_return(void)
+{
+ unsigned long flags = mfspr(SPRN_MD_AP);
+
+ mtspr(SPRN_MD_AP, MD_APG_KUAP);
+
+ return flags;
+}
+
+static inline void restore_user_access(unsigned long flags)
+{
+ mtspr(SPRN_MD_AP, flags);
+}
+
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000),
"Bug: fault blocked by AP register !");
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 0284f8f5305f..60c4d829152e 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -11,8 +11,6 @@
#include <asm/mmu.h> /* For sub-arch specific PPC_PIN_SIZE */
#include <asm/asm-405.h>
-extern unsigned long ioremap_bot;
-
#ifdef CONFIG_44x
extern int icache_44x_need_flush;
#endif
@@ -78,23 +76,21 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
*/
#include <asm/fixmap.h>
-#ifdef CONFIG_HIGHMEM
-#define KVIRT_TOP PKMAP_BASE
-#else
-#define KVIRT_TOP FIXADDR_START
-#endif
-
/*
* ioremap_bot starts at that address. Early ioremaps move down from there,
* until mem_init() at which point this becomes the top of the vmalloc
* and ioremap space
*/
-#ifdef CONFIG_NOT_COHERENT_CACHE
-#define IOREMAP_TOP ((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK)
+#ifdef CONFIG_HIGHMEM
+#define IOREMAP_TOP PKMAP_BASE
#else
-#define IOREMAP_TOP KVIRT_TOP
+#define IOREMAP_TOP FIXADDR_START
#endif
+/* PPC32 shares vmalloc area with ioremap */
+#define IOREMAP_START VMALLOC_START
+#define IOREMAP_END VMALLOC_END
+
/*
* Just any arbitrary offset to the start of the vmalloc VM area: the
* current 16MB value just means that there will be a 64MB "hole" after the
@@ -118,7 +114,12 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
#else
#define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
#endif
+
+#ifdef CONFIG_KASAN_VMALLOC
+#define VMALLOC_END _ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
+#else
#define VMALLOC_END ioremap_bot
+#endif
/*
* Bits in a linux-style PTE. These match the bits in the
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index b9f66cf15c31..9a33b8bd842d 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -53,6 +53,7 @@
#define PHB_IO_BASE (ISA_IO_END)
#define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE)
#define IOREMAP_BASE (PHB_IO_END)
+#define IOREMAP_START (ioremap_bot)
#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE)
diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h b/arch/powerpc/include/asm/nohash/mmu-book3e.h
index 4c9777d256fb..b41004664312 100644
--- a/arch/powerpc/include/asm/nohash/mmu-book3e.h
+++ b/arch/powerpc/include/asm/nohash/mmu-book3e.h
@@ -75,7 +75,6 @@
#define MAS2_E 0x00000001
#define MAS2_WIMGE_MASK 0x0000001f
#define MAS2_EPN_MASK(size) (~0 << (size + 10))
-#define MAS2_VAL(addr, size, flags) ((addr) & MAS2_EPN_MASK(size) | (flags))
#define MAS3_RPN 0xFFFFF000
#define MAS3_U0 0x00000200
@@ -221,6 +220,16 @@
#define TLBILX_T_CLASS2 6
#define TLBILX_T_CLASS3 7
+/*
+ * The mapping only needs to be cache-coherent on SMP, except on
+ * Freescale e500mc derivatives where it's also needed for coherent DMA.
+ */
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
+#define MAS2_M_IF_NEEDED MAS2_M
+#else
+#define MAS2_M_IF_NEEDED 0
+#endif
+
#ifndef __ASSEMBLY__
#include <asm/bug.h>
diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h
index 332b13b4ecdb..29c43665a753 100644
--- a/arch/powerpc/include/asm/nohash/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/pgalloc.h
@@ -46,7 +46,6 @@ static inline void pgtable_free(void *table, int shift)
#define get_hugepd_cache_index(x) (x)
-#ifdef CONFIG_SMP
static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
{
unsigned long pgf = (unsigned long)table;
@@ -64,13 +63,6 @@ static inline void __tlb_remove_table(void *_table)
pgtable_free(table, shift);
}
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif
-
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
unsigned long address)
{
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index 1ca1c1864b32..7fed9dc0f147 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -293,5 +293,18 @@ static inline int pgd_huge(pgd_t pgd)
#define is_hugepd(hpd) (hugepd_ok(hpd))
#endif
+/*
+ * This gets called at the end of handling a page fault, when
+ * the kernel has put a new PTE into the page table for the process.
+ * We use it to ensure coherency between the i-cache and d-cache
+ * for the page which has just been mapped in.
+ */
+#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_HUGETLB_PAGE)
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
+#else
+static inline
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) {}
+#endif
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 383242eb0dea..c1f25a760eb1 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -208,7 +208,13 @@
#define OPAL_HANDLE_HMI2 166
#define OPAL_NX_COPROC_INIT 167
#define OPAL_XIVE_GET_VP_STATE 170
-#define OPAL_LAST 170
+#define OPAL_MPIPL_UPDATE 173
+#define OPAL_MPIPL_REGISTER_TAG 174
+#define OPAL_MPIPL_QUERY_TAG 175
+#define OPAL_SECVAR_GET 176
+#define OPAL_SECVAR_GET_NEXT 177
+#define OPAL_SECVAR_ENQUEUE_UPDATE 178
+#define OPAL_LAST 178
#define QUIESCE_HOLD 1 /* Spin all calls at entry */
#define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */
@@ -453,6 +459,7 @@ enum opal_msg_type {
OPAL_MSG_DPO = 5,
OPAL_MSG_PRD = 6,
OPAL_MSG_OCC = 7,
+ OPAL_MSG_PRD2 = 8,
OPAL_MSG_TYPE_MAX,
};
@@ -1059,6 +1066,7 @@ enum {
OPAL_REBOOT_NORMAL = 0,
OPAL_REBOOT_PLATFORM_ERROR = 1,
OPAL_REBOOT_FULL_IPL = 2,
+ OPAL_REBOOT_MPIPL = 3,
};
/* Argument to OPAL_PCI_TCE_KILL */
@@ -1135,6 +1143,44 @@ enum {
#define OPAL_PCI_P2P_LOAD 0x2
#define OPAL_PCI_P2P_STORE 0x4
+/* MPIPL update operations */
+enum opal_mpipl_ops {
+ OPAL_MPIPL_ADD_RANGE = 0,
+ OPAL_MPIPL_REMOVE_RANGE = 1,
+ OPAL_MPIPL_REMOVE_ALL = 2,
+ OPAL_MPIPL_FREE_PRESERVED_MEMORY = 3,
+};
+
+/* Tag will point to various metadata area. Kernel will
+ * use tag to get metadata value.
+ */
+enum opal_mpipl_tags {
+ OPAL_MPIPL_TAG_CPU = 0,
+ OPAL_MPIPL_TAG_OPAL = 1,
+ OPAL_MPIPL_TAG_KERNEL = 2,
+ OPAL_MPIPL_TAG_BOOT_MEM = 3,
+};
+
+/* Preserved memory details */
+struct opal_mpipl_region {
+ __be64 src;
+ __be64 dest;
+ __be64 size;
+};
+
+/* Structure version */
+#define OPAL_MPIPL_VERSION 0x01
+
+struct opal_mpipl_fadump {
+ u8 version;
+ u8 reserved[7];
+ __be32 crashing_pir; /* OPAL crashing CPU PIR */
+ __be32 cpu_data_version;
+ __be32 cpu_data_size;
+ __be32 region_cnt;
+ struct opal_mpipl_region region[];
+} __packed;
+
#endif /* __ASSEMBLY__ */
#endif /* __OPAL_API_H */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 57bd029c715e..9986ac34b8e2 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -39,6 +39,7 @@ int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn,
uint64_t PE_handle);
int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, long cap,
uint64_t rate_phys, uint32_t size);
+
int64_t opal_console_write(int64_t term_number, __be64 *length,
const uint8_t *buffer);
int64_t opal_console_read(int64_t term_number, __be64 *length,
@@ -272,7 +273,7 @@ int64_t opal_xive_get_vp_info(uint64_t vp,
int64_t opal_xive_set_vp_info(uint64_t vp,
uint64_t flags,
uint64_t report_cl_pair);
-int64_t opal_xive_allocate_irq(uint32_t chip_id);
+int64_t opal_xive_allocate_irq_raw(uint32_t chip_id);
int64_t opal_xive_free_irq(uint32_t girq);
int64_t opal_xive_sync(uint32_t type, uint32_t id);
int64_t opal_xive_dump(uint32_t type, uint32_t id);
@@ -297,6 +298,17 @@ int opal_sensor_group_clear(u32 group_hndl, int token);
int opal_sensor_group_enable(u32 group_hndl, int token, bool enable);
int opal_nx_coproc_init(uint32_t chip_id, uint32_t ct);
+int opal_secvar_get(const char *key, uint64_t key_len, u8 *data,
+ uint64_t *data_size);
+int opal_secvar_get_next(const char *key, uint64_t *key_len,
+ uint64_t key_buf_size);
+int opal_secvar_enqueue_update(const char *key, uint64_t key_len, u8 *data,
+ uint64_t data_size);
+
+s64 opal_mpipl_update(enum opal_mpipl_ops op, u64 src, u64 dest, u64 size);
+s64 opal_mpipl_register_tag(enum opal_mpipl_tags tag, u64 addr);
+s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, u64 *addr);
+
s64 opal_signal_system_reset(s32 cpu);
s64 opal_quiesce(u64 shutdown_type, s32 cpu);
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 0d52f57fca04..86332080399a 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -209,15 +209,25 @@ static inline bool pfn_valid(unsigned long pfn)
*/
#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE)
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET))
-#define __pa(x) ((unsigned long)(x) - VIRT_PHYS_OFFSET)
+#define __pa(x) ((phys_addr_t)(unsigned long)(x) - VIRT_PHYS_OFFSET)
#else
#ifdef CONFIG_PPC64
/*
* gcc miscompiles (unsigned long)(&static_var) - PAGE_OFFSET
* with -mcmodel=medium, so we use & and | instead of - and + on 64-bit.
+ * This also results in better code generation.
*/
-#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET))
-#define __pa(x) ((unsigned long)(x) & 0x0fffffffffffffffUL)
+#define __va(x) \
+({ \
+ VIRTUAL_BUG_ON((unsigned long)(x) >= PAGE_OFFSET); \
+ (void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET); \
+})
+
+#define __pa(x) \
+({ \
+ VIRTUAL_BUG_ON((unsigned long)(x) < PAGE_OFFSET); \
+ (unsigned long)(x) & 0x0fffffffffffffffUL; \
+})
#else /* 32-bit, non book E */
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + PAGE_OFFSET - MEMORY_START))
@@ -315,17 +325,15 @@ void arch_free_page(struct page *page, int order);
struct vm_area_struct;
+extern unsigned long kernstart_virt_addr;
+
+static inline unsigned long kaslr_offset(void)
+{
+ return kernstart_virt_addr - KERNELBASE;
+}
+
#include <asm-generic/memory_model.h>
#endif /* __ASSEMBLY__ */
#include <asm/slice.h>
-/*
- * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks.
- */
-#ifdef CONFIG_PPC32
-#define ARCH_ZONE_DMA_BITS 30
-#else
-#define ARCH_ZONE_DMA_BITS 31
-#endif
-
#endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
index 683dfbc67ca8..d64dfe3ac712 100644
--- a/arch/powerpc/include/asm/page_32.h
+++ b/arch/powerpc/include/asm/page_32.h
@@ -40,6 +40,8 @@ typedef unsigned long long pte_basic_t;
typedef unsigned long pte_basic_t;
#endif
+#include <asm/bug.h>
+
/*
* Clear page using the dcbz instruction, which doesn't cause any
* memory traffic (except to write out any cache lines which get
@@ -49,6 +51,8 @@ static inline void clear_page(void *addr)
{
unsigned int i;
+ WARN_ON((unsigned long)addr & (L1_CACHE_BYTES - 1));
+
for (i = 0; i < PAGE_SIZE / L1_CACHE_BYTES; i++, addr += L1_CACHE_BYTES)
dcbz(addr);
}
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 8dad1fdf4bd2..69f4cb3b7c56 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -183,6 +183,7 @@ struct iommu_table;
struct pci_dn {
int flags;
#define PCI_DN_FLAG_IOV_VF 0x01
+#define PCI_DN_FLAG_DEAD 0x02 /* Device has been hot-removed */
int busno; /* pci bus number */
int devfn; /* pci device and function number */
@@ -222,12 +223,15 @@ struct pci_dn {
extern struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus,
int devfn);
extern struct pci_dn *pci_get_pdn(struct pci_dev *pdev);
-extern struct pci_dn *add_dev_pci_data(struct pci_dev *pdev);
-extern void remove_dev_pci_data(struct pci_dev *pdev);
extern struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
struct device_node *dn);
extern void pci_remove_device_node_info(struct device_node *dn);
+#ifdef CONFIG_PCI_IOV
+struct pci_dn *add_sriov_vf_pdns(struct pci_dev *pdev);
+void remove_sriov_vf_pdns(struct pci_dev *pdev);
+#endif
+
static inline int pci_device_from_OF_node(struct device_node *np,
u8 *bus, u8 *devfn)
{
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 2372d35533ad..63ed7e3b0ba3 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -112,10 +112,7 @@ extern pgprot_t pci_phys_mem_access_prot(struct file *file,
unsigned long size,
pgprot_t prot);
-#define HAVE_ARCH_PCI_RESOURCE_TO_USER
-
extern resource_size_t pcibios_io_space_offset(struct pci_controller *hose);
-extern void pcibios_setup_bus_devices(struct pci_bus *bus);
extern void pcibios_setup_bus_self(struct pci_bus *bus);
extern void pcibios_setup_phb_io_space(struct pci_controller *hose);
extern void pcibios_scan_phb(struct pci_controller *hose);
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index 2b2c60a1a66d..6dd78a2dc03a 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -64,8 +64,6 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
extern struct kmem_cache *pgtable_cache[];
#define PGT_CACHE(shift) pgtable_cache[shift]
-static inline void check_pgt_cache(void) { }
-
#ifdef CONFIG_PPC_BOOK3S
#include <asm/book3s/pgalloc.h>
#else
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index c58ba7963688..8cc543ed114c 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -68,6 +68,8 @@ extern pgd_t swapper_pg_dir[];
extern void paging_init(void);
+extern unsigned long ioremap_bot;
+
/*
* kern_addr_valid is intended to indicate whether an address is a valid
* kernel address. Most 32-bit archs define it as always true (like this)
@@ -77,18 +79,6 @@ extern void paging_init(void);
#include <asm-generic/pgtable.h>
-
-/*
- * This gets called at the end of handling a page fault, when
- * the kernel has put a new PTE into the page table for the process.
- * We use it to ensure coherency between the i-cache and d-cache
- * for the page which has just been mapped in.
- * On machines which use an MMU hash table, we use this to put a
- * corresponding HPTE into the hash table ahead of time, instead of
- * waiting for the inevitable extra hash-table miss exception.
- */
-extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
-
#ifndef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_large(pmd) 0
#endif
@@ -97,7 +87,6 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
unsigned long vmalloc_to_phys(void *vmalloc_addr);
void pgtable_cache_add(unsigned int shift);
-void pgtable_cache_init(void);
#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32)
void mark_initmem_nx(void);
@@ -105,12 +94,6 @@ void mark_initmem_nx(void);
static inline void mark_initmem_nx(void) { }
#endif
-#ifdef CONFIG_PPC_DEBUG_WX
-void ptdump_check_wx(void);
-#else
-static inline void ptdump_check_wx(void) { }
-#endif
-
/*
* When used, PTE_FRAG_NR is defined in subarch pgtable.h
* so we are sure it is included when arriving here.
@@ -168,13 +151,9 @@ static inline bool pgd_is_leaf(pgd_t pgd)
#define is_ioremap_addr is_ioremap_addr
static inline bool is_ioremap_addr(const void *x)
{
-#ifdef CONFIG_MMU
unsigned long addr = (unsigned long)x;
return addr >= IOREMAP_BASE && addr < IOREMAP_END;
-#else
- return false;
-#endif
}
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index cff5a411e595..4497c8afb573 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -340,6 +340,12 @@ static inline long plpar_set_ciabr(unsigned long ciabr)
{
return 0;
}
+
+static inline long plpar_pte_read_4(unsigned long flags, unsigned long ptex,
+ unsigned long *ptes)
+{
+ return 0;
+}
#endif /* CONFIG_PPC_PSERIES */
#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h
index edcb1fc50aeb..d0ee0ede5767 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -15,6 +15,7 @@
#define PCI_SLOT_ID_PREFIX (1UL << 63)
#define PCI_SLOT_ID(phb_id, bdfn) \
(PCI_SLOT_ID_PREFIX | ((uint64_t)(bdfn) << 16) | (phb_id))
+#define PCI_PHB_SLOT_ID(phb_id) (phb_id)
extern int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id);
extern int pnv_pci_get_device_tree(uint32_t phandle, void *buf, uint64_t len);
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index cec2d6409515..7f4be5a05eb3 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -62,11 +62,6 @@ void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode);
void eeh_sysfs_add_device(struct pci_dev *pdev);
void eeh_sysfs_remove_device(struct pci_dev *pdev);
-static inline const char *eeh_pci_name(struct pci_dev *pdev)
-{
- return pdev ? pci_name(pdev) : "<null>";
-}
-
static inline const char *eeh_driver_name(struct pci_dev *pdev)
{
return (pdev && pdev->driver) ? pdev->driver->name : "<null>";
@@ -74,6 +69,8 @@ static inline const char *eeh_driver_name(struct pci_dev *pdev)
#endif /* CONFIG_EEH */
+#define PCI_BUSNO(bdfn) ((bdfn >> 8) & 0xff)
+
#else /* CONFIG_PCI */
static inline void init_pci_config_tokens(void) { }
#endif /* !CONFIG_PCI */
diff --git a/arch/powerpc/include/asm/ppc4xx_ocm.h b/arch/powerpc/include/asm/ppc4xx_ocm.h
deleted file mode 100644
index fc4db6dcde84..000000000000
--- a/arch/powerpc/include/asm/ppc4xx_ocm.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * PowerPC 4xx OCM memory allocation support
- *
- * (C) Copyright 2009, Applied Micro Circuits Corporation
- * Victor Gallardo (vgallardo@amcc.com)
- *
- * See file CREDITS for list of people who contributed to this
- * project.
- */
-
-#ifndef __ASM_POWERPC_PPC4XX_OCM_H__
-#define __ASM_POWERPC_PPC4XX_OCM_H__
-
-#define PPC4XX_OCM_NON_CACHED 0
-#define PPC4XX_OCM_CACHED 1
-
-#if defined(CONFIG_PPC4xx_OCM)
-
-void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align,
- int flags, const char *owner);
-void ppc4xx_ocm_free(const void *virt);
-
-#else
-
-#define ppc4xx_ocm_alloc(phys, size, align, flags, owner) NULL
-#define ppc4xx_ocm_free(addr) ((void)0)
-
-#endif /* CONFIG_PPC4xx_OCM */
-
-#endif /* __ASM_POWERPC_PPC4XX_OCM_H__ */
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index e0637730a8e7..6b03dff61a05 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -311,18 +311,48 @@ n:
addis reg,reg,(name - 0b)@ha; \
addi reg,reg,(name - 0b)@l;
-#ifdef __powerpc64__
-#ifdef HAVE_AS_ATHIGH
+#if defined(__powerpc64__) && defined(HAVE_AS_ATHIGH)
#define __AS_ATHIGH high
#else
#define __AS_ATHIGH h
#endif
-#define LOAD_REG_IMMEDIATE(reg,expr) \
- lis reg,(expr)@highest; \
- ori reg,reg,(expr)@higher; \
- rldicr reg,reg,32,31; \
- oris reg,reg,(expr)@__AS_ATHIGH; \
- ori reg,reg,(expr)@l;
+
+.macro __LOAD_REG_IMMEDIATE_32 r, x
+ .if (\x) >= 0x8000 || (\x) < -0x8000
+ lis \r, (\x)@__AS_ATHIGH
+ .if (\x) & 0xffff != 0
+ ori \r, \r, (\x)@l
+ .endif
+ .else
+ li \r, (\x)@l
+ .endif
+.endm
+
+.macro __LOAD_REG_IMMEDIATE r, x
+ .if (\x) >= 0x80000000 || (\x) < -0x80000000
+ __LOAD_REG_IMMEDIATE_32 \r, (\x) >> 32
+ sldi \r, \r, 32
+ .if (\x) & 0xffff0000 != 0
+ oris \r, \r, (\x)@__AS_ATHIGH
+ .endif
+ .if (\x) & 0xffff != 0
+ ori \r, \r, (\x)@l
+ .endif
+ .else
+ __LOAD_REG_IMMEDIATE_32 \r, \x
+ .endif
+.endm
+
+#ifdef __powerpc64__
+
+#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE reg, expr
+
+#define LOAD_REG_IMMEDIATE_SYM(reg, tmp, expr) \
+ lis tmp, (expr)@highest; \
+ lis reg, (expr)@__AS_ATHIGH; \
+ ori tmp, tmp, (expr)@higher; \
+ ori reg, reg, (expr)@l; \
+ rldimi reg, tmp, 32, 0
#define LOAD_REG_ADDR(reg,name) \
ld reg,name@got(r2)
@@ -335,11 +365,13 @@ n:
#else /* 32-bit */
-#define LOAD_REG_IMMEDIATE(reg,expr) \
+#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE_32 reg, expr
+
+#define LOAD_REG_IMMEDIATE_SYM(reg,expr) \
lis reg,(expr)@ha; \
addi reg,reg,(expr)@l;
-#define LOAD_REG_ADDR(reg,name) LOAD_REG_IMMEDIATE(reg, name)
+#define LOAD_REG_ADDR(reg,name) LOAD_REG_IMMEDIATE_SYM(reg, name)
#define LOAD_REG_ADDRBASE(reg, name) lis reg,name@ha
#define ADDROFF(name) name@l
@@ -351,19 +383,9 @@ n:
/* various errata or part fixups */
#ifdef CONFIG_PPC601_SYNC_FIX
-#define SYNC \
-BEGIN_FTR_SECTION \
- sync; \
- isync; \
-END_FTR_SECTION_IFSET(CPU_FTR_601)
-#define SYNC_601 \
-BEGIN_FTR_SECTION \
- sync; \
-END_FTR_SECTION_IFSET(CPU_FTR_601)
-#define ISYNC_601 \
-BEGIN_FTR_SECTION \
- isync; \
-END_FTR_SECTION_IFSET(CPU_FTR_601)
+#define SYNC sync; isync
+#define SYNC_601 sync
+#define ISYNC_601 isync
#else
#define SYNC
#define SYNC_601
@@ -389,15 +411,11 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
#define MFTBU(dest) mfspr dest, SPRN_TBRU
#endif
-#ifndef CONFIG_SMP
-#define TLBSYNC
-#else /* CONFIG_SMP */
/* tlbsync is not implemented on 601 */
-#define TLBSYNC \
-BEGIN_FTR_SECTION \
- tlbsync; \
- sync; \
-END_FTR_SECTION_IFCLR(CPU_FTR_601)
+#if !defined(CONFIG_SMP) || defined(CONFIG_PPC_BOOK3S_601)
+#define TLBSYNC
+#else
+#define TLBSYNC tlbsync; sync
#endif
#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index a9993e7a443b..8387698bd5b6 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -163,6 +163,12 @@ struct thread_struct {
#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
unsigned long kuap; /* opened segments for user access */
#endif
+#ifdef CONFIG_VMAP_STACK
+ unsigned long srr0;
+ unsigned long srr1;
+ unsigned long dar;
+ unsigned long dsisr;
+#endif
/* Debug Registers */
struct debug_reg debug;
struct thread_fp_state fp_state;
@@ -412,6 +418,9 @@ static inline unsigned long get_clean_sp(unsigned long sp, int is_32)
extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val);
extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
extern unsigned long isa206_idle_insn_mayloss(unsigned long type);
+#ifdef CONFIG_PPC_970_NAP
+extern void power4_idle_nap(void);
+#endif
extern unsigned long cpuidle_disable;
enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index feee1b21bbd5..ee3ada66deb5 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -203,7 +203,11 @@ do { \
#endif /* __powerpc64__ */
#define arch_has_single_step() (1)
-#define arch_has_block_step() (!cpu_has_feature(CPU_FTR_601))
+#ifndef CONFIG_BOOK3S_601
+#define arch_has_block_step() (true)
+#else
+#define arch_has_block_step() (false)
+#endif
#define ARCH_HAS_USER_SINGLE_STEP_REPORT
/*
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 10caa145f98b..1aa46dff0957 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -25,9 +25,7 @@
#include <asm/reg_fsl_emb.h>
#endif
-#ifdef CONFIG_PPC_8xx
#include <asm/reg_8xx.h>
-#endif /* CONFIG_PPC_8xx */
#define MSR_SF_LG 63 /* Enable 64 bit mode */
#define MSR_ISF_LG 61 /* Interrupt 64b mode valid on 630 */
@@ -38,6 +36,7 @@
#define MSR_TM_LG 32 /* Trans Mem Available */
#define MSR_VEC_LG 25 /* Enable AltiVec */
#define MSR_VSX_LG 23 /* Enable VSX */
+#define MSR_S_LG 22 /* Secure state */
#define MSR_POW_LG 18 /* Enable Power Management */
#define MSR_WE_LG 18 /* Wait State Enable */
#define MSR_TGPR_LG 17 /* TLB Update registers in use */
@@ -71,11 +70,13 @@
#define MSR_SF __MASK(MSR_SF_LG) /* Enable 64 bit mode */
#define MSR_ISF __MASK(MSR_ISF_LG) /* Interrupt 64b mode valid on 630 */
#define MSR_HV __MASK(MSR_HV_LG) /* Hypervisor state */
+#define MSR_S __MASK(MSR_S_LG) /* Secure state */
#else
/* so tests for these bits fail on 32-bit */
#define MSR_SF 0
#define MSR_ISF 0
#define MSR_HV 0
+#define MSR_S 0
#endif
/*
@@ -472,9 +473,10 @@
#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */
#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */
#define SPRN_PCR 0x152 /* Processor compatibility register */
-#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
-#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
-#define PCR_TM_DIS (1ul << (63-2)) /* Trans. memory disable (POWER8) */
+#define PCR_VEC_DIS (__MASK(63-0)) /* Vec. disable (bit NA since POWER8) */
+#define PCR_VSX_DIS (__MASK(63-1)) /* VSX disable (bit NA since POWER8) */
+#define PCR_TM_DIS (__MASK(63-2)) /* Trans. memory disable (POWER8) */
+#define PCR_HIGH_BITS (PCR_VEC_DIS | PCR_VSX_DIS | PCR_TM_DIS)
/*
* These bits are used in the function kvmppc_set_arch_compat() to specify and
* determine both the compatibility level which we want to emulate and the
@@ -483,6 +485,8 @@
#define PCR_ARCH_207 0x8 /* Architecture 2.07 */
#define PCR_ARCH_206 0x4 /* Architecture 2.06 */
#define PCR_ARCH_205 0x2 /* Architecture 2.05 */
+#define PCR_LOW_BITS (PCR_ARCH_207 | PCR_ARCH_206 | PCR_ARCH_205)
+#define PCR_MASK ~(PCR_HIGH_BITS | PCR_LOW_BITS) /* PCR Reserved Bits */
#define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */
#define SPRN_TLBINDEXR 0x154 /* P7 TLB control register */
#define SPRN_TLBVPNR 0x155 /* P7 TLB control register */
@@ -742,6 +746,18 @@
#define SPRN_USPRG7 0x107 /* SPRG7 userspace read */
#define SPRN_SRR0 0x01A /* Save/Restore Register 0 */
#define SPRN_SRR1 0x01B /* Save/Restore Register 1 */
+
+#ifdef CONFIG_PPC_BOOK3S
+/*
+ * Bits loaded from MSR upon interrupt.
+ * PPC (64-bit) bits 33-36,42-47 are interrupt dependent, the others are
+ * loaded from MSR. The exception is that SRESET and MCE do not always load
+ * bit 62 (RI) from MSR. Don't use PPC_BITMASK for this because 32-bit uses
+ * it.
+ */
+#define SRR1_MSR_BITS (~0x783f0000UL)
+#endif
+
#define SRR1_ISI_NOPT 0x40000000 /* ISI: Not found in hash */
#define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */
#define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */
@@ -1364,6 +1380,14 @@ static inline void mtmsr_isync(unsigned long val)
#define wrtspr(rn) asm volatile("mtspr " __stringify(rn) ",0" : \
: : "memory")
+static inline void wrtee(unsigned long val)
+{
+ if (__builtin_constant_p(val))
+ asm volatile("wrteei %0" : : "i" ((val & MSR_EE) ? 1 : 0) : "memory");
+ else
+ asm volatile("wrtee %0" : : "r" (val) : "memory");
+}
+
extern unsigned long msr_check_and_set(unsigned long bits);
extern bool strict_msr_control;
extern void __msr_check_and_clear(unsigned long bits);
@@ -1378,19 +1402,9 @@ static inline void msr_check_and_clear(unsigned long bits)
#define mftb() ({unsigned long rval; \
asm volatile( \
"90: mfspr %0, %2;\n" \
- "97: cmpwi %0,0;\n" \
- " beq- 90b;\n" \
- "99:\n" \
- ".section __ftr_fixup,\"a\"\n" \
- ".align 3\n" \
- "98:\n" \
- " .8byte %1\n" \
- " .8byte %1\n" \
- " .8byte 97b-98b\n" \
- " .8byte 99b-98b\n" \
- " .8byte 0\n" \
- " .8byte 0\n" \
- ".previous" \
+ ASM_FTR_IFSET( \
+ "97: cmpwi %0,0;\n" \
+ " beq- 90b;\n", "", %1) \
: "=r" (rval) \
: "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \
rval;})
diff --git a/arch/powerpc/include/asm/reg_8xx.h b/arch/powerpc/include/asm/reg_8xx.h
index 7192eece6c3e..299ee7be0f67 100644
--- a/arch/powerpc/include/asm/reg_8xx.h
+++ b/arch/powerpc/include/asm/reg_8xx.h
@@ -5,8 +5,6 @@
#ifndef _ASM_POWERPC_REG_8xx_H
#define _ASM_POWERPC_REG_8xx_H
-#include <asm/mmu.h>
-
/* Cache control on the MPC8xx is provided through some additional
* special purpose registers.
*/
@@ -37,8 +35,24 @@
#define SPRN_CMPE 152
#define SPRN_CMPF 153
#define SPRN_LCTRL1 156
+#define LCTRL1_CTE_GT 0xc0000000
+#define LCTRL1_CTF_LT 0x14000000
+#define LCTRL1_CRWE_RW 0x00000000
+#define LCTRL1_CRWE_RO 0x00040000
+#define LCTRL1_CRWE_WO 0x000c0000
+#define LCTRL1_CRWF_RW 0x00000000
+#define LCTRL1_CRWF_RO 0x00010000
+#define LCTRL1_CRWF_WO 0x00030000
#define SPRN_LCTRL2 157
+#define LCTRL2_LW0EN 0x80000000
+#define LCTRL2_LW0LA_E 0x00000000
+#define LCTRL2_LW0LA_F 0x04000000
+#define LCTRL2_LW0LA_EandF 0x08000000
+#define LCTRL2_LW0LADC 0x02000000
+#define LCTRL2_SLW0EN 0x00000002
+#ifdef CONFIG_PPC_8xx
#define SPRN_ICTRL 158
+#endif
#define SPRN_BAR 159
/* Commands. Only the first few are available to the instruction cache.
diff --git a/arch/powerpc/include/asm/scom.h b/arch/powerpc/include/asm/scom.h
deleted file mode 100644
index 08c44396e54a..000000000000
--- a/arch/powerpc/include/asm/scom.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright 2010 Benjamin Herrenschmidt, IBM Corp
- * <benh@kernel.crashing.org>
- * and David Gibson, IBM Corporation.
- */
-
-#ifndef _ASM_POWERPC_SCOM_H
-#define _ASM_POWERPC_SCOM_H
-
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_PPC_SCOM
-
-/*
- * The SCOM bus is a sideband bus used for accessing various internal
- * registers of the processor or the chipset. The implementation details
- * differ between processors and platforms, and the access method as
- * well.
- *
- * This API allows to "map" ranges of SCOM register numbers associated
- * with a given SCOM controller. The later must be represented by a
- * device node, though some implementations might support NULL if there
- * is no possible ambiguity
- *
- * Then, scom_read/scom_write can be used to accesses registers inside
- * that range. The argument passed is a register number relative to
- * the beginning of the range mapped.
- */
-
-typedef void *scom_map_t;
-
-/* Value for an invalid SCOM map */
-#define SCOM_MAP_INVALID (NULL)
-
-/* The scom_controller data structure is what the platform passes
- * to the core code in scom_init, it provides the actual implementation
- * of all the SCOM functions
- */
-struct scom_controller {
- scom_map_t (*map)(struct device_node *ctrl_dev, u64 reg, u64 count);
- void (*unmap)(scom_map_t map);
-
- int (*read)(scom_map_t map, u64 reg, u64 *value);
- int (*write)(scom_map_t map, u64 reg, u64 value);
-};
-
-extern const struct scom_controller *scom_controller;
-
-/**
- * scom_init - Initialize the SCOM backend, called by the platform
- * @controller: The platform SCOM controller
- */
-static inline void scom_init(const struct scom_controller *controller)
-{
- scom_controller = controller;
-}
-
-/**
- * scom_map_ok - Test is a SCOM mapping is successful
- * @map: The result of scom_map to test
- */
-static inline int scom_map_ok(scom_map_t map)
-{
- return map != SCOM_MAP_INVALID;
-}
-
-/**
- * scom_map - Map a block of SCOM registers
- * @ctrl_dev: Device node of the SCOM controller
- * some implementations allow NULL here
- * @reg: first SCOM register to map
- * @count: Number of SCOM registers to map
- */
-
-static inline scom_map_t scom_map(struct device_node *ctrl_dev,
- u64 reg, u64 count)
-{
- return scom_controller->map(ctrl_dev, reg, count);
-}
-
-/**
- * scom_find_parent - Find the SCOM controller for a device
- * @dev: OF node of the device
- *
- * This is not meant for general usage, but in combination with
- * scom_map() allows to map registers not represented by the
- * device own scom-reg property. Useful for applying HW workarounds
- * on things not properly represented in the device-tree for example.
- */
-struct device_node *scom_find_parent(struct device_node *dev);
-
-
-/**
- * scom_map_device - Map a device's block of SCOM registers
- * @dev: OF node of the device
- * @index: Register bank index (index in "scom-reg" property)
- *
- * This function will use the device-tree binding for SCOM which
- * is to follow "scom-parent" properties until it finds a node with
- * a "scom-controller" property to find the controller. It will then
- * use the "scom-reg" property which is made of reg/count pairs,
- * each of them having a size defined by the controller's #scom-cells
- * property
- */
-extern scom_map_t scom_map_device(struct device_node *dev, int index);
-
-
-/**
- * scom_unmap - Unmap a block of SCOM registers
- * @map: Result of scom_map is to be unmapped
- */
-static inline void scom_unmap(scom_map_t map)
-{
- if (scom_map_ok(map))
- scom_controller->unmap(map);
-}
-
-/**
- * scom_read - Read a SCOM register
- * @map: Result of scom_map
- * @reg: Register index within that map
- * @value: Updated with the value read
- *
- * Returns 0 (success) or a negative error code
- */
-static inline int scom_read(scom_map_t map, u64 reg, u64 *value)
-{
- int rc;
-
- rc = scom_controller->read(map, reg, value);
- if (rc)
- *value = 0xfffffffffffffffful;
- return rc;
-}
-
-/**
- * scom_write - Write to a SCOM register
- * @map: Result of scom_map
- * @reg: Register index within that map
- * @value: Value to write
- *
- * Returns 0 (success) or a negative error code
- */
-static inline int scom_write(scom_map_t map, u64 reg, u64 value)
-{
- return scom_controller->write(map, reg, value);
-}
-
-
-#endif /* CONFIG_PPC_SCOM */
-#endif /* __ASSEMBLY__ */
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_SCOM_H */
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index 4a1664a8658d..d19871763ed4 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -5,8 +5,22 @@
#include <linux/elf.h>
#include <linux/uaccess.h>
+
+#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed
+
#include <asm-generic/sections.h>
+extern bool init_mem_is_free;
+
+static inline int arch_is_kernel_initmem_freed(unsigned long addr)
+{
+ if (!init_mem_is_free)
+ return 0;
+
+ return addr >= (unsigned long)__init_begin &&
+ addr < (unsigned long)__init_end;
+}
+
extern char __head_end[];
#ifdef __powerpc64__
@@ -61,17 +75,6 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end)
(unsigned long)_stext < end;
}
-static inline int overlaps_kvm_tmp(unsigned long start, unsigned long end)
-{
-#ifdef CONFIG_KVM_GUEST
- extern char kvm_tmp[];
- return start < (unsigned long)kvm_tmp &&
- (unsigned long)&kvm_tmp[1024 * 1024] < end;
-#else
- return 0;
-#endif
-}
-
#ifdef PPC64_ELF_ABI_v1
#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
diff --git a/arch/powerpc/include/asm/secure_boot.h b/arch/powerpc/include/asm/secure_boot.h
new file mode 100644
index 000000000000..a2ff556916c6
--- /dev/null
+++ b/arch/powerpc/include/asm/secure_boot.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Secure boot definitions
+ *
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ */
+#ifndef _ASM_POWER_SECURE_BOOT_H
+#define _ASM_POWER_SECURE_BOOT_H
+
+#ifdef CONFIG_PPC_SECURE_BOOT
+
+bool is_ppc_secureboot_enabled(void);
+bool is_ppc_trustedboot_enabled(void);
+
+#else
+
+static inline bool is_ppc_secureboot_enabled(void)
+{
+ return false;
+}
+
+static inline bool is_ppc_trustedboot_enabled(void)
+{
+ return false;
+}
+
+#endif
+#endif
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index 759597bf0fd8..7c05e95a5c44 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -9,7 +9,7 @@
#define _ASM_POWERPC_SECURITY_FEATURES_H
-extern unsigned long powerpc_security_features;
+extern u64 powerpc_security_features;
extern bool rfi_flush;
/* These are bit flags */
@@ -24,17 +24,17 @@ void setup_stf_barrier(void);
void do_stf_barrier_fixups(enum stf_barrier_type types);
void setup_count_cache_flush(void);
-static inline void security_ftr_set(unsigned long feature)
+static inline void security_ftr_set(u64 feature)
{
powerpc_security_features |= feature;
}
-static inline void security_ftr_clear(unsigned long feature)
+static inline void security_ftr_clear(u64 feature)
{
powerpc_security_features &= ~feature;
}
-static inline bool security_ftr_enabled(unsigned long feature)
+static inline bool security_ftr_enabled(u64 feature)
{
return !!(powerpc_security_features & feature);
}
@@ -81,6 +81,9 @@ static inline bool security_ftr_enabled(unsigned long feature)
// Software required to flush count cache on context switch
#define SEC_FTR_FLUSH_COUNT_CACHE 0x0000000000000400ull
+// Software required to flush link stack on context switch
+#define SEC_FTR_FLUSH_LINK_STACK 0x0000000000001000ull
+
// Features enabled by default
#define SEC_FTR_DEFAULT \
diff --git a/arch/powerpc/include/asm/secvar.h b/arch/powerpc/include/asm/secvar.h
new file mode 100644
index 000000000000..4cc35b58b986
--- /dev/null
+++ b/arch/powerpc/include/asm/secvar.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ *
+ * PowerPC secure variable operations.
+ */
+#ifndef SECVAR_OPS_H
+#define SECVAR_OPS_H
+
+#include <linux/types.h>
+#include <linux/errno.h>
+
+extern const struct secvar_operations *secvar_ops;
+
+struct secvar_operations {
+ int (*get)(const char *key, uint64_t key_len, u8 *data,
+ uint64_t *data_size);
+ int (*get_next)(const char *key, uint64_t *key_len,
+ uint64_t keybufsize);
+ int (*set)(const char *key, uint64_t key_len, u8 *data,
+ uint64_t data_size);
+};
+
+#ifdef CONFIG_PPC_SECURE_BOOT
+
+extern void set_secvar_ops(const struct secvar_operations *ops);
+
+#else
+
+static inline void set_secvar_ops(const struct secvar_operations *ops) { }
+
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/setjmp.h b/arch/powerpc/include/asm/setjmp.h
index d995061f5f86..e9f81bb3f83b 100644
--- a/arch/powerpc/include/asm/setjmp.h
+++ b/arch/powerpc/include/asm/setjmp.h
@@ -7,7 +7,7 @@
#define JMP_BUF_LEN 23
-extern long setjmp(long *);
-extern void longjmp(long *, long);
+extern long setjmp(long *) __attribute__((returns_twice));
+extern void longjmp(long *, long) __attribute__((noreturn));
#endif /* _ASM_POWERPC_SETJMP_H */
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index a47f827bc5f1..860228e917dc 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -15,6 +15,7 @@
*
* (the type definitions are in asm/spinlock_types.h)
*/
+#include <linux/jump_label.h>
#include <linux/irqflags.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
@@ -36,10 +37,12 @@
#endif
#ifdef CONFIG_PPC_PSERIES
+DECLARE_STATIC_KEY_FALSE(shared_processor);
+
#define vcpu_is_preempted vcpu_is_preempted
static inline bool vcpu_is_preempted(int cpu)
{
- if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+ if (!static_branch_unlikely(&shared_processor))
return false;
return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1);
}
@@ -101,14 +104,37 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
#if defined(CONFIG_PPC_SPLPAR)
/* We only yield to the hypervisor if we are in shared processor mode */
-#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
-extern void __spin_yield(arch_spinlock_t *lock);
-extern void __rw_yield(arch_rwlock_t *lock);
+void splpar_spin_yield(arch_spinlock_t *lock);
+void splpar_rw_yield(arch_rwlock_t *lock);
#else /* SPLPAR */
-#define __spin_yield(x) barrier()
-#define __rw_yield(x) barrier()
-#define SHARED_PROCESSOR 0
+static inline void splpar_spin_yield(arch_spinlock_t *lock) {};
+static inline void splpar_rw_yield(arch_rwlock_t *lock) {};
+#endif
+
+static inline bool is_shared_processor(void)
+{
+#ifdef CONFIG_PPC_SPLPAR
+ return static_branch_unlikely(&shared_processor);
+#else
+ return false;
#endif
+}
+
+static inline void spin_yield(arch_spinlock_t *lock)
+{
+ if (is_shared_processor())
+ splpar_spin_yield(lock);
+ else
+ barrier();
+}
+
+static inline void rw_yield(arch_rwlock_t *lock)
+{
+ if (is_shared_processor())
+ splpar_rw_yield(lock);
+ else
+ barrier();
+}
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
@@ -117,8 +143,8 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
break;
do {
HMT_low();
- if (SHARED_PROCESSOR)
- __spin_yield(lock);
+ if (is_shared_processor())
+ splpar_spin_yield(lock);
} while (unlikely(lock->slock != 0));
HMT_medium();
}
@@ -136,8 +162,8 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
local_irq_restore(flags);
do {
HMT_low();
- if (SHARED_PROCESSOR)
- __spin_yield(lock);
+ if (is_shared_processor())
+ splpar_spin_yield(lock);
} while (unlikely(lock->slock != 0));
HMT_medium();
local_irq_restore(flags_dis);
@@ -226,8 +252,8 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
break;
do {
HMT_low();
- if (SHARED_PROCESSOR)
- __rw_yield(rw);
+ if (is_shared_processor())
+ splpar_rw_yield(rw);
} while (unlikely(rw->lock < 0));
HMT_medium();
}
@@ -240,8 +266,8 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
break;
do {
HMT_low();
- if (SHARED_PROCESSOR)
- __rw_yield(rw);
+ if (is_shared_processor())
+ splpar_rw_yield(rw);
} while (unlikely(rw->lock != 0));
HMT_medium();
}
@@ -281,9 +307,9 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
rw->lock = 0;
}
-#define arch_spin_relax(lock) __spin_yield(lock)
-#define arch_read_relax(lock) __rw_yield(lock)
-#define arch_write_relax(lock) __rw_yield(lock)
+#define arch_spin_relax(lock) spin_yield(lock)
+#define arch_read_relax(lock) rw_yield(lock)
+#define arch_write_relax(lock) rw_yield(lock)
/* See include/linux/spinlock.h */
#define smp_mb__after_spinlock() smp_mb()
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index 9bf6dffb4090..b72692702f35 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -53,7 +53,9 @@ void *__memmove(void *to, const void *from, __kernel_size_t n);
#ifndef CONFIG_KASAN
#define __HAVE_ARCH_MEMSET32
#define __HAVE_ARCH_MEMSET64
+#define __HAVE_ARCH_MEMCPY_MCSAFE
+extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz);
extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
diff --git a/arch/powerpc/include/asm/svm.h b/arch/powerpc/include/asm/svm.h
new file mode 100644
index 000000000000..85580b30aba4
--- /dev/null
+++ b/arch/powerpc/include/asm/svm.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * SVM helper functions
+ *
+ * Copyright 2018 Anshuman Khandual, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_SVM_H
+#define _ASM_POWERPC_SVM_H
+
+#ifdef CONFIG_PPC_SVM
+
+static inline bool is_secure_guest(void)
+{
+ return mfmsr() & MSR_S;
+}
+
+void dtl_cache_ctor(void *addr);
+#define get_dtl_cache_ctor() (is_secure_guest() ? dtl_cache_ctor : NULL)
+
+#else /* CONFIG_PPC_SVM */
+
+static inline bool is_secure_guest(void)
+{
+ return false;
+}
+
+#define get_dtl_cache_ctor() NULL
+
+#endif /* CONFIG_PPC_SVM */
+#endif /* _ASM_POWERPC_SVM_H */
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 8e1d0195ac36..a2270749b282 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -10,13 +10,31 @@
#define _ASM_POWERPC_THREAD_INFO_H
#include <asm/asm-const.h>
+#include <asm/page.h>
#ifdef __KERNEL__
+#if defined(CONFIG_VMAP_STACK) && CONFIG_THREAD_SHIFT < PAGE_SHIFT
+#define THREAD_SHIFT PAGE_SHIFT
+#else
#define THREAD_SHIFT CONFIG_THREAD_SHIFT
+#endif
#define THREAD_SIZE (1 << THREAD_SHIFT)
+/*
+ * By aligning VMAP'd stacks to 2 * THREAD_SIZE, we can detect overflow by
+ * checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry
+ * assembly.
+ */
+#ifdef CONFIG_VMAP_STACK
+#define THREAD_ALIGN_SHIFT (THREAD_SHIFT + 1)
+#else
+#define THREAD_ALIGN_SHIFT THREAD_SHIFT
+#endif
+
+#define THREAD_ALIGN (1 << THREAD_ALIGN_SHIFT)
+
#ifndef __ASSEMBLY__
#include <linux/cache.h>
#include <asm/processor.h>
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 54f4ec1f9fab..08dbe3e6831c 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -41,11 +41,7 @@ struct div_result {
/* Accessor functions for the timebase (RTC on 601) registers. */
/* If one day CONFIG_POWER is added just define __USE_RTC as 1 */
-#ifdef CONFIG_PPC_BOOK3S_32
-#define __USE_RTC() (cpu_has_feature(CPU_FTR_USE_RTC))
-#else
-#define __USE_RTC() 0
-#endif
+#define __USE_RTC() (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h
index 926b9f91a3ef..d2d2c4bd8435 100644
--- a/arch/powerpc/include/asm/timex.h
+++ b/arch/powerpc/include/asm/timex.h
@@ -17,38 +17,10 @@ typedef unsigned long cycles_t;
static inline cycles_t get_cycles(void)
{
-#ifdef __powerpc64__
+ if (IS_ENABLED(CONFIG_BOOK3S_601))
+ return 0;
+
return mftb();
-#else
- cycles_t ret;
-
- /*
- * For the "cycle" counter we use the timebase lower half.
- * Currently only used on SMP.
- */
-
- ret = 0;
-
- __asm__ __volatile__(
-#ifdef CONFIG_PPC_8xx
- "97: mftb %0\n"
-#else
- "97: mfspr %0, %2\n"
-#endif
- "99:\n"
- ".section __ftr_fixup,\"a\"\n"
- ".align 2\n"
- "98:\n"
- " .long %1\n"
- " .long 0\n"
- " .long 97b-98b\n"
- " .long 99b-98b\n"
- " .long 0\n"
- " .long 0\n"
- ".previous"
- : "=r" (ret) : "i" (CPU_FTR_601), "i" (SPRN_TBRL));
- return ret;
-#endif
}
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index b2c0be93929d..7f3a8b902325 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -26,6 +26,17 @@
#define tlb_flush tlb_flush
extern void tlb_flush(struct mmu_gather *tlb);
+/*
+ * book3s:
+ * Hash does not use the linux page-tables, so we can avoid
+ * the TLB invalidate for page-table freeing, Radix otoh does use the
+ * page-tables and needs the TLBI.
+ *
+ * nohash:
+ * We still do TLB invalidate in the __pte_free_tlb routine before we
+ * add the page table pages to mmu gather table batch.
+ */
+#define tlb_needs_table_invalidate() radix_enabled()
/* Get the generic bits... */
#include <asm-generic/tlb.h>
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 8b03eb44e876..2f500debae21 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -91,9 +91,14 @@ static inline int __access_ok(unsigned long addr, unsigned long size,
__put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
#define __get_user(x, ptr) \
- __get_user_nocheck((x), (ptr), sizeof(*(ptr)))
+ __get_user_nocheck((x), (ptr), sizeof(*(ptr)), true)
#define __put_user(x, ptr) \
- __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
+ __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), true)
+
+#define __get_user_allowed(x, ptr) \
+ __get_user_nocheck((x), (ptr), sizeof(*(ptr)), false)
+#define __put_user_allowed(x, ptr) \
+ __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), false)
#define __get_user_inatomic(x, ptr) \
__get_user_nosleep((x), (ptr), sizeof(*(ptr)))
@@ -138,10 +143,9 @@ extern long __put_user_bad(void);
: "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
#endif /* __powerpc64__ */
-#define __put_user_size(x, ptr, size, retval) \
+#define __put_user_size_allowed(x, ptr, size, retval) \
do { \
retval = 0; \
- allow_write_to_user(ptr, size); \
switch (size) { \
case 1: __put_user_asm(x, ptr, retval, "stb"); break; \
case 2: __put_user_asm(x, ptr, retval, "sth"); break; \
@@ -149,17 +153,26 @@ do { \
case 8: __put_user_asm2(x, ptr, retval); break; \
default: __put_user_bad(); \
} \
+} while (0)
+
+#define __put_user_size(x, ptr, size, retval) \
+do { \
+ allow_write_to_user(ptr, size); \
+ __put_user_size_allowed(x, ptr, size, retval); \
prevent_write_to_user(ptr, size); \
} while (0)
-#define __put_user_nocheck(x, ptr, size) \
+#define __put_user_nocheck(x, ptr, size, do_allow) \
({ \
long __pu_err; \
__typeof__(*(ptr)) __user *__pu_addr = (ptr); \
if (!is_kernel_addr((unsigned long)__pu_addr)) \
might_fault(); \
__chk_user_ptr(ptr); \
- __put_user_size((x), __pu_addr, (size), __pu_err); \
+ if (do_allow) \
+ __put_user_size((x), __pu_addr, (size), __pu_err); \
+ else \
+ __put_user_size_allowed((x), __pu_addr, (size), __pu_err); \
__pu_err; \
})
@@ -236,13 +249,12 @@ extern long __get_user_bad(void);
: "b" (addr), "i" (-EFAULT), "0" (err))
#endif /* __powerpc64__ */
-#define __get_user_size(x, ptr, size, retval) \
+#define __get_user_size_allowed(x, ptr, size, retval) \
do { \
retval = 0; \
__chk_user_ptr(ptr); \
if (size > sizeof(x)) \
(x) = __get_user_bad(); \
- allow_read_from_user(ptr, size); \
switch (size) { \
case 1: __get_user_asm(x, ptr, retval, "lbz"); break; \
case 2: __get_user_asm(x, ptr, retval, "lhz"); break; \
@@ -250,6 +262,12 @@ do { \
case 8: __get_user_asm2(x, ptr, retval); break; \
default: (x) = __get_user_bad(); \
} \
+} while (0)
+
+#define __get_user_size(x, ptr, size, retval) \
+do { \
+ allow_read_from_user(ptr, size); \
+ __get_user_size_allowed(x, ptr, size, retval); \
prevent_read_from_user(ptr, size); \
} while (0)
@@ -260,7 +278,7 @@ do { \
#define __long_type(x) \
__typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
-#define __get_user_nocheck(x, ptr, size) \
+#define __get_user_nocheck(x, ptr, size, do_allow) \
({ \
long __gu_err; \
__long_type(*(ptr)) __gu_val; \
@@ -269,7 +287,10 @@ do { \
if (!is_kernel_addr((unsigned long)__gu_addr)) \
might_fault(); \
barrier_nospec(); \
- __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
+ if (do_allow) \
+ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
+ else \
+ __get_user_size_allowed(__gu_val, __gu_addr, (size), __gu_err); \
(x) = (__typeof__(*(ptr)))__gu_val; \
__gu_err; \
})
@@ -313,9 +334,9 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
unsigned long ret;
barrier_nospec();
- allow_user_access(to, from, n);
+ allow_read_write_user(to, from, n);
ret = __copy_tofrom_user(to, from, n);
- prevent_user_access(to, from, n);
+ prevent_read_write_user(to, from, n);
return ret;
}
#endif /* __powerpc64__ */
@@ -356,38 +377,59 @@ static inline unsigned long raw_copy_from_user(void *to,
return ret;
}
-static inline unsigned long raw_copy_to_user(void __user *to,
- const void *from, unsigned long n)
+static inline unsigned long
+raw_copy_to_user_allowed(void __user *to, const void *from, unsigned long n)
{
- unsigned long ret;
if (__builtin_constant_p(n) && (n <= 8)) {
- ret = 1;
+ unsigned long ret = 1;
switch (n) {
case 1:
- __put_user_size(*(u8 *)from, (u8 __user *)to, 1, ret);
+ __put_user_size_allowed(*(u8 *)from, (u8 __user *)to, 1, ret);
break;
case 2:
- __put_user_size(*(u16 *)from, (u16 __user *)to, 2, ret);
+ __put_user_size_allowed(*(u16 *)from, (u16 __user *)to, 2, ret);
break;
case 4:
- __put_user_size(*(u32 *)from, (u32 __user *)to, 4, ret);
+ __put_user_size_allowed(*(u32 *)from, (u32 __user *)to, 4, ret);
break;
case 8:
- __put_user_size(*(u64 *)from, (u64 __user *)to, 8, ret);
+ __put_user_size_allowed(*(u64 *)from, (u64 __user *)to, 8, ret);
break;
}
if (ret == 0)
return 0;
}
+ return __copy_tofrom_user(to, (__force const void __user *)from, n);
+}
+
+static inline unsigned long
+raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ unsigned long ret;
+
allow_write_to_user(to, n);
- ret = __copy_tofrom_user(to, (__force const void __user *)from, n);
+ ret = raw_copy_to_user_allowed(to, from, n);
prevent_write_to_user(to, n);
return ret;
}
-extern unsigned long __clear_user(void __user *addr, unsigned long size);
+static __always_inline unsigned long __must_check
+copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
+{
+ if (likely(check_copy_size(from, n, true))) {
+ if (access_ok(to, n)) {
+ allow_write_to_user(to, n);
+ n = memcpy_mcsafe((void *)to, from, n);
+ prevent_write_to_user(to, n);
+ }
+ }
+
+ return n;
+}
+
+unsigned long __arch_clear_user(void __user *addr, unsigned long size);
static inline unsigned long clear_user(void __user *addr, unsigned long size)
{
@@ -395,12 +437,17 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size)
might_fault();
if (likely(access_ok(addr, size))) {
allow_write_to_user(addr, size);
- ret = __clear_user(addr, size);
+ ret = __arch_clear_user(addr, size);
prevent_write_to_user(addr, size);
}
return ret;
}
+static inline unsigned long __clear_user(void __user *addr, unsigned long size)
+{
+ return clear_user(addr, size);
+}
+
extern long strncpy_from_user(char *dst, const char __user *src, long count);
extern __must_check long strnlen_user(const char __user *str, long n);
@@ -409,4 +456,22 @@ extern long __copy_from_user_flushcache(void *dst, const void __user *src,
extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
size_t len);
+static __must_check inline bool user_access_begin(const void __user *ptr, size_t len)
+{
+ if (unlikely(!access_ok(ptr, len)))
+ return false;
+ allow_read_write_user((void __user *)ptr, ptr, len);
+ return true;
+}
+#define user_access_begin user_access_begin
+#define user_access_end prevent_current_access_user
+#define user_access_save prevent_user_access_return
+#define user_access_restore restore_user_access
+
+#define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0)
+#define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
+#define unsafe_put_user(x, p, e) unsafe_op_wrap(__put_user_allowed(x, p), e)
+#define unsafe_copy_to_user(d, s, l, e) \
+ unsafe_op_wrap(raw_copy_to_user_allowed(d, s, l), e)
+
#endif /* _ARCH_POWERPC_UACCESS_H */
diff --git a/arch/powerpc/include/asm/ultravisor-api.h b/arch/powerpc/include/asm/ultravisor-api.h
new file mode 100644
index 000000000000..b66f6db7be6c
--- /dev/null
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Ultravisor API.
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#ifndef _ASM_POWERPC_ULTRAVISOR_API_H
+#define _ASM_POWERPC_ULTRAVISOR_API_H
+
+#include <asm/hvcall.h>
+
+/* Return codes */
+#define U_BUSY H_BUSY
+#define U_FUNCTION H_FUNCTION
+#define U_NOT_AVAILABLE H_NOT_AVAILABLE
+#define U_P2 H_P2
+#define U_P3 H_P3
+#define U_P4 H_P4
+#define U_P5 H_P5
+#define U_PARAMETER H_PARAMETER
+#define U_PERMISSION H_PERMISSION
+#define U_SUCCESS H_SUCCESS
+
+/* opcodes */
+#define UV_WRITE_PATE 0xF104
+#define UV_RETURN 0xF11C
+#define UV_ESM 0xF110
+#define UV_REGISTER_MEM_SLOT 0xF120
+#define UV_UNREGISTER_MEM_SLOT 0xF124
+#define UV_PAGE_IN 0xF128
+#define UV_PAGE_OUT 0xF12C
+#define UV_SHARE_PAGE 0xF130
+#define UV_UNSHARE_PAGE 0xF134
+#define UV_UNSHARE_ALL_PAGES 0xF140
+#define UV_PAGE_INVAL 0xF138
+#define UV_SVM_TERMINATE 0xF13C
+
+#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/include/asm/ultravisor.h b/arch/powerpc/include/asm/ultravisor.h
new file mode 100644
index 000000000000..790b0e63681f
--- /dev/null
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Ultravisor definitions
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#ifndef _ASM_POWERPC_ULTRAVISOR_H
+#define _ASM_POWERPC_ULTRAVISOR_H
+
+#include <asm/asm-prototypes.h>
+#include <asm/ultravisor-api.h>
+#include <asm/firmware.h>
+
+int early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
+ int depth, void *data);
+
+/*
+ * In ultravisor enabled systems, PTCR becomes ultravisor privileged only for
+ * writing and an attempt to write to it will cause a Hypervisor Emulation
+ * Assistance interrupt.
+ */
+static inline void set_ptcr_when_no_uv(u64 val)
+{
+ if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+ mtspr(SPRN_PTCR, val);
+}
+
+static inline int uv_register_pate(u64 lpid, u64 dw0, u64 dw1)
+{
+ return ucall_norets(UV_WRITE_PATE, lpid, dw0, dw1);
+}
+
+static inline int uv_share_page(u64 pfn, u64 npages)
+{
+ return ucall_norets(UV_SHARE_PAGE, pfn, npages);
+}
+
+static inline int uv_unshare_page(u64 pfn, u64 npages)
+{
+ return ucall_norets(UV_UNSHARE_PAGE, pfn, npages);
+}
+
+static inline int uv_unshare_all_pages(void)
+{
+ return ucall_norets(UV_UNSHARE_ALL_PAGES);
+}
+
+static inline int uv_page_in(u64 lpid, u64 src_ra, u64 dst_gpa, u64 flags,
+ u64 page_shift)
+{
+ return ucall_norets(UV_PAGE_IN, lpid, src_ra, dst_gpa, flags,
+ page_shift);
+}
+
+static inline int uv_page_out(u64 lpid, u64 dst_ra, u64 src_gpa, u64 flags,
+ u64 page_shift)
+{
+ return ucall_norets(UV_PAGE_OUT, lpid, dst_ra, src_gpa, flags,
+ page_shift);
+}
+
+static inline int uv_register_mem_slot(u64 lpid, u64 start_gpa, u64 size,
+ u64 flags, u64 slotid)
+{
+ return ucall_norets(UV_REGISTER_MEM_SLOT, lpid, start_gpa,
+ size, flags, slotid);
+}
+
+static inline int uv_unregister_mem_slot(u64 lpid, u64 slotid)
+{
+ return ucall_norets(UV_UNREGISTER_MEM_SLOT, lpid, slotid);
+}
+
+static inline int uv_page_inval(u64 lpid, u64 gpa, u64 page_shift)
+{
+ return ucall_norets(UV_PAGE_INVAL, lpid, gpa, page_shift);
+}
+
+static inline int uv_svm_terminate(u64 lpid)
+{
+ return ucall_norets(UV_SVM_TERMINATE, lpid);
+}
+
+#endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h
index c61d59ed3b45..b9ef6cf50ea5 100644
--- a/arch/powerpc/include/asm/vdso_datapage.h
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -81,7 +81,9 @@ struct vdso_data {
__u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */
__s32 wtom_clock_nsec; /* Wall to monotonic clock nsec */
__s64 wtom_clock_sec; /* Wall to monotonic clock sec */
- struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */
+ __s64 stamp_xtime_sec; /* xtime secs as at tb_orig_stamp */
+ __s64 stamp_xtime_nsec; /* xtime nsecs as at tb_orig_stamp */
+ __u32 hrtimer_res; /* hrtimer resolution */
__u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */
__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
};
@@ -101,19 +103,27 @@ struct vdso_data {
__u32 tz_dsttime; /* Type of dst correction 0x5C */
__s32 wtom_clock_sec; /* Wall to monotonic clock */
__s32 wtom_clock_nsec;
- struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */
+ __s32 stamp_xtime_sec; /* xtime seconds as at tb_orig_stamp */
+ __s32 stamp_xtime_nsec; /* xtime nsecs as at tb_orig_stamp */
__u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */
+ __u32 hrtimer_res; /* hrtimer resolution */
__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
- __u32 dcache_block_size; /* L1 d-cache block size */
- __u32 icache_block_size; /* L1 i-cache block size */
- __u32 dcache_log_block_size; /* L1 d-cache log block size */
- __u32 icache_log_block_size; /* L1 i-cache log block size */
};
#endif /* CONFIG_PPC64 */
extern struct vdso_data *vdso_data;
+#else /* __ASSEMBLY__ */
+
+.macro get_datapage ptr, tmp
+ bcl 20, 31, .+4
+ mflr \ptr
+ addi \ptr, \ptr, (__kernel_datapage_offset - (.-4))@l
+ lwz \tmp, 0(\ptr)
+ add \ptr, \tmp, \ptr
+.endm
+
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h
new file mode 100644
index 000000000000..b992dfaaa161
--- /dev/null
+++ b/arch/powerpc/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_POWERPC_VMALLOC_H
+#define _ASM_POWERPC_VMALLOC_H
+
+#endif /* _ASM_POWERPC_VMALLOC_H */
diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h
index f2dfcd50a2d3..33aee7490cbb 100644
--- a/arch/powerpc/include/asm/xive-regs.h
+++ b/arch/powerpc/include/asm/xive-regs.h
@@ -39,6 +39,7 @@
#define XIVE_ESB_VAL_P 0x2
#define XIVE_ESB_VAL_Q 0x1
+#define XIVE_ESB_INVALID 0xFF
/*
* Thread Management (aka "TM") registers
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index e4016985764e..93f982dbb3d4 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -46,7 +46,15 @@ struct xive_irq_data {
/* Setup/used by frontend */
int target;
+ /*
+ * saved_p means that there is a queue entry for this interrupt
+ * in some CPU's queue (not including guest vcpu queues), even
+ * if P is not set in the source ESB.
+ * stale_p means that there is no queue entry for this interrupt
+ * in some CPU's queue, even if P is set in the source ESB.
+ */
bool saved_p;
+ bool stale_p;
};
#define XIVE_IRQ_FLAG_STORE_EOI 0x01
#define XIVE_IRQ_FLAG_LSI 0x02
@@ -79,54 +87,56 @@ extern bool __xive_enabled;
static inline bool xive_enabled(void) { return __xive_enabled; }
-extern bool xive_spapr_init(void);
-extern bool xive_native_init(void);
-extern void xive_smp_probe(void);
-extern int xive_smp_prepare_cpu(unsigned int cpu);
-extern void xive_smp_setup_cpu(void);
-extern void xive_smp_disable_cpu(void);
-extern void xive_teardown_cpu(void);
-extern void xive_shutdown(void);
-extern void xive_flush_interrupt(void);
+bool xive_spapr_init(void);
+bool xive_native_init(void);
+void xive_smp_probe(void);
+int xive_smp_prepare_cpu(unsigned int cpu);
+void xive_smp_setup_cpu(void);
+void xive_smp_disable_cpu(void);
+void xive_teardown_cpu(void);
+void xive_shutdown(void);
+void xive_flush_interrupt(void);
/* xmon hook */
-extern void xmon_xive_do_dump(int cpu);
+void xmon_xive_do_dump(int cpu);
+int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d);
/* APIs used by KVM */
-extern u32 xive_native_default_eq_shift(void);
-extern u32 xive_native_alloc_vp_block(u32 max_vcpus);
-extern void xive_native_free_vp_block(u32 vp_base);
-extern int xive_native_populate_irq_data(u32 hw_irq,
- struct xive_irq_data *data);
-extern void xive_cleanup_irq_data(struct xive_irq_data *xd);
-extern u32 xive_native_alloc_irq(void);
-extern void xive_native_free_irq(u32 irq);
-extern int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq);
-
-extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
- __be32 *qpage, u32 order, bool can_escalate);
-extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
-
-extern void xive_native_sync_source(u32 hw_irq);
-extern void xive_native_sync_queue(u32 hw_irq);
-extern bool is_xive_irq(struct irq_chip *chip);
-extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
-extern int xive_native_disable_vp(u32 vp_id);
-extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
-extern bool xive_native_has_single_escalation(void);
-
-extern int xive_native_get_queue_info(u32 vp_id, uint32_t prio,
- u64 *out_qpage,
- u64 *out_qsize,
- u64 *out_qeoi_page,
- u32 *out_escalate_irq,
- u64 *out_qflags);
-
-extern int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle,
- u32 *qindex);
-extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle,
- u32 qindex);
-extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state);
+u32 xive_native_default_eq_shift(void);
+u32 xive_native_alloc_vp_block(u32 max_vcpus);
+void xive_native_free_vp_block(u32 vp_base);
+int xive_native_populate_irq_data(u32 hw_irq,
+ struct xive_irq_data *data);
+void xive_cleanup_irq_data(struct xive_irq_data *xd);
+u32 xive_native_alloc_irq(void);
+void xive_native_free_irq(u32 irq);
+int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq);
+
+int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
+ __be32 *qpage, u32 order, bool can_escalate);
+void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
+
+void xive_native_sync_source(u32 hw_irq);
+void xive_native_sync_queue(u32 hw_irq);
+bool is_xive_irq(struct irq_chip *chip);
+int xive_native_enable_vp(u32 vp_id, bool single_escalation);
+int xive_native_disable_vp(u32 vp_id);
+int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
+bool xive_native_has_single_escalation(void);
+
+int xive_native_get_queue_info(u32 vp_id, uint32_t prio,
+ u64 *out_qpage,
+ u64 *out_qsize,
+ u64 *out_qeoi_page,
+ u32 *out_escalate_irq,
+ u64 *out_qflags);
+
+int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle,
+ u32 *qindex);
+int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle,
+ u32 qindex);
+int xive_native_get_vp_state(u32 vp_id, u64 *out_state);
+bool xive_native_has_queue_state_support(void);
#else
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index b0f72dea8b11..264e266a85bf 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -667,6 +667,8 @@ struct kvm_ppc_cpu_char {
/* PPC64 eXternal Interrupt Controller Specification */
#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
+#define KVM_DEV_XICS_GRP_CTRL 2
+#define KVM_DEV_XICS_NR_SERVERS 1
/* Layout of 64-bit source attribute values */
#define KVM_XICS_DESTINATION_SHIFT 0
@@ -683,6 +685,7 @@ struct kvm_ppc_cpu_char {
#define KVM_DEV_XIVE_GRP_CTRL 1
#define KVM_DEV_XIVE_RESET 1
#define KVM_DEV_XIVE_EQ_SYNC 2
+#define KVM_DEV_XIVE_NR_SERVERS 3
#define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */
#define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */
#define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */
diff --git a/arch/powerpc/include/uapi/asm/msgbuf.h b/arch/powerpc/include/uapi/asm/msgbuf.h
index 2b1b37797a47..7919b2ba41b5 100644
--- a/arch/powerpc/include/uapi/asm/msgbuf.h
+++ b/arch/powerpc/include/uapi/asm/msgbuf.h
@@ -2,6 +2,8 @@
#ifndef _ASM_POWERPC_MSGBUF_H
#define _ASM_POWERPC_MSGBUF_H
+#include <asm/ipcbuf.h>
+
/*
* The msqid64_ds structure for the PowerPC architecture.
* Note extra padding because this structure is passed back and forth
@@ -11,9 +13,9 @@
struct msqid64_ds {
struct ipc64_perm msg_perm;
#ifdef __powerpc64__
- __kernel_time_t msg_stime; /* last msgsnd time */
- __kernel_time_t msg_rtime; /* last msgrcv time */
- __kernel_time_t msg_ctime; /* last change time */
+ long msg_stime; /* last msgsnd time */
+ long msg_rtime; /* last msgrcv time */
+ long msg_ctime; /* last change time */
#else
unsigned long msg_stime_high;
unsigned long msg_stime; /* last msgsnd time */
diff --git a/arch/powerpc/include/uapi/asm/sembuf.h b/arch/powerpc/include/uapi/asm/sembuf.h
index 3f60946f77e3..85e96ccb5f0f 100644
--- a/arch/powerpc/include/uapi/asm/sembuf.h
+++ b/arch/powerpc/include/uapi/asm/sembuf.h
@@ -2,6 +2,8 @@
#ifndef _ASM_POWERPC_SEMBUF_H
#define _ASM_POWERPC_SEMBUF_H
+#include <asm/ipcbuf.h>
+
/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -26,8 +28,8 @@ struct semid64_ds {
unsigned long sem_ctime_high;
unsigned long sem_ctime; /* last change time */
#else
- __kernel_time_t sem_otime; /* last semop time */
- __kernel_time_t sem_ctime; /* last change time */
+ long sem_otime; /* last semop time */
+ long sem_ctime; /* last change time */
#endif
unsigned long sem_nsems; /* no. of semaphores in array */
unsigned long __unused3;
diff --git a/arch/powerpc/include/uapi/asm/shmbuf.h b/arch/powerpc/include/uapi/asm/shmbuf.h
index b591c4d7e4c5..00422b2f3c63 100644
--- a/arch/powerpc/include/uapi/asm/shmbuf.h
+++ b/arch/powerpc/include/uapi/asm/shmbuf.h
@@ -22,9 +22,9 @@
struct shmid64_ds {
struct ipc64_perm shm_perm; /* operation perms */
#ifdef __powerpc64__
- __kernel_time_t shm_atime; /* last attach time */
- __kernel_time_t shm_dtime; /* last detach time */
- __kernel_time_t shm_ctime; /* last change time */
+ long shm_atime; /* last attach time */
+ long shm_dtime; /* last detach time */
+ long shm_ctime; /* last change time */
#else
unsigned long shm_atime_high;
unsigned long shm_atime; /* last attach time */
diff --git a/arch/powerpc/include/uapi/asm/spu_info.h b/arch/powerpc/include/uapi/asm/spu_info.h
index cabfcbba9eac..45f97150587b 100644
--- a/arch/powerpc/include/uapi/asm/spu_info.h
+++ b/arch/powerpc/include/uapi/asm/spu_info.h
@@ -5,20 +5,6 @@
* (C) Copyright 2006 IBM Corp.
*
* Author: Dwayne Grant McConnell <decimal@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _UAPI_SPU_INFO_H
diff --git a/arch/powerpc/include/uapi/asm/stat.h b/arch/powerpc/include/uapi/asm/stat.h
index afd25f2ff4e8..7871055e5e32 100644
--- a/arch/powerpc/include/uapi/asm/stat.h
+++ b/arch/powerpc/include/uapi/asm/stat.h
@@ -40,7 +40,7 @@ struct stat {
uid_t st_uid;
gid_t st_gid;
unsigned long st_rdev;
- off_t st_size;
+ long st_size;
unsigned long st_blksize;
unsigned long st_blocks;
unsigned long st_atime;
diff --git a/arch/powerpc/kernel/.gitignore b/arch/powerpc/kernel/.gitignore
index c5f676c3c224..67ebd3003c05 100644
--- a/arch/powerpc/kernel/.gitignore
+++ b/arch/powerpc/kernel/.gitignore
@@ -1 +1,2 @@
+prom_init_check
vmlinux.lds
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 56dfa7a2a6f2..78a1b22d4fd8 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -5,9 +5,6 @@
CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
-# Disable clang warning for using setjmp without setjmp.h header
-CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header)
-
ifdef CONFIG_PPC64
CFLAGS_prom_init.o += $(NO_MINIMAL_TOC)
endif
@@ -22,6 +19,8 @@ CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector)
+CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_prom_init.o += -ffreestanding
ifdef CONFIG_FUNCTION_TRACER
# Do not trace early boot code
@@ -39,7 +38,6 @@ KASAN_SANITIZE_btext.o := n
ifdef CONFIG_KASAN
CFLAGS_early_32.o += -DDISABLE_BRANCH_PROFILING
CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING
-CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
endif
@@ -52,7 +50,7 @@ obj-y := cputable.o ptrace.o syscalls.o \
of_platform.o prom_parse.o
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
- paca.o nvram_64.o firmware.o
+ paca.o nvram_64.o firmware.o note.o
obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
@@ -64,8 +62,7 @@ obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o
obj-$(CONFIG_PPC64) += vdso64/
obj-$(CONFIG_ALTIVEC) += vecemu.o
-obj-$(CONFIG_PPC_970_NAP) += idle_power4.o
-obj-$(CONFIG_PPC_P7_NAP) += idle_book3s.o
+obj-$(CONFIG_PPC_BOOK3S_IDLE) += idle_book3s.o
procfs-y := proc_powerpc.o
obj-$(CONFIG_PROC_FS) += $(procfs-y)
rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o
@@ -79,6 +76,7 @@ obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_FA_DUMP) += fadump.o
+obj-$(CONFIG_PRESERVE_FA_DUMP) += fadump.o
ifdef CONFIG_PPC32
obj-$(CONFIG_E500) += idle_e500.o
endif
@@ -124,14 +122,6 @@ pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o
obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \
pci-common.o pci_of_scan.o
obj-$(CONFIG_PCI_MSI) += msi.o
-obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o crash.o \
- machine_kexec_$(BITS).o
-obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file_$(BITS).o kexec_elf_$(BITS).o
-ifdef CONFIG_HAVE_IMA_KEXEC
-ifdef CONFIG_IMA
-obj-y += ima_kexec.o
-endif
-endif
obj-$(CONFIG_AUDIT) += audit.o
obj64-$(CONFIG_AUDIT) += compat_audit.o
@@ -155,17 +145,17 @@ endif
obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
+ifneq ($(CONFIG_PPC_POWERNV)$(CONFIG_PPC_SVM),)
+obj-y += ucall.o
+endif
+
+obj-$(CONFIG_PPC_SECURE_BOOT) += secure_boot.o ima_arch.o secvar-ops.o
+obj-$(CONFIG_PPC_SECVAR_SYSFS) += secvar-sysfs.o
# Disable GCOV, KCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_prom_init.o := n
KCOV_INSTRUMENT_prom_init.o := n
UBSAN_SANITIZE_prom_init.o := n
-GCOV_PROFILE_machine_kexec_64.o := n
-KCOV_INSTRUMENT_machine_kexec_64.o := n
-UBSAN_SANITIZE_machine_kexec_64.o := n
-GCOV_PROFILE_machine_kexec_32.o := n
-KCOV_INSTRUMENT_machine_kexec_32.o := n
-UBSAN_SANITIZE_machine_kexec_32.o := n
GCOV_PROFILE_kprobes.o := n
KCOV_INSTRUMENT_kprobes.o := n
UBSAN_SANITIZE_kprobes.o := n
@@ -184,15 +174,13 @@ extra-$(CONFIG_ALTIVEC) += vector.o
extra-$(CONFIG_PPC64) += entry_64.o
extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o
-ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
-$(obj)/built-in.a: prom_init_check
+extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check
-quiet_cmd_prom_init_check = CALL $<
- cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" "$(obj)/prom_init.o"
+quiet_cmd_prom_init_check = PROMCHK $@
+ cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" $(obj)/prom_init.o; touch $@
-PHONY += prom_init_check
-prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o
- $(call cmd,prom_init_check)
-endif
+$(obj)/prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o FORCE
+ $(call if_changed,prom_init_check)
+targets += prom_init_check
clean-files := vmlinux.lds
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4ccb6b3a7fbd..c25e562f1cd9 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -127,6 +127,12 @@ int main(void)
OFFSET(KSP_VSID, thread_struct, ksp_vsid);
#else /* CONFIG_PPC64 */
OFFSET(PGDIR, thread_struct, pgdir);
+#ifdef CONFIG_VMAP_STACK
+ OFFSET(SRR0, thread_struct, srr0);
+ OFFSET(SRR1, thread_struct, srr1);
+ OFFSET(DAR, thread_struct, dar);
+ OFFSET(DSISR, thread_struct, dsisr);
+#endif
#ifdef CONFIG_SPE
OFFSET(THREAD_EVR0, thread_struct, evr[0]);
OFFSET(THREAD_ACC, thread_struct, acc);
@@ -385,28 +391,25 @@ int main(void)
OFFSET(CFG_SYSCALL_MAP32, vdso_data, syscall_map_32);
OFFSET(WTOM_CLOCK_SEC, vdso_data, wtom_clock_sec);
OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec);
- OFFSET(STAMP_XTIME, vdso_data, stamp_xtime);
+ OFFSET(STAMP_XTIME_SEC, vdso_data, stamp_xtime_sec);
+ OFFSET(STAMP_XTIME_NSEC, vdso_data, stamp_xtime_nsec);
OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction);
+ OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res);
+#ifdef CONFIG_PPC64
OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size);
OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size);
OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size);
OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_data, dcache_log_block_size);
-#ifdef CONFIG_PPC64
OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64);
- OFFSET(TVAL64_TV_SEC, timeval, tv_sec);
- OFFSET(TVAL64_TV_USEC, timeval, tv_usec);
+ OFFSET(TVAL64_TV_SEC, __kernel_old_timeval, tv_sec);
+ OFFSET(TVAL64_TV_USEC, __kernel_old_timeval, tv_usec);
+#endif
+ OFFSET(TSPC64_TV_SEC, __kernel_timespec, tv_sec);
+ OFFSET(TSPC64_TV_NSEC, __kernel_timespec, tv_nsec);
OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec);
OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec);
- OFFSET(TSPC64_TV_SEC, timespec, tv_sec);
- OFFSET(TSPC64_TV_NSEC, timespec, tv_nsec);
OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec);
OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec);
-#else
- OFFSET(TVAL32_TV_SEC, timeval, tv_sec);
- OFFSET(TVAL32_TV_USEC, timeval, tv_usec);
- OFFSET(TSPC32_TV_SEC, timespec, tv_sec);
- OFFSET(TSPC32_TV_NSEC, timespec, tv_nsec);
-#endif
/* timeval/timezone offsets for use by vdso */
OFFSET(TZONE_TZ_MINWEST, timezone, tz_minuteswest);
OFFSET(TZONE_TZ_DSTTIME, timezone, tz_dsttime);
@@ -416,8 +419,10 @@ int main(void)
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
+ DEFINE(CLOCK_MAX, CLOCK_TAI);
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
- DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
+ DEFINE(EINVAL, EINVAL);
+ DEFINE(KTIME_LOW_RES, KTIME_LOW_RES);
#ifdef CONFIG_BUG
DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
@@ -506,6 +511,7 @@ int main(void)
OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
OFFSET(KVM_RADIX, kvm, arch.radix);
OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled);
+ OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest);
OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 2b4f3ec0acf7..1d308780e0d3 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -231,7 +231,7 @@ _GLOBAL(__setup_cpu_e5500)
blr
#endif
-/* flush L1 date cache, it can apply to e500v2, e500mc and e5500 */
+/* flush L1 data cache, it can apply to e500v2, e500mc and e5500 */
_GLOBAL(flush_dcache_L1)
mfmsr r10
wrteei 0
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 3239a9fe6c1c..a460298c7ddb 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -23,6 +23,7 @@ _GLOBAL(__setup_cpu_power7)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
@@ -37,6 +38,7 @@ _GLOBAL(__restore_cpu_power7)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
@@ -54,6 +56,7 @@ _GLOBAL(__setup_cpu_power8)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
@@ -76,6 +79,7 @@ _GLOBAL(__restore_cpu_power8)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
@@ -98,6 +102,7 @@ _GLOBAL(__setup_cpu_power9)
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mtspr SPRN_PID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
@@ -123,6 +128,7 @@ _GLOBAL(__restore_cpu_power9)
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mtspr SPRN_PID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index bfe5f4a2886b..e745abc5457a 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -569,7 +569,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC32
-#ifdef CONFIG_PPC_BOOK3S_32
+#ifdef CONFIG_PPC_BOOK3S_601
{ /* 601 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00010000,
@@ -583,6 +583,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "ppc601",
},
+#endif /* CONFIG_PPC_BOOK3S_601 */
+#ifdef CONFIG_PPC_BOOK3S_6xx
{ /* 603 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00030000,
@@ -1212,7 +1214,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "ppc603",
},
-#endif /* CONFIG_PPC_BOOK3S_32 */
+#endif /* CONFIG_PPC_BOOK3S_6xx */
#ifdef CONFIG_PPC_8xx
{ /* 8xx */
.pvr_mask = 0xffff0000,
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
index 5f66b95b6858..cc14aa6c4a1b 100644
--- a/arch/powerpc/kernel/dawr.c
+++ b/arch/powerpc/kernel/dawr.c
@@ -30,10 +30,10 @@ int set_dawr(struct arch_hw_breakpoint *brk)
* DAWR length is stored in field MDR bits 48:53. Matches range in
* doublewords (64 bits) baised by -1 eg. 0b000000=1DW and
* 0b111111=64DW.
- * brk->len is in bytes.
+ * brk->hw_len is in bytes.
* This aligns up to double word size, shifts and does the bias.
*/
- mrd = ((brk->len + 7) >> 3) - 1;
+ mrd = ((brk->hw_len + 7) >> 3) - 1;
dawrx |= (mrd & 0x3f) << (63 - 53);
if (ppc_md.set_dawr)
@@ -54,7 +54,7 @@ static ssize_t dawr_write_file_bool(struct file *file,
const char __user *user_buf,
size_t count, loff_t *ppos)
{
- struct arch_hw_breakpoint null_brk = {0, 0, 0};
+ struct arch_hw_breakpoint null_brk = {0};
size_t rc;
/* Send error to user if they hypervisor won't allow us to write DAWR */
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 804b1a6196fa..f17ff1200eaa 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -33,7 +33,7 @@ void doorbell_global_ipi(int cpu)
{
u32 tag = get_hard_smp_processor_id(cpu);
- kvmppc_set_host_ipi(cpu, 1);
+ kvmppc_set_host_ipi(cpu);
/* Order previous accesses vs. msgsnd, which is treated as a store */
ppc_msgsnd_sync();
ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
@@ -48,7 +48,7 @@ void doorbell_core_ipi(int cpu)
{
u32 tag = cpu_thread_in_core(cpu);
- kvmppc_set_host_ipi(cpu, 1);
+ kvmppc_set_host_ipi(cpu);
/* Order previous accesses vs. msgsnd, which is treated as a store */
ppc_msgsnd_sync();
ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
@@ -84,7 +84,7 @@ void doorbell_exception(struct pt_regs *regs)
may_hard_irq_enable();
- kvmppc_set_host_ipi(smp_processor_id(), 0);
+ kvmppc_clear_host_ipi(smp_processor_id());
__this_cpu_inc(irq_stat.doorbell_irqs);
smp_ipi_demux_relaxed(); /* already performed the barrier */
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index a0879674a9c8..e486d1d78de2 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -122,18 +122,17 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
{
struct iommu_table *tbl = get_iommu_table_base(dev);
- if (!tbl) {
- dev_info(dev, "Warning: IOMMU dma not supported: mask 0x%08llx"
- ", table unavailable\n", mask);
- return 0;
- }
-
if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
dev->archdata.iommu_bypass = true;
dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
return 1;
}
+ if (!tbl) {
+ dev_err(dev, "Warning: IOMMU dma not supported: mask 0x%08llx, table unavailable\n", mask);
+ return 0;
+ }
+
if (tbl->it_offset > (mask >> tbl->it_page_shift)) {
dev_info(dev, "Warning: IOMMU offset too big for device mask\n");
dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",
@@ -208,4 +207,6 @@ const struct dma_map_ops dma_iommu_ops = {
.sync_single_for_device = dma_iommu_sync_for_device,
.sync_sg_for_cpu = dma_iommu_sync_sg_for_cpu,
.sync_sg_for_device = dma_iommu_sync_sg_for_device,
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
};
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index bd95318d2202..182b4047c1ef 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -101,7 +101,7 @@ static void __restore_cpu_cpufeatures(void)
if (hv_mode) {
mtspr(SPRN_LPID, 0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
- mtspr(SPRN_PCR, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
}
mtspr(SPRN_FSCR, system_registers.fscr);
@@ -144,6 +144,7 @@ static void __init cpufeatures_setup_cpu(void)
mtspr(SPRN_HFSCR, 0);
}
mtspr(SPRN_FSCR, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
/*
* LPCR does not get cleared, to match behaviour with secondaries
@@ -691,31 +692,62 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
return true;
}
+/*
+ * Handle POWER9 broadcast tlbie invalidation issue using
+ * cpu feature flag.
+ */
+static __init void update_tlbie_feature_flag(unsigned long pvr)
+{
+ if (PVR_VER(pvr) == PVR_POWER9) {
+ /*
+ * Set the tlbie feature flag for anything below
+ * Nimbus DD 2.3 and Cumulus DD 1.3
+ */
+ if ((pvr & 0xe000) == 0) {
+ /* Nimbus */
+ if ((pvr & 0xfff) < 0x203)
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ } else if ((pvr & 0xc000) == 0) {
+ /* Cumulus */
+ if ((pvr & 0xfff) < 0x103)
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ } else {
+ WARN_ONCE(1, "Unknown PVR");
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ }
+
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG;
+ }
+}
+
static __init void cpufeatures_cpu_quirks(void)
{
- int version = mfspr(SPRN_PVR);
+ unsigned long version = mfspr(SPRN_PVR);
/*
* Not all quirks can be derived from the cpufeatures device tree.
*/
- if ((version & 0xffffefff) == 0x004e0200)
- ; /* DD2.0 has no feature flag */
- else if ((version & 0xffffefff) == 0x004e0201)
+ if ((version & 0xffffefff) == 0x004e0200) {
+ /* DD2.0 has no feature flag */
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_RADIX_PREFETCH_BUG;
+ } else if ((version & 0xffffefff) == 0x004e0201) {
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
- else if ((version & 0xffffefff) == 0x004e0202) {
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_RADIX_PREFETCH_BUG;
+ } else if ((version & 0xffffefff) == 0x004e0202) {
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST;
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG;
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
- } else if ((version & 0xffff0000) == 0x004e0000)
+ } else if ((version & 0xffff0000) == 0x004e0000) {
/* DD2.1 and up have DD2_1 */
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+ }
if ((version & 0xffff0000) == 0x004e0000) {
cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
- cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TIDR;
}
+ update_tlbie_feature_flag(version);
/*
* PKEY was not in the initial base or feature node
* specification, but it should become optional in the next
diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c
index 3482118ffe76..ef2ad4945904 100644
--- a/arch/powerpc/kernel/early_32.c
+++ b/arch/powerpc/kernel/early_32.c
@@ -19,10 +19,13 @@
*/
notrace unsigned long __init early_init(unsigned long dt_ptr)
{
- unsigned long offset = reloc_offset();
+ unsigned long kva, offset = reloc_offset();
+
+ kva = *PTRRELOC(&kernstart_virt_addr);
/* First zero the BSS */
- memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start);
+ if (kva == KERNELBASE)
+ memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start);
/*
* Identify the CPU type and fix up code sections
@@ -32,5 +35,5 @@ notrace unsigned long __init early_init(unsigned long dt_ptr)
apply_feature_fixups();
- return KERNELBASE + offset;
+ return kva + offset;
}
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index c0e4b73191f3..17cb3e9b5697 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -150,6 +150,16 @@ static int __init eeh_setup(char *str)
}
__setup("eeh=", eeh_setup);
+void eeh_show_enabled(void)
+{
+ if (eeh_has_flag(EEH_FORCE_DISABLED))
+ pr_info("EEH: Recovery disabled by kernel parameter.\n");
+ else if (eeh_has_flag(EEH_ENABLED))
+ pr_info("EEH: Capable adapter found: recovery enabled.\n");
+ else
+ pr_info("EEH: No capable adapters found: recovery disabled.\n");
+}
+
/*
* This routine captures assorted PCI configuration space data
* for the indicated PCI device, and puts them into a buffer
@@ -410,11 +420,9 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
eeh_pe_mark_isolated(phb_pe);
eeh_serialize_unlock(flags);
- pr_err("EEH: PHB#%x failure detected, location: %s\n",
+ pr_debug("EEH: PHB#%x failure detected, location: %s\n",
phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
- dump_stack();
eeh_send_failure_event(phb_pe);
-
return 1;
out:
eeh_serialize_unlock(flags);
@@ -441,7 +449,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
unsigned long flags;
struct device_node *dn;
struct pci_dev *dev;
- struct eeh_pe *pe, *parent_pe, *phb_pe;
+ struct eeh_pe *pe, *parent_pe;
int rc = 0;
const char *location = NULL;
@@ -460,8 +468,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
/* Access to IO BARs might get this far and still not want checking. */
if (!pe) {
eeh_stats.ignored_check++;
- pr_debug("EEH: Ignored check for %s\n",
- eeh_pci_name(dev));
+ eeh_edev_dbg(edev, "Ignored check\n");
return 0;
}
@@ -496,17 +503,16 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
rc = 1;
if (pe->state & EEH_PE_ISOLATED) {
pe->check_count++;
- if (pe->check_count % EEH_MAX_FAILS == 0) {
+ if (pe->check_count == EEH_MAX_FAILS) {
dn = pci_device_to_OF_node(dev);
if (dn)
location = of_get_property(dn, "ibm,loc-code",
NULL);
- printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
- "location=%s driver=%s pci addr=%s\n",
+ eeh_edev_err(edev, "%d reads ignored for recovering device at location=%s driver=%s\n",
pe->check_count,
location ? location : "unknown",
- eeh_driver_name(dev), eeh_pci_name(dev));
- printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
+ eeh_driver_name(dev));
+ eeh_edev_err(edev, "Might be infinite loop in %s driver\n",
eeh_driver_name(dev));
dump_stack();
}
@@ -573,13 +579,8 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
* a stack trace will help the device-driver authors figure
* out what happened. So print that out.
*/
- phb_pe = eeh_phb_pe_get(pe->phb);
- pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
- pe->phb->global_number, pe->addr);
- pr_err("EEH: PE location: %s, PHB location: %s\n",
- eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
- dump_stack();
-
+ pr_debug("EEH: %s: Frozen PHB#%x-PE#%x detected\n",
+ __func__, pe->phb->global_number, pe->addr);
eeh_send_failure_event(pe);
return 1;
@@ -697,7 +698,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
return rc;
}
-static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
+static void eeh_disable_and_save_dev_state(struct eeh_dev *edev,
void *userdata)
{
struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
@@ -708,7 +709,7 @@ static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
* state for the specified device
*/
if (!pdev || pdev == dev)
- return NULL;
+ return;
/* Ensure we have D0 power state */
pci_set_power_state(pdev, PCI_D0);
@@ -721,18 +722,16 @@ static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
* interrupt from the device
*/
pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
-
- return NULL;
}
-static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
+static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
struct pci_dev *dev = userdata;
if (!pdev)
- return NULL;
+ return;
/* Apply customization from firmware */
if (pdn && eeh_ops->restore_config)
@@ -741,8 +740,6 @@ static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
/* The caller should restore state for the specified device */
if (pdev != dev)
pci_restore_state(pdev);
-
- return NULL;
}
int eeh_restore_vf_config(struct pci_dn *pdn)
@@ -868,7 +865,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
* the indicated device and its children so that the bunch of the
* devices could be reset properly.
*/
-static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
+static void eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
{
struct pci_dev *dev;
unsigned int *freset = (unsigned int *)flag;
@@ -876,8 +873,6 @@ static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
dev = eeh_dev_to_pci_dev(edev);
if (dev)
*freset |= dev->needs_freset;
-
- return NULL;
}
static void eeh_pe_refreeze_passed(struct eeh_pe *root)
@@ -1063,23 +1058,6 @@ static struct notifier_block eeh_reboot_nb = {
.notifier_call = eeh_reboot_notifier,
};
-void eeh_probe_devices(void)
-{
- struct pci_controller *hose, *tmp;
- struct pci_dn *pdn;
-
- /* Enable EEH for all adapters */
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
- pdn = hose->pci_data;
- traverse_pci_dn(pdn, eeh_ops->probe, NULL);
- }
- if (eeh_enabled())
- pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
- else
- pr_info("EEH: No capable adapters found\n");
-
-}
-
/**
* eeh_init - EEH initialization
*
@@ -1120,6 +1098,8 @@ static int eeh_init(void)
list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
eeh_dev_phb_init_dynamic(hose);
+ eeh_addr_cache_init();
+
/* Initialize EEH event */
return eeh_event_init();
}
@@ -1190,15 +1170,14 @@ void eeh_add_device_late(struct pci_dev *dev)
struct pci_dn *pdn;
struct eeh_dev *edev;
- if (!dev || !eeh_enabled())
+ if (!dev)
return;
- pr_debug("EEH: Adding device %s\n", pci_name(dev));
-
pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
edev = pdn_to_eeh_dev(pdn);
+ eeh_edev_dbg(edev, "Adding device\n");
if (edev->pdev == dev) {
- pr_debug("EEH: Already referenced !\n");
+ eeh_edev_dbg(edev, "Device already referenced!\n");
return;
}
@@ -1212,7 +1191,6 @@ void eeh_add_device_late(struct pci_dev *dev)
eeh_rmv_from_parent_pe(edev);
eeh_addr_cache_rmv_dev(edev->pdev);
eeh_sysfs_remove_device(edev->pdev);
- edev->mode &= ~EEH_DEV_SYSFS;
/*
* We definitely should have the PCI device removed
@@ -1246,6 +1224,8 @@ void eeh_add_device_tree_late(struct pci_bus *bus)
{
struct pci_dev *dev;
+ if (eeh_has_flag(EEH_FORCE_DISABLED))
+ return;
list_for_each_entry(dev, &bus->devices, bus_list) {
eeh_add_device_late(dev);
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
@@ -1299,10 +1279,10 @@ void eeh_remove_device(struct pci_dev *dev)
edev = pci_dev_to_eeh_dev(dev);
/* Unregister the device with the EEH/PCI address search system */
- pr_debug("EEH: Removing device %s\n", pci_name(dev));
+ dev_dbg(&dev->dev, "EEH: Removing device\n");
if (!edev || !edev->pdev || !edev->pe) {
- pr_debug("EEH: Not referenced !\n");
+ dev_dbg(&dev->dev, "EEH: Device not referenced!\n");
return;
}
@@ -1315,17 +1295,11 @@ void eeh_remove_device(struct pci_dev *dev)
edev->pdev = NULL;
/*
- * The flag "in_error" is used to trace EEH devices for VFs
- * in error state or not. It's set in eeh_report_error(). If
- * it's not set, eeh_report_{reset,resume}() won't be called
- * for the VF EEH device.
+ * eeh_sysfs_remove_device() uses pci_dev_to_eeh_dev() so we need to
+ * remove the sysfs files before clearing dev.archdata.edev
*/
- edev->in_error = false;
- dev->dev.archdata.edev = NULL;
- if (!(edev->pe->state & EEH_PE_KEEP))
- eeh_rmv_from_parent_pe(edev);
- else
- edev->mode |= EEH_DEV_DISCONNECTED;
+ if (edev->mode & EEH_DEV_SYSFS)
+ eeh_sysfs_remove_device(dev);
/*
* We're removing from the PCI subsystem, that means
@@ -1336,8 +1310,19 @@ void eeh_remove_device(struct pci_dev *dev)
edev->mode |= EEH_DEV_NO_HANDLER;
eeh_addr_cache_rmv_dev(dev);
- eeh_sysfs_remove_device(dev);
- edev->mode &= ~EEH_DEV_SYSFS;
+
+ /*
+ * The flag "in_error" is used to trace EEH devices for VFs
+ * in error state or not. It's set in eeh_report_error(). If
+ * it's not set, eeh_report_{reset,resume}() won't be called
+ * for the VF EEH device.
+ */
+ edev->in_error = false;
+ dev->dev.archdata.edev = NULL;
+ if (!(edev->pe->state & EEH_PE_KEEP))
+ eeh_rmv_from_parent_pe(edev);
+ else
+ edev->mode |= EEH_DEV_DISCONNECTED;
}
int eeh_unfreeze_pe(struct eeh_pe *pe)
@@ -1890,6 +1875,198 @@ static const struct file_operations eeh_force_recover_fops = {
.llseek = no_llseek,
.write = eeh_force_recover_write,
};
+
+static ssize_t eeh_debugfs_dev_usage(struct file *filp,
+ char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ static const char usage[] = "input format: <domain>:<bus>:<dev>.<fn>\n";
+
+ return simple_read_from_buffer(user_buf, count, ppos,
+ usage, sizeof(usage) - 1);
+}
+
+static ssize_t eeh_dev_check_write(struct file *filp,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ uint32_t domain, bus, dev, fn;
+ struct pci_dev *pdev;
+ struct eeh_dev *edev;
+ char buf[20];
+ int ret;
+
+ memset(buf, 0, sizeof(buf));
+ ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
+ if (!ret)
+ return -EFAULT;
+
+ ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
+ if (ret != 4) {
+ pr_err("%s: expected 4 args, got %d\n", __func__, ret);
+ return -EINVAL;
+ }
+
+ pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
+ if (!pdev)
+ return -ENODEV;
+
+ edev = pci_dev_to_eeh_dev(pdev);
+ if (!edev) {
+ pci_err(pdev, "No eeh_dev for this device!\n");
+ pci_dev_put(pdev);
+ return -ENODEV;
+ }
+
+ ret = eeh_dev_check_failure(edev);
+ pci_info(pdev, "eeh_dev_check_failure(%04x:%02x:%02x.%01x) = %d\n",
+ domain, bus, dev, fn, ret);
+
+ pci_dev_put(pdev);
+
+ return count;
+}
+
+static const struct file_operations eeh_dev_check_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = eeh_dev_check_write,
+ .read = eeh_debugfs_dev_usage,
+};
+
+static int eeh_debugfs_break_device(struct pci_dev *pdev)
+{
+ struct resource *bar = NULL;
+ void __iomem *mapped;
+ u16 old, bit;
+ int i, pos;
+
+ /* Do we have an MMIO BAR to disable? */
+ for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
+ struct resource *r = &pdev->resource[i];
+
+ if (!r->flags || !r->start)
+ continue;
+ if (r->flags & IORESOURCE_IO)
+ continue;
+ if (r->flags & IORESOURCE_UNSET)
+ continue;
+
+ bar = r;
+ break;
+ }
+
+ if (!bar) {
+ pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
+ return -ENXIO;
+ }
+
+ pci_err(pdev, "Going to break: %pR\n", bar);
+
+ if (pdev->is_virtfn) {
+#ifndef CONFIG_PCI_IOV
+ return -ENXIO;
+#else
+ /*
+ * VFs don't have a per-function COMMAND register, so the best
+ * we can do is clear the Memory Space Enable bit in the PF's
+ * SRIOV control reg.
+ *
+ * Unfortunately, this requires that we have a PF (i.e doesn't
+ * work for a passed-through VF) and it has the potential side
+ * effect of also causing an EEH on every other VF under the
+ * PF. Oh well.
+ */
+ pdev = pdev->physfn;
+ if (!pdev)
+ return -ENXIO; /* passed through VFs have no PF */
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+ pos += PCI_SRIOV_CTRL;
+ bit = PCI_SRIOV_CTRL_MSE;
+#endif /* !CONFIG_PCI_IOV */
+ } else {
+ bit = PCI_COMMAND_MEMORY;
+ pos = PCI_COMMAND;
+ }
+
+ /*
+ * Process here is:
+ *
+ * 1. Disable Memory space.
+ *
+ * 2. Perform an MMIO to the device. This should result in an error
+ * (CA / UR) being raised by the device which results in an EEH
+ * PE freeze. Using the in_8() accessor skips the eeh detection hook
+ * so the freeze hook so the EEH Detection machinery won't be
+ * triggered here. This is to match the usual behaviour of EEH
+ * where the HW will asyncronously freeze a PE and it's up to
+ * the kernel to notice and deal with it.
+ *
+ * 3. Turn Memory space back on. This is more important for VFs
+ * since recovery will probably fail if we don't. For normal
+ * the COMMAND register is reset as a part of re-initialising
+ * the device.
+ *
+ * Breaking stuff is the point so who cares if it's racy ;)
+ */
+ pci_read_config_word(pdev, pos, &old);
+
+ mapped = ioremap(bar->start, PAGE_SIZE);
+ if (!mapped) {
+ pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
+ return -ENXIO;
+ }
+
+ pci_write_config_word(pdev, pos, old & ~bit);
+ in_8(mapped);
+ pci_write_config_word(pdev, pos, old);
+
+ iounmap(mapped);
+
+ return 0;
+}
+
+static ssize_t eeh_dev_break_write(struct file *filp,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ uint32_t domain, bus, dev, fn;
+ struct pci_dev *pdev;
+ char buf[20];
+ int ret;
+
+ memset(buf, 0, sizeof(buf));
+ ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
+ if (!ret)
+ return -EFAULT;
+
+ ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
+ if (ret != 4) {
+ pr_err("%s: expected 4 args, got %d\n", __func__, ret);
+ return -EINVAL;
+ }
+
+ pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
+ if (!pdev)
+ return -ENODEV;
+
+ ret = eeh_debugfs_break_device(pdev);
+ pci_dev_put(pdev);
+
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+static const struct file_operations eeh_dev_break_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = eeh_dev_break_write,
+ .read = eeh_debugfs_dev_usage,
+};
+
#endif
static int __init eeh_init_proc(void)
@@ -1905,6 +2082,12 @@ static int __init eeh_init_proc(void)
debugfs_create_bool("eeh_disable_recovery", 0600,
powerpc_debugfs_root,
&eeh_debugfs_no_recover);
+ debugfs_create_file_unsafe("eeh_dev_check", 0600,
+ powerpc_debugfs_root, NULL,
+ &eeh_dev_check_fops);
+ debugfs_create_file_unsafe("eeh_dev_break", 0600,
+ powerpc_debugfs_root, NULL,
+ &eeh_dev_break_fops);
debugfs_create_file_unsafe("eeh_force_recover", 0600,
powerpc_debugfs_root, NULL,
&eeh_force_recover_fops);
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index 05ffd32b3416..6b50bf15d8c1 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -148,8 +148,8 @@ eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo,
piar->pcidev = dev;
piar->flags = flags;
- pr_debug("PIAR: insert range=[%pap:%pap] dev=%s\n",
- &alo, &ahi, pci_name(dev));
+ eeh_edev_dbg(piar->edev, "PIAR: insert range=[%pap:%pap]\n",
+ &alo, &ahi);
rb_link_node(&piar->rb_node, parent, p);
rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
@@ -159,18 +159,10 @@ eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo,
static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
{
- struct pci_dn *pdn;
struct eeh_dev *edev;
int i;
- pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
- if (!pdn) {
- pr_warn("PCI: no pci dn found for dev=%s\n",
- pci_name(dev));
- return;
- }
-
- edev = pdn_to_eeh_dev(pdn);
+ edev = pci_dev_to_eeh_dev(dev);
if (!edev) {
pr_warn("PCI: no EEH dev found for %s\n",
pci_name(dev));
@@ -229,8 +221,8 @@ restart:
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
if (piar->pcidev == dev) {
- pr_debug("PIAR: remove range=[%pap:%pap] dev=%s\n",
- &piar->addr_lo, &piar->addr_hi, pci_name(dev));
+ eeh_edev_dbg(piar->edev, "PIAR: remove range=[%pap:%pap]\n",
+ &piar->addr_lo, &piar->addr_hi);
rb_erase(n, &pci_io_addr_cache_root.rb_root);
kfree(piar);
goto restart;
@@ -258,37 +250,14 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
}
/**
- * eeh_addr_cache_build - Build a cache of I/O addresses
+ * eeh_addr_cache_init - Initialize a cache of I/O addresses
*
- * Build a cache of pci i/o addresses. This cache will be used to
+ * Initialize a cache of pci i/o addresses. This cache will be used to
* find the pci device that corresponds to a given address.
- * This routine scans all pci busses to build the cache.
- * Must be run late in boot process, after the pci controllers
- * have been scanned for devices (after all device resources are known).
*/
-void eeh_addr_cache_build(void)
+void eeh_addr_cache_init(void)
{
- struct pci_dn *pdn;
- struct eeh_dev *edev;
- struct pci_dev *dev = NULL;
-
spin_lock_init(&pci_io_addr_cache_root.piar_lock);
-
- for_each_pci_dev(dev) {
- pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
- if (!pdn)
- continue;
-
- edev = pdn_to_eeh_dev(pdn);
- if (!edev)
- continue;
-
- dev->dev.archdata.edev = edev;
- edev->pdev = dev;
-
- eeh_addr_cache_insert_dev(dev);
- eeh_sysfs_add_device(dev);
- }
}
static int eeh_addr_cache_show(struct seq_file *s, void *v)
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index c4317c452d98..7370185c7a05 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -47,6 +47,8 @@ struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
/* Associate EEH device with OF node */
pdn->edev = edev;
edev->pdn = pdn;
+ edev->bdfn = (pdn->busno << 8) | pdn->devfn;
+ edev->controller = pdn->phb;
return edev;
}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 89623962c727..a1eaffe868de 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -1,25 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PCI Error Recovery Driver for RPA-compliant PPC64 platform.
* Copyright IBM Corp. 2004 2005
* Copyright Linas Vepstas <linas@linas.org> 2004, 2005
*
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
* Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
*/
#include <linux/delay.h>
@@ -27,6 +11,7 @@
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/ppc-pci.h>
@@ -81,23 +66,6 @@ static const char *pci_ers_result_name(enum pci_ers_result result)
}
};
-static __printf(2, 3) void eeh_edev_info(const struct eeh_dev *edev,
- const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
-
- printk(KERN_INFO "EEH: PE#%x (PCI %s): %pV\n", edev->pe_config_addr,
- edev->pdev ? dev_name(&edev->pdev->dev) : "none", &vaf);
-
- va_end(args);
-}
-
static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
enum pci_ers_result new)
{
@@ -113,8 +81,16 @@ static bool eeh_dev_removed(struct eeh_dev *edev)
static bool eeh_edev_actionable(struct eeh_dev *edev)
{
- return (edev->pdev && !eeh_dev_removed(edev) &&
- !eeh_pe_passed(edev->pe));
+ if (!edev->pdev)
+ return false;
+ if (edev->pdev->error_state == pci_channel_io_perm_failure)
+ return false;
+ if (eeh_dev_removed(edev))
+ return false;
+ if (eeh_pe_passed(edev->pe))
+ return false;
+
+ return true;
}
/**
@@ -214,12 +190,12 @@ static void eeh_enable_irq(struct eeh_dev *edev)
}
}
-static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
+static void eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dev *pdev;
if (!edev)
- return NULL;
+ return;
/*
* We cannot access the config space on some adapters.
@@ -229,14 +205,13 @@ static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
* device is created.
*/
if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED))
- return NULL;
+ return;
pdev = eeh_dev_to_pci_dev(edev);
if (!pdev)
- return NULL;
+ return;
pci_save_state(pdev);
- return NULL;
}
static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
@@ -274,20 +249,27 @@ static void eeh_set_irq_state(struct eeh_pe *root, bool enable)
}
typedef enum pci_ers_result (*eeh_report_fn)(struct eeh_dev *,
+ struct pci_dev *,
struct pci_driver *);
static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
enum pci_ers_result *result)
{
+ struct pci_dev *pdev;
struct pci_driver *driver;
enum pci_ers_result new_result;
- if (!edev->pdev) {
+ pci_lock_rescan_remove();
+ pdev = edev->pdev;
+ if (pdev)
+ get_device(&pdev->dev);
+ pci_unlock_rescan_remove();
+ if (!pdev) {
eeh_edev_info(edev, "no device");
return;
}
- device_lock(&edev->pdev->dev);
+ device_lock(&pdev->dev);
if (eeh_edev_actionable(edev)) {
- driver = eeh_pcid_get(edev->pdev);
+ driver = eeh_pcid_get(pdev);
if (!driver)
eeh_edev_info(edev, "no driver");
@@ -296,7 +278,7 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
else if (edev->mode & EEH_DEV_NO_HANDLER)
eeh_edev_info(edev, "driver bound too late");
else {
- new_result = fn(edev, driver);
+ new_result = fn(edev, pdev, driver);
eeh_edev_info(edev, "%s driver reports: '%s'",
driver->name,
pci_ers_result_name(new_result));
@@ -305,12 +287,15 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
new_result);
}
if (driver)
- eeh_pcid_put(edev->pdev);
+ eeh_pcid_put(pdev);
} else {
- eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!edev->pdev,
+ eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!pdev,
!eeh_dev_removed(edev), !eeh_pe_passed(edev->pe));
}
- device_unlock(&edev->pdev->dev);
+ device_unlock(&pdev->dev);
+ if (edev->pdev != pdev)
+ eeh_edev_warn(edev, "Device changed during processing!\n");
+ put_device(&pdev->dev);
}
static void eeh_pe_report(const char *name, struct eeh_pe *root,
@@ -337,20 +322,20 @@ static void eeh_pe_report(const char *name, struct eeh_pe *root,
* Report an EEH error to each device driver.
*/
static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
enum pci_ers_result rc;
- struct pci_dev *dev = edev->pdev;
if (!driver->err_handler->error_detected)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->error_detected(IO frozen)",
driver->name);
- rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
+ rc = driver->err_handler->error_detected(pdev, pci_channel_io_frozen);
edev->in_error = true;
- pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
+ pci_uevent_ers(pdev, PCI_ERS_RESULT_NONE);
return rc;
}
@@ -363,12 +348,13 @@ static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
* are now enabled.
*/
static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->mmio_enabled)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->mmio_enabled()", driver->name);
- return driver->err_handler->mmio_enabled(edev->pdev);
+ return driver->err_handler->mmio_enabled(pdev);
}
/**
@@ -382,20 +368,21 @@ static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
* driver can work again while the device is recovered.
*/
static enum pci_ers_result eeh_report_reset(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->slot_reset || !edev->in_error)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->slot_reset()", driver->name);
- return driver->err_handler->slot_reset(edev->pdev);
+ return driver->err_handler->slot_reset(pdev);
}
-static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
+static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dev *pdev;
if (!edev)
- return NULL;
+ return;
/*
* The content in the config space isn't saved because
@@ -407,15 +394,14 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
if (list_is_last(&edev->entry, &edev->pe->edevs))
eeh_pe_restore_bars(edev->pe);
- return NULL;
+ return;
}
pdev = eeh_dev_to_pci_dev(edev);
if (!pdev)
- return NULL;
+ return;
pci_restore_state(pdev);
- return NULL;
}
/**
@@ -428,13 +414,14 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
* to make the recovered device work again.
*/
static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->resume || !edev->in_error)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->resume()", driver->name);
- driver->err_handler->resume(edev->pdev);
+ driver->err_handler->resume(pdev);
pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
#ifdef CONFIG_PCI_IOV
@@ -453,6 +440,7 @@ static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
* dead, and that no further recovery attempts will be made on it.
*/
static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
enum pci_ers_result rc;
@@ -462,10 +450,10 @@ static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
eeh_edev_info(edev, "Invoking %s->error_detected(permanent failure)",
driver->name);
- rc = driver->err_handler->error_detected(edev->pdev,
+ rc = driver->err_handler->error_detected(pdev,
pci_channel_io_perm_failure);
- pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_DISCONNECT);
+ pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
return rc;
}
@@ -473,12 +461,9 @@ static void *eeh_add_virt_device(struct eeh_dev *edev)
{
struct pci_driver *driver;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
if (!(edev->physfn)) {
- pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
- __func__, pdn->phb->global_number, pdn->busno,
- PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+ eeh_edev_warn(edev, "Not for VF\n");
return NULL;
}
@@ -492,12 +477,12 @@ static void *eeh_add_virt_device(struct eeh_dev *edev)
}
#ifdef CONFIG_PCI_IOV
- pci_iov_add_virtfn(edev->physfn, pdn->vf_index);
+ pci_iov_add_virtfn(edev->physfn, eeh_dev_to_pdn(edev)->vf_index);
#endif
return NULL;
}
-static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
+static void eeh_rmv_device(struct eeh_dev *edev, void *userdata)
{
struct pci_driver *driver;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
@@ -512,7 +497,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
*/
if (!eeh_edev_actionable(edev) ||
(dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
- return NULL;
+ return;
if (rmv_data) {
driver = eeh_pcid_get(dev);
@@ -521,7 +506,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
driver->err_handler->error_detected &&
driver->err_handler->slot_reset) {
eeh_pcid_put(dev);
- return NULL;
+ return;
}
eeh_pcid_put(dev);
}
@@ -540,12 +525,6 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
pci_iov_remove_virtfn(edev->physfn, pdn->vf_index);
edev->pdev = NULL;
-
- /*
- * We have to set the VF PE number to invalid one, which is
- * required to plug the VF successfully.
- */
- pdn->pe_number = IODA_INVALID_PE;
#endif
if (rmv_data)
list_add(&edev->rmv_entry, &rmv_data->removed_vf_list);
@@ -554,8 +533,6 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
pci_stop_and_remove_bus_device(dev);
pci_unlock_rescan_remove();
}
-
- return NULL;
}
static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
@@ -744,6 +721,99 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
*/
#define MAX_WAIT_FOR_RECOVERY 300
+
+/* Walks the PE tree after processing an event to remove any stale PEs.
+ *
+ * NB: This needs to be recursive to ensure the leaf PEs get removed
+ * before their parents do. Although this is possible to do recursively
+ * we don't since this is easier to read and we need to garantee
+ * the leaf nodes will be handled first.
+ */
+static void eeh_pe_cleanup(struct eeh_pe *pe)
+{
+ struct eeh_pe *child_pe, *tmp;
+
+ list_for_each_entry_safe(child_pe, tmp, &pe->child_list, child)
+ eeh_pe_cleanup(child_pe);
+
+ if (pe->state & EEH_PE_KEEP)
+ return;
+
+ if (!(pe->state & EEH_PE_INVALID))
+ return;
+
+ if (list_empty(&pe->edevs) && list_empty(&pe->child_list)) {
+ list_del(&pe->child);
+ kfree(pe);
+ }
+}
+
+/**
+ * eeh_check_slot_presence - Check if a device is still present in a slot
+ * @pdev: pci_dev to check
+ *
+ * This function may return a false positive if we can't determine the slot's
+ * presence state. This might happen for for PCIe slots if the PE containing
+ * the upstream bridge is also frozen, or the bridge is part of the same PE
+ * as the device.
+ *
+ * This shouldn't happen often, but you might see it if you hotplug a PCIe
+ * switch.
+ */
+static bool eeh_slot_presence_check(struct pci_dev *pdev)
+{
+ const struct hotplug_slot_ops *ops;
+ struct pci_slot *slot;
+ u8 state;
+ int rc;
+
+ if (!pdev)
+ return false;
+
+ if (pdev->error_state == pci_channel_io_perm_failure)
+ return false;
+
+ slot = pdev->slot;
+ if (!slot || !slot->hotplug)
+ return true;
+
+ ops = slot->hotplug->ops;
+ if (!ops || !ops->get_adapter_status)
+ return true;
+
+ /* set the attention indicator while we've got the slot ops */
+ if (ops->set_attention_status)
+ ops->set_attention_status(slot->hotplug, 1);
+
+ rc = ops->get_adapter_status(slot->hotplug, &state);
+ if (rc)
+ return true;
+
+ return !!state;
+}
+
+static void eeh_clear_slot_attention(struct pci_dev *pdev)
+{
+ const struct hotplug_slot_ops *ops;
+ struct pci_slot *slot;
+
+ if (!pdev)
+ return;
+
+ if (pdev->error_state == pci_channel_io_perm_failure)
+ return;
+
+ slot = pdev->slot;
+ if (!slot || !slot->hotplug)
+ return;
+
+ ops = slot->hotplug->ops;
+ if (!ops || !ops->set_attention_status)
+ return;
+
+ ops->set_attention_status(slot->hotplug, 0);
+}
+
/**
* eeh_handle_normal_event - Handle EEH events on a specific PE
* @pe: EEH PE - which should not be used after we return, as it may
@@ -774,6 +844,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
struct eeh_rmv_data rmv_data =
{LIST_HEAD_INIT(rmv_data.removed_vf_list), 0};
+ int devices = 0;
bus = eeh_pe_bus_get(pe);
if (!bus) {
@@ -782,7 +853,59 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
return;
}
- eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ /*
+ * When devices are hot-removed we might get an EEH due to
+ * a driver attempting to touch the MMIO space of a removed
+ * device. In this case we don't have a device to recover
+ * so suppress the event if we can't find any present devices.
+ *
+ * The hotplug driver should take care of tearing down the
+ * device itself.
+ */
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ if (eeh_slot_presence_check(edev->pdev))
+ devices++;
+
+ if (!devices) {
+ pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n",
+ pe->phb->global_number, pe->addr);
+ goto out; /* nothing to recover */
+ }
+
+ /* Log the event */
+ if (pe->type & EEH_PE_PHB) {
+ pr_err("EEH: Recovering PHB#%x, location: %s\n",
+ pe->phb->global_number, eeh_pe_loc_get(pe));
+ } else {
+ struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb);
+
+ pr_err("EEH: Recovering PHB#%x-PE#%x\n",
+ pe->phb->global_number, pe->addr);
+ pr_err("EEH: PE location: %s, PHB location: %s\n",
+ eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
+ }
+
+#ifdef CONFIG_STACKTRACE
+ /*
+ * Print the saved stack trace now that we've verified there's
+ * something to recover.
+ */
+ if (pe->trace_entries) {
+ void **ptrs = (void **) pe->stack_trace;
+ int i;
+
+ pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+ pe->phb->global_number, pe->addr);
+
+ /* FIXME: Use the same format as dump_stack() */
+ pr_err("EEH: Call Trace:\n");
+ for (i = 0; i < pe->trace_entries; i++)
+ pr_err("EEH: [%pK] %pS\n", ptrs[i], ptrs[i]);
+
+ pe->trace_entries = 0;
+ }
+#endif /* CONFIG_STACKTRACE */
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
@@ -793,6 +916,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
result = PCI_ERS_RESULT_DISCONNECT;
}
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
* to accomplish the reset. Each child gets a report of the
@@ -969,6 +1096,19 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
return;
}
}
+
+out:
+ /*
+ * Clean up any PEs without devices. While marked as EEH_PE_RECOVERYING
+ * we don't want to modify the PE tree structure so we do it here.
+ */
+ eeh_pe_cleanup(pe);
+
+ /* clear the slot attention LED for all recovered devices */
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ eeh_clear_slot_attention(edev->pdev);
+
eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
}
@@ -981,7 +1121,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
*/
void eeh_handle_special_event(void)
{
- struct eeh_pe *pe, *phb_pe;
+ struct eeh_pe *pe, *phb_pe, *tmp_pe;
+ struct eeh_dev *edev, *tmp_edev;
struct pci_bus *bus;
struct pci_controller *hose;
unsigned long flags;
@@ -1040,6 +1181,7 @@ void eeh_handle_special_event(void)
*/
if (rc == EEH_NEXT_ERR_FROZEN_PE ||
rc == EEH_NEXT_ERR_FENCED_PHB) {
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
eeh_handle_normal_event(pe);
} else {
pci_lock_rescan_remove();
@@ -1050,6 +1192,10 @@ void eeh_handle_special_event(void)
(phb_pe->state & EEH_PE_RECOVERING))
continue;
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+
/* Notify all devices to be down */
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
eeh_set_channel_state(pe, pci_channel_io_perm_failure);
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index 64cfbe41174b..a7a8dc182efb 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -40,7 +40,6 @@ static int eeh_event_handler(void * dummy)
{
unsigned long flags;
struct eeh_event *event;
- struct eeh_pe *pe;
while (!kthread_should_stop()) {
if (wait_for_completion_interruptible(&eeh_eventlist_event))
@@ -59,19 +58,10 @@ static int eeh_event_handler(void * dummy)
continue;
/* We might have event without binding PE */
- pe = event->pe;
- if (pe) {
- if (pe->type & EEH_PE_PHB)
- pr_info("EEH: Detected error on PHB#%x\n",
- pe->phb->global_number);
- else
- pr_info("EEH: Detected PCI bus error on "
- "PHB#%x-PE#%x\n",
- pe->phb->global_number, pe->addr);
- eeh_handle_normal_event(pe);
- } else {
+ if (event->pe)
+ eeh_handle_normal_event(event->pe);
+ else
eeh_handle_special_event();
- }
kfree(event);
}
@@ -121,6 +111,24 @@ int __eeh_send_failure_event(struct eeh_pe *pe)
}
event->pe = pe;
+ /*
+ * Mark the PE as recovering before inserting it in the queue.
+ * This prevents the PE from being free()ed by a hotplug driver
+ * while the PE is sitting in the event queue.
+ */
+ if (pe) {
+#ifdef CONFIG_STACKTRACE
+ /*
+ * Save the current stack trace so we can dump it from the
+ * event handler thread.
+ */
+ pe->trace_entries = stack_trace_save(pe->stack_trace,
+ ARRAY_SIZE(pe->stack_trace), 0);
+#endif /* CONFIG_STACKTRACE */
+
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ }
+
/* We may or may not be called in an interrupt context */
spin_lock_irqsave(&eeh_eventlist_lock, flags);
list_add(&event->list, &eeh_eventlist);
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 854cef7b18f4..177852e39a25 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -231,29 +231,22 @@ void *eeh_pe_traverse(struct eeh_pe *root,
* The function is used to traverse the devices of the specified
* PE and its child PEs.
*/
-void *eeh_pe_dev_traverse(struct eeh_pe *root,
+void eeh_pe_dev_traverse(struct eeh_pe *root,
eeh_edev_traverse_func fn, void *flag)
{
struct eeh_pe *pe;
struct eeh_dev *edev, *tmp;
- void *ret;
if (!root) {
pr_warn("%s: Invalid PE %p\n",
__func__, root);
- return NULL;
+ return;
}
/* Traverse root PE */
- eeh_for_each_pe(root, pe) {
- eeh_pe_for_each_dev(pe, edev, tmp) {
- ret = fn(edev, flag);
- if (ret)
- return ret;
- }
- }
-
- return NULL;
+ eeh_for_each_pe(root, pe)
+ eeh_pe_for_each_dev(pe, edev, tmp)
+ fn(edev, flag);
}
/**
@@ -379,8 +372,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
/* Check if the PE number is valid */
if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
- pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n",
- __func__, config_addr, pdn->phb->global_number);
+ eeh_edev_err(edev, "PE#0 is invalid for this PHB!\n");
return -EINVAL;
}
@@ -391,42 +383,34 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* components.
*/
pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr);
- if (pe && !(pe->type & EEH_PE_INVALID)) {
- /* Mark the PE as type of PCI bus */
- pe->type = EEH_PE_BUS;
- edev->pe = pe;
-
- /* Put the edev to PE */
- list_add_tail(&edev->entry, &pe->edevs);
- pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n",
- pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn),
- pe->addr);
- return 0;
- } else if (pe && (pe->type & EEH_PE_INVALID)) {
- list_add_tail(&edev->entry, &pe->edevs);
- edev->pe = pe;
- /*
- * We're running to here because of PCI hotplug caused by
- * EEH recovery. We need clear EEH_PE_INVALID until the top.
- */
- parent = pe;
- while (parent) {
- if (!(parent->type & EEH_PE_INVALID))
- break;
- parent->type &= ~EEH_PE_INVALID;
- parent = parent->parent;
- }
+ if (pe) {
+ if (pe->type & EEH_PE_INVALID) {
+ list_add_tail(&edev->entry, &pe->edevs);
+ edev->pe = pe;
+ /*
+ * We're running to here because of PCI hotplug caused by
+ * EEH recovery. We need clear EEH_PE_INVALID until the top.
+ */
+ parent = pe;
+ while (parent) {
+ if (!(parent->type & EEH_PE_INVALID))
+ break;
+ parent->type &= ~EEH_PE_INVALID;
+ parent = parent->parent;
+ }
+
+ eeh_edev_dbg(edev,
+ "Added to device PE (parent: PE#%x)\n",
+ pe->parent->addr);
+ } else {
+ /* Mark the PE as type of PCI bus */
+ pe->type = EEH_PE_BUS;
+ edev->pe = pe;
- pr_debug("EEH: Add %04x:%02x:%02x.%01x to Device "
- "PE#%x, Parent PE#%x\n",
- pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn),
- pe->addr, pe->parent->addr);
+ /* Put the edev to PE */
+ list_add_tail(&edev->entry, &pe->edevs);
+ eeh_edev_dbg(edev, "Added to bus PE\n");
+ }
return 0;
}
@@ -468,13 +452,8 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
list_add_tail(&pe->child, &parent->child_list);
list_add_tail(&edev->entry, &pe->edevs);
edev->pe = pe;
- pr_debug("EEH: Add %04x:%02x:%02x.%01x to "
- "Device PE#%x, Parent PE#%x\n",
- pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn),
- pe->addr, pe->parent->addr);
+ eeh_edev_dbg(edev, "Added to device PE (parent: PE#%x)\n",
+ pe->parent->addr);
return 0;
}
@@ -491,16 +470,12 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
{
struct eeh_pe *pe, *parent, *child;
+ bool keep, recover;
int cnt;
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
pe = eeh_dev_to_pe(edev);
if (!pe) {
- pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n",
- __func__, pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn));
+ eeh_edev_dbg(edev, "No PE found for device.\n");
return -EEXIST;
}
@@ -516,10 +491,21 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
*/
while (1) {
parent = pe->parent;
+
+ /* PHB PEs should never be removed */
if (pe->type & EEH_PE_PHB)
break;
- if (!(pe->state & EEH_PE_KEEP)) {
+ /*
+ * XXX: KEEP is set while resetting a PE. I don't think it's
+ * ever set without RECOVERING also being set. I could
+ * be wrong though so catch that with a WARN.
+ */
+ keep = !!(pe->state & EEH_PE_KEEP);
+ recover = !!(pe->state & EEH_PE_RECOVERING);
+ WARN_ON(keep && !recover);
+
+ if (!keep && !recover) {
if (list_empty(&pe->edevs) &&
list_empty(&pe->child_list)) {
list_del(&pe->child);
@@ -528,6 +514,15 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
break;
}
} else {
+ /*
+ * Mark the PE as invalid. At the end of the recovery
+ * process any invalid PEs will be garbage collected.
+ *
+ * We need to delay the free()ing of them since we can
+ * remove edev's while traversing the PE tree which
+ * might trigger the removal of a PE and we can't
+ * deal with that (yet).
+ */
if (list_empty(&pe->edevs)) {
cnt = 0;
list_for_each_entry(child, &pe->child_list, child) {
@@ -623,13 +618,11 @@ void eeh_pe_mark_isolated(struct eeh_pe *root)
}
EXPORT_SYMBOL_GPL(eeh_pe_mark_isolated);
-static void *__eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
+static void __eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
{
int mode = *((int *)flag);
edev->mode |= mode;
-
- return NULL;
}
/**
@@ -717,17 +710,13 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT)))
return;
- pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
- __func__, pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn));
+ eeh_edev_dbg(edev, "Checking PCIe link...\n");
/* Check slot status */
cap = edev->pcie_cap;
eeh_ops->read_config(pdn, cap + PCI_EXP_SLTSTA, 2, &val);
if (!(val & PCI_EXP_SLTSTA_PDS)) {
- pr_debug(" No card in the slot (0x%04x) !\n", val);
+ eeh_edev_dbg(edev, "No card in the slot (0x%04x) !\n", val);
return;
}
@@ -736,7 +725,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
if (val & PCI_EXP_SLTCAP_PCP) {
eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCTL, 2, &val);
if (val & PCI_EXP_SLTCTL_PCC) {
- pr_debug(" In power-off state, power it on ...\n");
+ eeh_edev_dbg(edev, "In power-off state, power it on ...\n");
val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
eeh_ops->write_config(pdn, cap + PCI_EXP_SLTCTL, 2, val);
@@ -752,7 +741,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
/* Check link */
eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCAP, 4, &val);
if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
- pr_debug(" No link reporting capability (0x%08x) \n", val);
+ eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", val);
msleep(1000);
return;
}
@@ -769,10 +758,10 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
}
if (val & PCI_EXP_LNKSTA_DLLLA)
- pr_debug(" Link up (%s)\n",
+ eeh_edev_dbg(edev, "Link up (%s)\n",
(val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB");
else
- pr_debug(" Link not ready (0x%04x)\n", val);
+ eeh_edev_dbg(edev, "Link not ready (0x%04x)\n", val);
}
#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
@@ -852,7 +841,7 @@ static void eeh_restore_device_bars(struct eeh_dev *edev)
* the expansion ROM base address, the latency timer, and etc.
* from the saved values in the device node.
*/
-static void *eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
+static void eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
{
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
@@ -864,8 +853,6 @@ static void *eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
if (eeh_ops->restore_config && pdn)
eeh_ops->restore_config(pdn);
-
- return NULL;
}
/**
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index 3fa04dda1737..4fb0f1e1017a 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -1,25 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Sysfs entries for PCI Error Recovery for PAPR-compliant platform.
* Copyright IBM Corporation 2007
* Copyright Linas Vepstas <linas@austin.ibm.com> 2007
*
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
* Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
*/
#include <linux/pci.h>
@@ -30,7 +14,7 @@
/**
* EEH_SHOW_ATTR -- Create sysfs entry for eeh statistic
* @_name: name of file in sysfs directory
- * @_memb: name of member in struct pci_dn to access
+ * @_memb: name of member in struct eeh_dev to access
* @_format: printf format for display
*
* All of the attributes look very similar, so just
@@ -91,7 +75,7 @@ static ssize_t eeh_pe_state_store(struct device *dev,
static DEVICE_ATTR_RW(eeh_pe_state);
-#ifdef CONFIG_PCI_IOV
+#if defined(CONFIG_PCI_IOV) && defined(CONFIG_PPC_PSERIES)
static ssize_t eeh_notify_resume_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -102,7 +86,6 @@ static ssize_t eeh_notify_resume_show(struct device *dev,
if (!edev || !edev->pe)
return -ENODEV;
- pdn = pci_get_pdn(pdev);
return sprintf(buf, "%d\n", pdn->last_allow_rc);
}
@@ -148,7 +131,7 @@ static void eeh_notify_resume_remove(struct pci_dev *pdev)
#else
static inline int eeh_notify_resume_add(struct pci_dev *pdev) { return 0; }
static inline void eeh_notify_resume_remove(struct pci_dev *pdev) { }
-#endif /* CONFIG_PCI_IOV */
+#endif /* CONFIG_PCI_IOV && CONFIG PPC_PSERIES*/
void eeh_sysfs_add_device(struct pci_dev *pdev)
{
@@ -176,22 +159,23 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev)
{
struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+ if (!edev) {
+ WARN_ON(eeh_enabled());
+ return;
+ }
+
+ edev->mode &= ~EEH_DEV_SYSFS;
+
/*
* The parent directory might have been removed. We needn't
* continue for that case.
*/
- if (!pdev->dev.kobj.sd) {
- if (edev)
- edev->mode &= ~EEH_DEV_SYSFS;
+ if (!pdev->dev.kobj.sd)
return;
- }
device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state);
eeh_notify_resume_remove(pdev);
-
- if (edev)
- edev->mode &= ~EEH_DEV_SYSFS;
}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 54fab22c9a43..0713daa651d9 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -140,6 +140,7 @@ transfer_to_handler:
stw r12,_CTR(r11)
stw r2,_XER(r11)
mfspr r12,SPRN_SPRG_THREAD
+ tovirt_vmstack r12, r12
beq 2f /* if from user, fix up THREAD.regs */
addi r2, r12, -THREAD
addi r11,r1,STACK_FRAME_OVERHEAD
@@ -179,11 +180,13 @@ transfer_to_handler:
2: /* if from kernel, check interrupted DOZE/NAP mode and
* check for stack overflow
*/
- kuap_save_and_lock r11, r12, r9, r2, r0
+ kuap_save_and_lock r11, r12, r9, r2, r6
addi r2, r12, -THREAD
+#ifndef CONFIG_VMAP_STACK
lwz r9,KSP_LIMIT(r12)
cmplw r1,r9 /* if r1 <= ksp_limit */
ble- stack_ovf /* then the kernel stack overflowed */
+#endif
5:
#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
lwz r12,TI_LOCAL_FLAGS(r2)
@@ -195,7 +198,8 @@ transfer_to_handler:
transfer_to_handler_cont:
3:
mflr r9
- tovirt(r2, r2) /* set r2 to current */
+ tovirt_novmstack r2, r2 /* set r2 to current */
+ tovirt_vmstack r9, r9
lwz r11,0(r9) /* virtual address of handler */
lwz r9,4(r9) /* where to go when done */
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
@@ -210,7 +214,7 @@ transfer_to_handler_cont:
* To speed up the syscall path where interrupts stay on, let's check
* first if we are changing the MSR value at all.
*/
- tophys(r12, r1)
+ tophys_novmstack r12, r1
lwz r12,_MSR(r12)
andi. r12,r12,MSR_EE
bne 1f
@@ -230,7 +234,7 @@ transfer_to_handler_cont:
*/
lis r12,reenable_mmu@h
ori r12,r12,reenable_mmu@l
- LOAD_MSR_KERNEL(r0, MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
mtspr SPRN_SRR0,r12
mtspr SPRN_SRR1,r0
SYNC
@@ -284,9 +288,11 @@ reenable_mmu:
rlwinm r9,r9,0,~MSR_EE
lwz r12,_LINK(r11) /* and return to address in LR */
kuap_restore r11, r2, r3, r4, r5
+ lwz r2, GPR2(r11)
b fast_exception_return
#endif
+#ifndef CONFIG_VMAP_STACK
/*
* On kernel stack overflow, load up an initial stack pointer
* and call StackOverflow(regs), which should not return.
@@ -304,7 +310,7 @@ stack_ovf:
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
lis r9,StackOverflow@ha
addi r9,r9,StackOverflow@l
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
@@ -312,6 +318,7 @@ stack_ovf:
mtspr SPRN_SRR1,r10
SYNC
RFI
+#endif
#ifdef CONFIG_TRACE_IRQFLAGS
trace_syscall_entry_irq_off:
@@ -324,7 +331,7 @@ trace_syscall_entry_irq_off:
bl trace_hardirqs_on
/* Now enable for real */
- LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
mtmsr r10
REST_GPR(0, r1)
@@ -394,10 +401,10 @@ ret_from_syscall:
#endif
mr r6,r3
/* disable interrupts so current_thread_info()->flags can't change */
- LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */
/* Note: We don't bother telling lockdep about it */
SYNC
- MTMSRD(r10)
+ mtmsr r10
lwz r9,TI_FLAGS(r2)
li r8,-MAX_ERRNO
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
@@ -554,7 +561,7 @@ syscall_exit_work:
*/
ori r10,r10,MSR_EE
SYNC
- MTMSRD(r10)
+ mtmsr r10
/* Save NVGPRS if they're not saved already */
lwz r4,_TRAP(r1)
@@ -621,7 +628,6 @@ ppc_swapcontext:
*/
.globl handle_page_fault
handle_page_fault:
- stw r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_BOOK3S_32
andis. r0,r5,DSISR_DABRMATCH@h
@@ -697,7 +703,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE)
and. r0,r0,r11 /* FP or altivec or SPE enabled? */
beq+ 1f
andc r11,r11,r0
- MTMSRD(r11)
+ mtmsr r11
isync
1: stw r11,_MSR(r1)
mfcr r10
@@ -777,11 +783,19 @@ fast_exception_return:
1: lis r3,exc_exit_restart_end@ha
addi r3,r3,exc_exit_restart_end@l
cmplw r12,r3
+#if CONFIG_PPC_BOOK3S_601
+ bge 2b
+#else
bge 3f
+#endif
lis r4,exc_exit_restart@ha
addi r4,r4,exc_exit_restart@l
cmplw r12,r4
+#if CONFIG_PPC_BOOK3S_601
+ blt 2b
+#else
blt 3f
+#endif
lis r3,fee_restarts@ha
tophys(r3,r3)
lwz r5,fee_restarts@l(r3)
@@ -800,9 +814,6 @@ fee_restarts:
/* aargh, we don't know which trap this is */
/* but the 601 doesn't implement the RI bit, so assume it's OK */
3:
-BEGIN_FTR_SECTION
- b 2b
-END_FTR_SECTION_IFSET(CPU_FTR_601)
li r10,-1
stw r10,_TRAP(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -824,9 +835,9 @@ ret_from_except:
* can't change between when we test it and when we return
* from the interrupt. */
/* Note: We don't bother telling lockdep about it */
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC /* Some chip revs have problems here... */
- MTMSRD(r10) /* disable interrupts */
+ mtmsr r10 /* disable interrupts */
lwz r3,_MSR(r1) /* Returning to user mode? */
andi. r0,r3,MSR_PR
@@ -892,7 +903,7 @@ resume_kernel:
bne- 0b
1:
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
/* check current_thread_info->preempt_count */
lwz r0,TI_PREEMPT(r2)
cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
@@ -916,7 +927,7 @@ resume_kernel:
*/
bl trace_hardirqs_on
#endif
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
restore_kuap:
kuap_restore r1, r2, r9, r10, r0
@@ -991,9 +1002,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
* can restart the exception exit path at the label
* exc_exit_restart below. -- paulus
*/
- LOAD_MSR_KERNEL(r10,MSR_KERNEL & ~MSR_RI)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI)
SYNC
- MTMSRD(r10) /* clear the RI bit */
+ mtmsr r10 /* clear the RI bit */
.globl exc_exit_restart
exc_exit_restart:
lwz r12,_NIP(r1)
@@ -1066,7 +1077,7 @@ exc_exit_restart_end:
REST_NVGPRS(r1); \
lwz r3,_MSR(r1); \
andi. r3,r3,MSR_PR; \
- LOAD_MSR_KERNEL(r10,MSR_KERNEL); \
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL); \
bne user_exc_return; \
lwz r0,GPR0(r1); \
lwz r2,GPR2(r1); \
@@ -1229,16 +1240,16 @@ do_resched: /* r10 contains MSR_KERNEL here */
#endif
ori r10,r10,MSR_EE
SYNC
- MTMSRD(r10) /* hard-enable interrupts */
+ mtmsr r10 /* hard-enable interrupts */
bl schedule
recheck:
/* Note: And we don't tell it we are disabling them again
* neither. Those disable/enable cycles used to peek at
* TI_FLAGS aren't advertised.
*/
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC
- MTMSRD(r10) /* disable interrupts */
+ mtmsr r10 /* disable interrupts */
lwz r9,TI_FLAGS(r2)
andi. r0,r9,_TIF_NEED_RESCHED
bne- do_resched
@@ -1247,7 +1258,7 @@ recheck:
do_user_signal: /* r10 contains MSR_KERNEL here */
ori r10,r10,MSR_EE
SYNC
- MTMSRD(r10) /* hard-enable interrupts */
+ mtmsr r10 /* hard-enable interrupts */
/* save r13-r31 in the exception frame, if not already done */
lwz r3,_TRAP(r1)
andi. r0,r3,1
@@ -1270,11 +1281,19 @@ nonrecoverable:
lis r10,exc_exit_restart_end@ha
addi r10,r10,exc_exit_restart_end@l
cmplw r12,r10
+#ifdef CONFIG_PPC_BOOK3S_601
+ bgelr
+#else
bge 3f
+#endif
lis r11,exc_exit_restart@ha
addi r11,r11,exc_exit_restart@l
cmplw r12,r11
+#ifdef CONFIG_PPC_BOOK3S_601
+ bltlr
+#else
blt 3f
+#endif
lis r10,ee_restarts@ha
lwz r12,ee_restarts@l(r10)
addi r12,r12,1
@@ -1283,9 +1302,6 @@ nonrecoverable:
blr
3: /* OK, we can't recover, kill this process */
/* but the 601 doesn't implement the RI bit, so assume it's OK */
-BEGIN_FTR_SECTION
- blr
-END_FTR_SECTION_IFSET(CPU_FTR_601)
lwz r3,_TRAP(r1)
andi. r0,r3,1
beq 5f
@@ -1324,14 +1340,14 @@ _GLOBAL(enter_rtas)
lis r6,1f@ha /* physical return address for rtas */
addi r6,r6,1f@l
tophys(r6,r6)
- tophys(r7,r1)
+ tophys_novmstack r7, r1
lwz r8,RTASENTRY(r4)
lwz r4,RTASBASE(r4)
mfmsr r9
stw r9,8(r1)
- LOAD_MSR_KERNEL(r0,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r0,MSR_KERNEL)
SYNC /* disable interrupts so SRR0/1 */
- MTMSRD(r0) /* don't get trashed */
+ mtmsr r0 /* don't get trashed */
li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
mtlr r6
stw r7, THREAD + RTAS_SP(r2)
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 0a0b5310f54a..6ba675b0cf7d 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -69,24 +69,20 @@ BEGIN_FTR_SECTION
bne .Ltabort_syscall
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
- andi. r10,r12,MSR_PR
mr r10,r1
- addi r1,r1,-INT_FRAME_SIZE
- beq- 1f
ld r1,PACAKSAVE(r13)
-1: std r10,0(r1)
+ std r10,0(r1)
std r11,_NIP(r1)
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
- beq 2f /* if from kernel mode */
#ifdef CONFIG_PPC_FSL_BOOK3E
START_BTB_FLUSH_SECTION
BTB_FLUSH(r10)
END_BTB_FLUSH_SECTION
#endif
ACCOUNT_CPU_USER_ENTRY(r13, r10, r11)
-2: std r2,GPR2(r1)
+ std r2,GPR2(r1)
std r3,GPR3(r1)
mfcr r2
std r4,GPR4(r1)
@@ -122,14 +118,13 @@ END_BTB_FLUSH_SECTION
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
BEGIN_FW_FTR_SECTION
- beq 33f
- /* if from user, see if there are any DTL entries to process */
+ /* see if there are any DTL entries to process */
ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
ld r11,PACA_DTL_RIDX(r13) /* get log read index */
addi r10,r10,LPPACA_DTLIDX
LDX_BE r10,0,r10 /* get log write index */
- cmpd cr1,r11,r10
- beq+ cr1,33f
+ cmpd r11,r10
+ beq+ 33f
bl accumulate_stolen_time
REST_GPR(0,r1)
REST_4GPRS(3,r1)
@@ -203,6 +198,7 @@ system_call: /* label this so stack traces look sane */
mtctr r12
bctrl /* Call handler */
+ /* syscall_exit can exit to kernel mode, via ret_from_kernel_thread */
.Lsyscall_exit:
std r3,RESULT(r1)
@@ -216,11 +212,6 @@ system_call: /* label this so stack traces look sane */
ld r12, PACA_THREAD_INFO(r13)
ld r8,_MSR(r1)
-#ifdef CONFIG_PPC_BOOK3S
- /* No MSR:RI on BookE */
- andi. r10,r8,MSR_RI
- beq- .Lunrecov_restore
-#endif
/*
* This is a few instructions into the actual syscall exit path (which actually
@@ -546,6 +537,7 @@ flush_count_cache:
/* Save LR into r9 */
mflr r9
+ // Flush the link stack
.rept 64
bl .+4
.endr
@@ -555,6 +547,11 @@ flush_count_cache:
.balign 32
/* Restore LR */
1: mtlr r9
+
+ // If we're just flushing the link stack, return here
+3: nop
+ patch_site 3b patch__flush_link_stack_return
+
li r9,0x7fff
mtctr r9
@@ -600,8 +597,7 @@ _GLOBAL(_switch)
std r0,16(r1)
stdu r1,-SWITCH_FRAME_SIZE(r1)
/* r3-r13 are caller saved -- Cort */
- SAVE_8GPRS(14, r1)
- SAVE_10GPRS(22, r1)
+ SAVE_NVGPRS(r1)
std r0,_NIP(r1) /* Return to switch caller */
mfcr r23
std r23,_CCR(r1)
@@ -725,8 +721,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtcrf 0xFF,r6
/* r3-r13 are destroyed -- Cort */
- REST_8GPRS(14, r1)
- REST_10GPRS(22, r1)
+ REST_NVGPRS(r1)
/* convert old thread to its task_struct for return value */
addi r3,r3,-THREAD
@@ -849,7 +844,7 @@ resume_kernel:
bne- 0b
1:
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
/* Check if we need to preempt */
andi. r0,r4,_TIF_NEED_RESCHED
beq+ restore
@@ -880,7 +875,7 @@ resume_kernel:
li r10,MSR_RI
mtmsrd r10,1 /* Update machine state */
#endif /* CONFIG_PPC_BOOK3E */
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
.globl fast_exc_return_irq
fast_exc_return_irq:
@@ -1158,8 +1153,7 @@ _GLOBAL(enter_rtas)
*/
SAVE_GPR(2, r1) /* Save the TOC */
SAVE_GPR(13, r1) /* Save paca */
- SAVE_8GPRS(14, r1) /* Save the non-volatiles */
- SAVE_10GPRS(22, r1) /* ditto */
+ SAVE_NVGPRS(r1) /* Save the non-volatiles */
mfcr r4
std r4,_CCR(r1)
@@ -1266,8 +1260,7 @@ rtas_restore_regs:
/* relocation is on at this point */
REST_GPR(2, r1) /* Restore the TOC */
REST_GPR(13, r1) /* Restore paca */
- REST_8GPRS(14, r1) /* Restore the non-volatiles */
- REST_10GPRS(22, r1) /* ditto */
+ REST_NVGPRS(r1) /* Restore the non-volatiles */
GET_PACA(r13)
@@ -1301,8 +1294,7 @@ _GLOBAL(enter_prom)
*/
SAVE_GPR(2, r1)
SAVE_GPR(13, r1)
- SAVE_8GPRS(14, r1)
- SAVE_10GPRS(22, r1)
+ SAVE_NVGPRS(r1)
mfcr r10
mfmsr r11
std r10,_CCR(r1)
@@ -1346,8 +1338,7 @@ _GLOBAL(enter_prom)
/* Restore other registers */
REST_GPR(2, r1)
REST_GPR(13, r1)
- REST_8GPRS(14, r1)
- REST_10GPRS(22, r1)
+ REST_NVGPRS(r1)
ld r4,_CCR(r1)
mtcr r4
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 1cfb3da4a84a..e4076e3c072d 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -750,12 +750,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
ld r15,PACATOC(r13)
ld r14,interrupt_base_book3e@got(r15)
ld r15,__end_interrupts@got(r15)
-#else
- LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
- LOAD_REG_IMMEDIATE(r15,__end_interrupts)
-#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
+#else
+ LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
+ cmpld cr0, r10, r14
+ LOAD_REG_IMMEDIATE_SYM(r14, r15, __end_interrupts)
+ cmpld cr1, r10, r14
+#endif
blt+ cr0,1f
bge+ cr1,1f
@@ -820,12 +822,14 @@ kernel_dbg_exc:
ld r15,PACATOC(r13)
ld r14,interrupt_base_book3e@got(r15)
ld r15,__end_interrupts@got(r15)
-#else
- LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
- LOAD_REG_IMMEDIATE(r15,__end_interrupts)
-#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
+#else
+ LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
+ cmpld cr0, r10, r14
+ LOAD_REG_IMMEDIATE_SYM(r14, r15,__end_interrupts)
+ cmpld cr1, r10, r14
+#endif
blt+ cr0,1f
bge+ cr1,1f
@@ -1342,16 +1346,6 @@ skpinv: addi r6,r6,1 /* Increment */
sync
isync
-/*
- * The mapping only needs to be cache-coherent on SMP, except on
- * Freescale e500mc derivatives where it's also needed for coherent DMA.
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-#define M_IF_NEEDED MAS2_M
-#else
-#define M_IF_NEEDED 0
-#endif
-
/* 6. Setup KERNELBASE mapping in TLB[0]
*
* r3 = MAS0 w/TLBSEL & ESEL for the entry we started in
@@ -1364,7 +1358,7 @@ skpinv: addi r6,r6,1 /* Increment */
ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l
mtspr SPRN_MAS1,r6
- LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | M_IF_NEEDED)
+ LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | MAS2_M_IF_NEEDED)
mtspr SPRN_MAS2,r6
rlwinm r5,r5,0,0,25
@@ -1449,7 +1443,7 @@ a2_tlbinit_code_start:
a2_tlbinit_after_linear_map:
/* Now we branch the new virtual address mapped by this entry */
- LOAD_REG_IMMEDIATE(r3,1f)
+ LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f)
mtctr r3
bctr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6ba3cc2ef8ab..ffc15f4f079d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -44,6 +44,58 @@
#endif
/*
+ * Following are fixed section helper macros.
+ *
+ * EXC_REAL_BEGIN/END - real, unrelocated exception vectors
+ * EXC_VIRT_BEGIN/END - virt (AIL), unrelocated exception vectors
+ * TRAMP_REAL_BEGIN - real, unrelocated helpers (virt may call these)
+ * TRAMP_VIRT_BEGIN - virt, unreloc helpers (in practice, real can use)
+ * TRAMP_KVM_BEGIN - KVM handlers, these are put into real, unrelocated
+ * EXC_COMMON - After switching to virtual, relocated mode.
+ */
+
+#define EXC_REAL_BEGIN(name, start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
+
+#define EXC_REAL_END(name, start, size) \
+ FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
+
+#define EXC_VIRT_BEGIN(name, start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
+
+#define EXC_VIRT_END(name, start, size) \
+ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
+
+#define EXC_COMMON_BEGIN(name) \
+ USE_TEXT_SECTION(); \
+ .balign IFETCH_ALIGN_BYTES; \
+ .global name; \
+ _ASM_NOKPROBE_SYMBOL(name); \
+ DEFINE_FIXED_SYMBOL(name); \
+name:
+
+#define TRAMP_REAL_BEGIN(name) \
+ FIXED_SECTION_ENTRY_BEGIN(real_trampolines, name)
+
+#define TRAMP_VIRT_BEGIN(name) \
+ FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name)
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+#define TRAMP_KVM_BEGIN(name) \
+ TRAMP_VIRT_BEGIN(name)
+#else
+#define TRAMP_KVM_BEGIN(name)
+#endif
+
+#define EXC_REAL_NONE(start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \
+ FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size)
+
+#define EXC_VIRT_NONE(start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size); \
+ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size)
+
+/*
* We're short on space and time in the exception prolog, so we can't
* use the normal LOAD_REG_IMMEDIATE macro to load the address of label.
* Instead we get the base of the kernel from paca->kernelbase and or in the low
@@ -68,6 +120,7 @@
addis reg,reg,(ABS_ADDR(label))@h
/* Exception register prefixes */
+#define EXC_HV_OR_STD 2 /* depends on HVMODE */
#define EXC_HV 1
#define EXC_STD 0
@@ -127,126 +180,6 @@ BEGIN_FTR_SECTION_NESTED(943) \
std ra,offset(r13); \
END_FTR_SECTION_NESTED(ftr,ftr,943)
-.macro EXCEPTION_PROLOG_0 area
- SET_SCRATCH0(r13) /* save r13 */
- GET_PACA(r13)
- std r9,\area\()+EX_R9(r13) /* save r9 */
- OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR)
- HMT_MEDIUM
- std r10,\area\()+EX_R10(r13) /* save r10 - r12 */
- OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
-.endm
-
-.macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, dar, dsisr, bitmask
- OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR)
- OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR)
- INTERRUPT_TO_KERNEL
- SAVE_CTR(r10, \area\())
- mfcr r9
- .if \kvm
- KVMTEST \hsrr \vec
- .endif
- .if \bitmask
- lbz r10,PACAIRQSOFTMASK(r13)
- andi. r10,r10,\bitmask
- /* Associate vector numbers with bits in paca->irq_happened */
- .if \vec == 0x500 || \vec == 0xea0
- li r10,PACA_IRQ_EE
- .elseif \vec == 0x900
- li r10,PACA_IRQ_DEC
- .elseif \vec == 0xa00 || \vec == 0xe80
- li r10,PACA_IRQ_DBELL
- .elseif \vec == 0xe60
- li r10,PACA_IRQ_HMI
- .elseif \vec == 0xf00
- li r10,PACA_IRQ_PMI
- .else
- .abort "Bad maskable vector"
- .endif
-
- .if \hsrr
- bne masked_Hinterrupt
- .else
- bne masked_interrupt
- .endif
- .endif
-
- std r11,\area\()+EX_R11(r13)
- std r12,\area\()+EX_R12(r13)
-
- /*
- * DAR/DSISR, SCRATCH0 must be read before setting MSR[RI],
- * because a d-side MCE will clobber those registers so is
- * not recoverable if they are live.
- */
- GET_SCRATCH0(r10)
- std r10,\area\()+EX_R13(r13)
- .if \dar
- mfspr r10,SPRN_DAR
- std r10,\area\()+EX_DAR(r13)
- .endif
- .if \dsisr
- mfspr r10,SPRN_DSISR
- stw r10,\area\()+EX_DSISR(r13)
- .endif
-.endm
-
-.macro EXCEPTION_PROLOG_2_REAL label, hsrr, set_ri
- ld r10,PACAKMSR(r13) /* get MSR value for kernel */
- .if ! \set_ri
- xori r10,r10,MSR_RI /* Clear MSR_RI */
- .endif
- .if \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- mtspr SPRN_HSRR1,r10
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- mtspr SPRN_SRR1,r10
- .endif
- LOAD_HANDLER(r10, \label\())
- .if \hsrr
- mtspr SPRN_HSRR0,r10
- HRFI_TO_KERNEL
- .else
- mtspr SPRN_SRR0,r10
- RFI_TO_KERNEL
- .endif
- b . /* prevent speculative execution */
-.endm
-
-.macro EXCEPTION_PROLOG_2_VIRT label, hsrr
-#ifdef CONFIG_RELOCATABLE
- .if \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- .endif
- LOAD_HANDLER(r12, \label\())
- mtctr r12
- .if \hsrr
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- .else
- mfspr r12,SPRN_SRR1 /* and HSRR1 */
- .endif
- li r10,MSR_RI
- mtmsrd r10,1 /* Set RI (EE=0) */
- bctr
-#else
- .if \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- .endif
- li r10,MSR_RI
- mtmsrd r10,1 /* Set RI (EE=0) */
- b \label
-#endif
-.endm
-
/*
* Branch to label using its 0xC000 address. This results in instruction
* address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
@@ -260,6 +193,11 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
mtctr reg; \
bctr
+.macro INT_KVM_HANDLER name, vec, hsrr, area, skip
+ TRAMP_KVM_BEGIN(\name\()_kvm)
+ KVM_HANDLER \vec, \hsrr, \area, \skip
+.endm
+
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
@@ -272,17 +210,13 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define kvmppc_interrupt kvmppc_interrupt_pr
#endif
-.macro KVMTEST hsrr, n
+.macro KVMTEST name, hsrr, n
lbz r10,HSTATE_IN_GUEST(r13)
cmpwi r10,0
- .if \hsrr
- bne do_kvm_H\n
- .else
- bne do_kvm_\n
- .endif
+ bne \name\()_kvm
.endm
-.macro KVM_HANDLER area, hsrr, n, skip
+.macro KVM_HANDLER vec, hsrr, area, skip
.if \skip
cmpwi r10,KVM_GUEST_MODE_SKIP
beq 89f
@@ -301,10 +235,16 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
std r12,HSTATE_SCRATCH0(r13)
sldi r12,r9,32
/* HSRR variants have the 0x2 bit added to their trap number */
- .if \hsrr
- ori r12,r12,(\n + 0x2)
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ ori r12,r12,(\vec + 0x2)
+ FTR_SECTION_ELSE
+ ori r12,r12,(\vec)
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ ori r12,r12,(\vec + 0x2)
.else
- ori r12,r12,(\n)
+ ori r12,r12,(\vec)
.endif
#ifdef CONFIG_RELOCATABLE
@@ -329,7 +269,13 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
89: mtocrf 0x80,r9
ld r9,\area+EX_R9(r13)
ld r10,\area+EX_R10(r13)
- .if \hsrr
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ b kvmppc_skip_Hinterrupt
+ FTR_SECTION_ELSE
+ b kvmppc_skip_interrupt
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
b kvmppc_skip_Hinterrupt
.else
b kvmppc_skip_interrupt
@@ -338,88 +284,328 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.endm
#else
-.macro KVMTEST hsrr, n
+.macro KVMTEST name, hsrr, n
.endm
-.macro KVM_HANDLER area, hsrr, n, skip
+.macro KVM_HANDLER name, vec, hsrr, area, skip
.endm
#endif
-#define EXCEPTION_PROLOG_COMMON_1() \
- std r9,_CCR(r1); /* save CR in stackframe */ \
- std r11,_NIP(r1); /* save SRR0 in stackframe */ \
- std r12,_MSR(r1); /* save SRR1 in stackframe */ \
- std r10,0(r1); /* make stack chain pointer */ \
- std r0,GPR0(r1); /* save r0 in stackframe */ \
- std r10,GPR1(r1); /* save r1 in stackframe */ \
-
-/* Save original regs values from save area to stack frame. */
-#define EXCEPTION_PROLOG_COMMON_2(area) \
- ld r9,area+EX_R9(r13); /* move r9, r10 to stackframe */ \
- ld r10,area+EX_R10(r13); \
- std r9,GPR9(r1); \
- std r10,GPR10(r1); \
- ld r9,area+EX_R11(r13); /* move r11 - r13 to stackframe */ \
- ld r10,area+EX_R12(r13); \
- ld r11,area+EX_R13(r13); \
- std r9,GPR11(r1); \
- std r10,GPR12(r1); \
- std r11,GPR13(r1); \
-BEGIN_FTR_SECTION_NESTED(66); \
- ld r10,area+EX_CFAR(r13); \
- std r10,ORIG_GPR3(r1); \
-END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \
- GET_CTR(r10, area); \
- std r10,_CTR(r1);
-
-#define EXCEPTION_PROLOG_COMMON_3(trap) \
- std r2,GPR2(r1); /* save r2 in stackframe */ \
- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
- mflr r9; /* Get LR, later save to stack */ \
- ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
- std r9,_LINK(r1); \
- lbz r10,PACAIRQSOFTMASK(r13); \
- mfspr r11,SPRN_XER; /* save XER in stackframe */ \
- std r10,SOFTE(r1); \
- std r11,_XER(r1); \
- li r9,(trap)+1; \
- std r9,_TRAP(r1); /* set trap number */ \
- li r10,0; \
- ld r11,exception_marker@toc(r2); \
- std r10,RESULT(r1); /* clear regs->result */ \
- std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */
+.macro INT_SAVE_SRR_AND_JUMP label, hsrr, set_ri
+ ld r10,PACAKMSR(r13) /* get MSR value for kernel */
+ .if ! \set_ri
+ xori r10,r10,MSR_RI /* Clear MSR_RI */
+ .endif
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ mtspr SPRN_HSRR1,r10
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ mtspr SPRN_SRR1,r10
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ mtspr SPRN_HSRR1,r10
+ .else
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ mtspr SPRN_SRR1,r10
+ .endif
+ LOAD_HANDLER(r10, \label\())
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mtspr SPRN_HSRR0,r10
+ HRFI_TO_KERNEL
+ FTR_SECTION_ELSE
+ mtspr SPRN_SRR0,r10
+ RFI_TO_KERNEL
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mtspr SPRN_HSRR0,r10
+ HRFI_TO_KERNEL
+ .else
+ mtspr SPRN_SRR0,r10
+ RFI_TO_KERNEL
+ .endif
+ b . /* prevent speculative execution */
+.endm
+
+/* INT_SAVE_SRR_AND_JUMP works for real or virt, this is faster but virt only */
+.macro INT_VIRT_SAVE_SRR_AND_JUMP label, hsrr
+#ifdef CONFIG_RELOCATABLE
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ .else
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ .endif
+ LOAD_HANDLER(r12, \label\())
+ mtctr r12
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ FTR_SECTION_ELSE
+ mfspr r12,SPRN_SRR1 /* and HSRR1 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ .else
+ mfspr r12,SPRN_SRR1 /* and HSRR1 */
+ .endif
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Set RI (EE=0) */
+ bctr
+#else
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ .else
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ .endif
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Set RI (EE=0) */
+ b \label
+#endif
+.endm
+
+/*
+ * This is the BOOK3S interrupt entry code macro.
+ *
+ * This can result in one of several things happening:
+ * - Branch to the _common handler, relocated, in virtual mode.
+ * These are normal interrupts (synchronous and asynchronous) handled by
+ * the kernel.
+ * - Branch to KVM, relocated but real mode interrupts remain in real mode.
+ * These occur when HSTATE_IN_GUEST is set. The interrupt may be caused by
+ * / intended for host or guest kernel, but KVM must always be involved
+ * because the machine state is set for guest execution.
+ * - Branch to the masked handler, unrelocated.
+ * These occur when maskable asynchronous interrupts are taken with the
+ * irq_soft_mask set.
+ * - Branch to an "early" handler in real mode but relocated.
+ * This is done if early=1. MCE and HMI use these to handle errors in real
+ * mode.
+ * - Fall through and continue executing in real, unrelocated mode.
+ * This is done if early=2.
+ */
+.macro INT_HANDLER name, vec, ool=0, early=0, virt=0, hsrr=0, area=PACA_EXGEN, ri=1, dar=0, dsisr=0, bitmask=0, kvm=0
+ SET_SCRATCH0(r13) /* save r13 */
+ GET_PACA(r13)
+ std r9,\area\()+EX_R9(r13) /* save r9 */
+ OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR)
+ HMT_MEDIUM
+ std r10,\area\()+EX_R10(r13) /* save r10 - r12 */
+ OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
+ .if \ool
+ .if !\virt
+ b tramp_real_\name
+ .pushsection .text
+ TRAMP_REAL_BEGIN(tramp_real_\name)
+ .else
+ b tramp_virt_\name
+ .pushsection .text
+ TRAMP_VIRT_BEGIN(tramp_virt_\name)
+ .endif
+ .endif
+
+ OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR)
+ OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR)
+ INTERRUPT_TO_KERNEL
+ SAVE_CTR(r10, \area\())
+ mfcr r9
+ .if \kvm
+ KVMTEST \name \hsrr \vec
+ .endif
+ .if \bitmask
+ lbz r10,PACAIRQSOFTMASK(r13)
+ andi. r10,r10,\bitmask
+ /* Associate vector numbers with bits in paca->irq_happened */
+ .if \vec == 0x500 || \vec == 0xea0
+ li r10,PACA_IRQ_EE
+ .elseif \vec == 0x900
+ li r10,PACA_IRQ_DEC
+ .elseif \vec == 0xa00 || \vec == 0xe80
+ li r10,PACA_IRQ_DBELL
+ .elseif \vec == 0xe60
+ li r10,PACA_IRQ_HMI
+ .elseif \vec == 0xf00
+ li r10,PACA_IRQ_PMI
+ .else
+ .abort "Bad maskable vector"
+ .endif
+
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ bne masked_Hinterrupt
+ FTR_SECTION_ELSE
+ bne masked_interrupt
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ bne masked_Hinterrupt
+ .else
+ bne masked_interrupt
+ .endif
+ .endif
+
+ std r11,\area\()+EX_R11(r13)
+ std r12,\area\()+EX_R12(r13)
+
+ /*
+ * DAR/DSISR, SCRATCH0 must be read before setting MSR[RI],
+ * because a d-side MCE will clobber those registers so is
+ * not recoverable if they are live.
+ */
+ GET_SCRATCH0(r10)
+ std r10,\area\()+EX_R13(r13)
+ .if \dar
+ .if \hsrr
+ mfspr r10,SPRN_HDAR
+ .else
+ mfspr r10,SPRN_DAR
+ .endif
+ std r10,\area\()+EX_DAR(r13)
+ .endif
+ .if \dsisr
+ .if \hsrr
+ mfspr r10,SPRN_HDSISR
+ .else
+ mfspr r10,SPRN_DSISR
+ .endif
+ stw r10,\area\()+EX_DSISR(r13)
+ .endif
+
+ .if \early == 2
+ /* nothing more */
+ .elseif \early
+ mfctr r10 /* save ctr, even for !RELOCATABLE */
+ BRANCH_TO_C000(r11, \name\()_early_common)
+ .elseif !\virt
+ INT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr, \ri
+ .else
+ INT_VIRT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr
+ .endif
+ .if \ool
+ .popsection
+ .endif
+.endm
/*
* On entry r13 points to the paca, r9-r13 are saved in the paca,
* r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
* SRR1, and relocation is on.
+ *
+ * If stack=0, then the stack is already set in r1, and r1 is saved in r10.
+ * PPR save and CPU accounting is not done for the !stack case (XXX why not?)
*/
-#define EXCEPTION_COMMON(area, trap) \
- andi. r10,r12,MSR_PR; /* See if coming from user */ \
- mr r10,r1; /* Save r1 */ \
- subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \
- beq- 1f; \
- ld r1,PACAKSAVE(r13); /* kernel stack to use */ \
-1: tdgei r1,-INT_FRAME_SIZE; /* trap if r1 is in userspace */ \
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; \
-3: EXCEPTION_PROLOG_COMMON_1(); \
- kuap_save_amr_and_lock r9, r10, cr1, cr0; \
- beq 4f; /* if from kernel mode */ \
- ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \
- SAVE_PPR(area, r9); \
-4: EXCEPTION_PROLOG_COMMON_2(area); \
- EXCEPTION_PROLOG_COMMON_3(trap); \
+.macro INT_COMMON vec, area, stack, kuap, reconcile, dar, dsisr
+ .if \stack
+ andi. r10,r12,MSR_PR /* See if coming from user */
+ mr r10,r1 /* Save r1 */
+ subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */
+ beq- 100f
+ ld r1,PACAKSAVE(r13) /* kernel stack to use */
+100: tdgei r1,-INT_FRAME_SIZE /* trap if r1 is in userspace */
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0
+ .endif
+
+ std r9,_CCR(r1) /* save CR in stackframe */
+ std r11,_NIP(r1) /* save SRR0 in stackframe */
+ std r12,_MSR(r1) /* save SRR1 in stackframe */
+ std r10,0(r1) /* make stack chain pointer */
+ std r0,GPR0(r1) /* save r0 in stackframe */
+ std r10,GPR1(r1) /* save r1 in stackframe */
+
+ .if \stack
+ .if \kuap
+ kuap_save_amr_and_lock r9, r10, cr1, cr0
+ .endif
+ beq 101f /* if from kernel mode */
+ ACCOUNT_CPU_USER_ENTRY(r13, r9, r10)
+ SAVE_PPR(\area, r9)
+101:
+ .else
+ .if \kuap
+ kuap_save_amr_and_lock r9, r10, cr1
+ .endif
+ .endif
+
+ /* Save original regs values from save area to stack frame. */
+ ld r9,\area+EX_R9(r13) /* move r9, r10 to stackframe */
+ ld r10,\area+EX_R10(r13)
+ std r9,GPR9(r1)
+ std r10,GPR10(r1)
+ ld r9,\area+EX_R11(r13) /* move r11 - r13 to stackframe */
+ ld r10,\area+EX_R12(r13)
+ ld r11,\area+EX_R13(r13)
+ std r9,GPR11(r1)
+ std r10,GPR12(r1)
+ std r11,GPR13(r1)
+ .if \dar
+ .if \dar == 2
+ ld r10,_NIP(r1)
+ .else
+ ld r10,\area+EX_DAR(r13)
+ .endif
+ std r10,_DAR(r1)
+ .endif
+ .if \dsisr
+ .if \dsisr == 2
+ ld r10,_MSR(r1)
+ lis r11,DSISR_SRR1_MATCH_64S@h
+ and r10,r10,r11
+ .else
+ lwz r10,\area+EX_DSISR(r13)
+ .endif
+ std r10,_DSISR(r1)
+ .endif
+BEGIN_FTR_SECTION_NESTED(66)
+ ld r10,\area+EX_CFAR(r13)
+ std r10,ORIG_GPR3(r1)
+END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66)
+ GET_CTR(r10, \area)
+ std r10,_CTR(r1)
+ std r2,GPR2(r1) /* save r2 in stackframe */
+ SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */
+ SAVE_2GPRS(7, r1) /* save r7, r8 in stackframe */
+ mflr r9 /* Get LR, later save to stack */
+ ld r2,PACATOC(r13) /* get kernel TOC into r2 */
+ std r9,_LINK(r1)
+ lbz r10,PACAIRQSOFTMASK(r13)
+ mfspr r11,SPRN_XER /* save XER in stackframe */
+ std r10,SOFTE(r1)
+ std r11,_XER(r1)
+ li r9,(\vec)+1
+ std r9,_TRAP(r1) /* set trap number */
+ li r10,0
+ ld r11,exception_marker@toc(r2)
+ std r10,RESULT(r1) /* clear regs->result */
+ std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */
+
+ .if \stack
ACCOUNT_STOLEN_TIME
+ .endif
-/*
- * Exception where stack is already set in r1, r1 is saved in r10.
- * PPR save and CPU accounting is not done (for some reason).
- */
-#define EXCEPTION_COMMON_STACK(area, trap) \
- EXCEPTION_PROLOG_COMMON_1(); \
- kuap_save_amr_and_lock r9, r10, cr1; \
- EXCEPTION_PROLOG_COMMON_2(area); \
- EXCEPTION_PROLOG_COMMON_3(trap)
+ .if \reconcile
+ RECONCILE_IRQ_STATE(r10, r11)
+ .endif
+.endm
/*
* Restore all registers including H/SRR0/1 saved in a stack frame of a
@@ -428,6 +614,9 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \
.macro EXCEPTION_RESTORE_REGS hsrr
/* Move original SRR0 and SRR1 into the respective regs */
ld r9,_MSR(r1)
+ .if \hsrr == EXC_HV_OR_STD
+ .error "EXC_HV_OR_STD Not implemented for EXCEPTION_RESTORE_REGS"
+ .endif
.if \hsrr
mtspr SPRN_HSRR1,r9
.else
@@ -481,219 +670,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
#define FINISH_NAP
#endif
-/*
- * Following are the BOOK3S exception handler helper macros.
- * Handlers come in a number of types, and each type has a number of varieties.
- *
- * EXC_REAL_* - real, unrelocated exception vectors
- * EXC_VIRT_* - virt (AIL), unrelocated exception vectors
- * TRAMP_REAL_* - real, unrelocated helpers (virt can call these)
- * TRAMP_VIRT_* - virt, unreloc helpers (in practice, real can use)
- * TRAMP_KVM - KVM handlers that get put into real, unrelocated
- * EXC_COMMON - virt, relocated common handlers
- *
- * The EXC handlers are given a name, and branch to name_common, or the
- * appropriate KVM or masking function. Vector handler verieties are as
- * follows:
- *
- * EXC_{REAL|VIRT}_BEGIN/END - used to open-code the exception
- *
- * EXC_{REAL|VIRT} - standard exception
- *
- * EXC_{REAL|VIRT}_suffix
- * where _suffix is:
- * - _MASKABLE - maskable exception
- * - _OOL - out of line with trampoline to common handler
- * - _HV - HV exception
- *
- * There can be combinations, e.g., EXC_VIRT_OOL_MASKABLE_HV
- *
- * KVM handlers come in the following verieties:
- * TRAMP_KVM
- * TRAMP_KVM_SKIP
- * TRAMP_KVM_HV
- * TRAMP_KVM_HV_SKIP
- *
- * COMMON handlers come in the following verieties:
- * EXC_COMMON_BEGIN/END - used to open-code the handler
- * EXC_COMMON
- * EXC_COMMON_ASYNC
- *
- * TRAMP_REAL and TRAMP_VIRT can be used with BEGIN/END. KVM
- * and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers.
- */
-
-#define __EXC_REAL(name, start, size, area) \
- EXC_REAL_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 area ; \
- EXCEPTION_PROLOG_1 EXC_STD, area, 1, start, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \
- EXC_REAL_END(name, start, size)
-
-#define EXC_REAL(name, start, size) \
- __EXC_REAL(name, start, size, PACA_EXGEN)
-
-#define __EXC_VIRT(name, start, size, realvec, area) \
- EXC_VIRT_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 area ; \
- EXCEPTION_PROLOG_1 EXC_STD, area, 0, realvec, 0, 0, 0; \
- EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD ; \
- EXC_VIRT_END(name, start, size)
-
-#define EXC_VIRT(name, start, size, realvec) \
- __EXC_VIRT(name, start, size, realvec, PACA_EXGEN)
-
-#define EXC_REAL_MASKABLE(name, start, size, bitmask) \
- EXC_REAL_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 PACA_EXGEN ; \
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, start, 0, 0, bitmask ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \
- EXC_REAL_END(name, start, size)
-
-#define EXC_VIRT_MASKABLE(name, start, size, realvec, bitmask) \
- EXC_VIRT_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 PACA_EXGEN ; \
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, realvec, 0, 0, bitmask ; \
- EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD ; \
- EXC_VIRT_END(name, start, size)
-
-#define EXC_REAL_HV(name, start, size) \
- EXC_REAL_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 PACA_EXGEN; \
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, start, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_HV, 1 ; \
- EXC_REAL_END(name, start, size)
-
-#define EXC_VIRT_HV(name, start, size, realvec) \
- EXC_VIRT_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 PACA_EXGEN; \
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, realvec, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_VIRT name##_common, EXC_HV ; \
- EXC_VIRT_END(name, start, size)
-
-#define __EXC_REAL_OOL(name, start, size) \
- EXC_REAL_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 PACA_EXGEN ; \
- b tramp_real_##name ; \
- EXC_REAL_END(name, start, size)
-
-#define __TRAMP_REAL_OOL(name, vec) \
- TRAMP_REAL_BEGIN(tramp_real_##name); \
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, vec, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1
-
-#define EXC_REAL_OOL(name, start, size) \
- __EXC_REAL_OOL(name, start, size); \
- __TRAMP_REAL_OOL(name, start)
-
-#define __EXC_REAL_OOL_MASKABLE(name, start, size) \
- __EXC_REAL_OOL(name, start, size)
-
-#define __TRAMP_REAL_OOL_MASKABLE(name, vec, bitmask) \
- TRAMP_REAL_BEGIN(tramp_real_##name); \
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, vec, 0, 0, bitmask ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1
-
-#define EXC_REAL_OOL_MASKABLE(name, start, size, bitmask) \
- __EXC_REAL_OOL_MASKABLE(name, start, size); \
- __TRAMP_REAL_OOL_MASKABLE(name, start, bitmask)
-
-#define __EXC_REAL_OOL_HV(name, start, size) \
- __EXC_REAL_OOL(name, start, size)
-
-#define __TRAMP_REAL_OOL_HV(name, vec) \
- TRAMP_REAL_BEGIN(tramp_real_##name); \
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, vec, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_HV, 1
-
-#define EXC_REAL_OOL_HV(name, start, size) \
- __EXC_REAL_OOL_HV(name, start, size); \
- __TRAMP_REAL_OOL_HV(name, start)
-
-#define __EXC_REAL_OOL_MASKABLE_HV(name, start, size) \
- __EXC_REAL_OOL(name, start, size)
-
-#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec, bitmask) \
- TRAMP_REAL_BEGIN(tramp_real_##name); \
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, vec, 0, 0, bitmask ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_HV, 1
-
-#define EXC_REAL_OOL_MASKABLE_HV(name, start, size, bitmask) \
- __EXC_REAL_OOL_MASKABLE_HV(name, start, size); \
- __TRAMP_REAL_OOL_MASKABLE_HV(name, start, bitmask)
-
-#define __EXC_VIRT_OOL(name, start, size) \
- EXC_VIRT_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 PACA_EXGEN ; \
- b tramp_virt_##name; \
- EXC_VIRT_END(name, start, size)
-
-#define __TRAMP_VIRT_OOL(name, realvec) \
- TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, vec, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD
-
-#define EXC_VIRT_OOL(name, start, size, realvec) \
- __EXC_VIRT_OOL(name, start, size); \
- __TRAMP_VIRT_OOL(name, realvec)
-
-#define __EXC_VIRT_OOL_MASKABLE(name, start, size) \
- __EXC_VIRT_OOL(name, start, size)
-
-#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask) \
- TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, realvec, 0, 0, bitmask ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1
-
-#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec, bitmask) \
- __EXC_VIRT_OOL_MASKABLE(name, start, size); \
- __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask)
-
-#define __EXC_VIRT_OOL_HV(name, start, size) \
- __EXC_VIRT_OOL(name, start, size)
-
-#define __TRAMP_VIRT_OOL_HV(name, realvec) \
- TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, realvec, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_VIRT name##_common, EXC_HV
-
-#define EXC_VIRT_OOL_HV(name, start, size, realvec) \
- __EXC_VIRT_OOL_HV(name, start, size); \
- __TRAMP_VIRT_OOL_HV(name, realvec)
-
-#define __EXC_VIRT_OOL_MASKABLE_HV(name, start, size) \
- __EXC_VIRT_OOL(name, start, size)
-
-#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask) \
- TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, realvec, 0, 0, bitmask ; \
- EXCEPTION_PROLOG_2_VIRT name##_common, EXC_HV
-
-#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec, bitmask) \
- __EXC_VIRT_OOL_MASKABLE_HV(name, start, size); \
- __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask)
-
-#define TRAMP_KVM(area, n) \
- TRAMP_KVM_BEGIN(do_kvm_##n); \
- KVM_HANDLER area, EXC_STD, n, 0
-
-#define TRAMP_KVM_SKIP(area, n) \
- TRAMP_KVM_BEGIN(do_kvm_##n); \
- KVM_HANDLER area, EXC_STD, n, 1
-
-#define TRAMP_KVM_HV(area, n) \
- TRAMP_KVM_BEGIN(do_kvm_H##n); \
- KVM_HANDLER area, EXC_HV, n, 0
-
-#define TRAMP_KVM_HV_SKIP(area, n) \
- TRAMP_KVM_BEGIN(do_kvm_H##n); \
- KVM_HANDLER area, EXC_HV, n, 1
-
#define EXC_COMMON(name, realvec, hdlr) \
EXC_COMMON_BEGIN(name); \
- EXCEPTION_COMMON(PACA_EXGEN, realvec); \
+ INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
bl save_nvgprs; \
- RECONCILE_IRQ_STATE(r10, r11); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
bl hdlr; \
b ret_from_except
@@ -704,9 +684,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
*/
#define EXC_COMMON_ASYNC(name, realvec, hdlr) \
EXC_COMMON_BEGIN(name); \
- EXCEPTION_COMMON(PACA_EXGEN, realvec); \
+ INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
FINISH_NAP; \
- RECONCILE_IRQ_STATE(r10, r11); \
RUNLATCH_ON; \
addi r3,r1,STACK_FRAME_OVERHEAD; \
bl hdlr; \
@@ -836,9 +815,7 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif
- EXCEPTION_PROLOG_0 PACA_EXNMI
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXNMI, 1, 0x100, 0, 0, 0
- EXCEPTION_PROLOG_2_REAL system_reset_common, EXC_STD, 0
+ INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0, kvm=1
/*
* MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
* being used, so a nested NMI exception would corrupt it.
@@ -850,9 +827,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
* be dangerous anyway.
*/
EXC_REAL_END(system_reset, 0x100, 0x100)
-
EXC_VIRT_NONE(0x4100, 0x100)
-TRAMP_KVM(PACA_EXNMI, 0x100)
+INT_KVM_HANDLER system_reset 0x100, EXC_STD, PACA_EXNMI, 0
#ifdef CONFIG_PPC_P7_NAP
TRAMP_REAL_BEGIN(system_reset_idle_wake)
@@ -868,9 +844,7 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake)
*/
TRAMP_REAL_BEGIN(system_reset_fwnmi)
/* See comment at system_reset exception, don't turn on RI */
- EXCEPTION_PROLOG_0 PACA_EXNMI
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXNMI, 0, 0x100, 0, 0, 0
- EXCEPTION_PROLOG_2_REAL system_reset_common, EXC_STD, 0
+ INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0
#endif /* CONFIG_PPC_PSERIES */
@@ -890,7 +864,7 @@ EXC_COMMON_BEGIN(system_reset_common)
mr r10,r1
ld r1,PACA_NMI_EMERG_SP(r13)
subi r1,r1,INT_FRAME_SIZE
- EXCEPTION_COMMON_STACK(PACA_EXNMI, 0x100)
+ INT_COMMON 0x100, PACA_EXNMI, 0, 1, 0, 0, 0
bl save_nvgprs
/*
* Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does
@@ -933,26 +907,39 @@ EXC_COMMON_BEGIN(system_reset_common)
EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
- /* This is moved out of line as it can be patched by FW, but
- * some code path might still want to branch into the original
- * vector
+ INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1
+ /*
+ * MSR_RI is not enabled, because PACA_EXMC is being used, so a
+ * nested machine check corrupts it. machine_check_common enables
+ * MSR_RI.
*/
- EXCEPTION_PROLOG_0 PACA_EXMC
-BEGIN_FTR_SECTION
- b machine_check_common_early
-FTR_SECTION_ELSE
- b machine_check_pSeries_0
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
EXC_REAL_END(machine_check, 0x200, 0x100)
EXC_VIRT_NONE(0x4200, 0x100)
-TRAMP_REAL_BEGIN(machine_check_common_early)
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 0, 0x200, 0, 0, 0
+
+#ifdef CONFIG_PPC_PSERIES
+TRAMP_REAL_BEGIN(machine_check_fwnmi)
+ /* See comment at machine_check exception, don't turn on RI */
+ INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1
+#endif
+
+INT_KVM_HANDLER machine_check 0x200, EXC_STD, PACA_EXMC, 1
+
+#define MACHINE_CHECK_HANDLER_WINDUP \
+ /* Clear MSR_RI before setting SRR0 and SRR1. */\
+ li r9,0; \
+ mtmsrd r9,1; /* Clear MSR_RI */ \
+ /* Decrement paca->in_mce now RI is clear. */ \
+ lhz r12,PACA_IN_MCE(r13); \
+ subi r12,r12,1; \
+ sth r12,PACA_IN_MCE(r13); \
+ EXCEPTION_RESTORE_REGS EXC_STD
+
+EXC_COMMON_BEGIN(machine_check_early_common)
+ mtctr r10 /* Restore ctr */
+ mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
+
/*
- * Register contents:
- * R13 = PACA
- * R9 = CR
- * Original R9 to R13 is saved on PACA_EXMC
- *
* Switch to mc_emergency stack and handle re-entrancy (we limit
* the nested MCE upto level 4 to avoid stack overflow).
* Save MCE registers srr1, srr0, dar and dsisr and then set ME=1
@@ -973,103 +960,127 @@ TRAMP_REAL_BEGIN(machine_check_common_early)
* the machine check is handled then the idle wakeup code is called
* to restore state.
*/
- mr r11,r1 /* Save r1 */
lhz r10,PACA_IN_MCE(r13)
cmpwi r10,0 /* Are we in nested machine check */
- bne 0f /* Yes, we are. */
- /* First machine check entry */
- ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
-0: subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+ cmpwi cr1,r10,MAX_MCE_DEPTH /* Are we at maximum nesting */
addi r10,r10,1 /* increment paca->in_mce */
sth r10,PACA_IN_MCE(r13)
- /* Limit nested MCE to level 4 to avoid stack overflow */
- cmpwi r10,MAX_MCE_DEPTH
- bgt 2f /* Check if we hit limit of 4 */
- std r11,GPR1(r1) /* Save r1 on the stack. */
- std r11,0(r1) /* make stack chain pointer */
- mfspr r11,SPRN_SRR0 /* Save SRR0 */
- std r11,_NIP(r1)
- mfspr r11,SPRN_SRR1 /* Save SRR1 */
- std r11,_MSR(r1)
- mfspr r11,SPRN_DAR /* Save DAR */
- std r11,_DAR(r1)
- mfspr r11,SPRN_DSISR /* Save DSISR */
- std r11,_DSISR(r1)
- std r9,_CCR(r1) /* Save CR in stackframe */
+
+ mr r10,r1 /* Save r1 */
+ bne 1f
+ /* First machine check entry */
+ ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
+1: /* Limit nested MCE to level 4 to avoid stack overflow */
+ bgt cr1,unrecoverable_mce /* Check if we hit limit of 4 */
+ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+
/* We don't touch AMR here, we never go to virtual mode */
- /* Save r9 through r13 from EXMC save area to stack frame. */
- EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
- mfmsr r11 /* get MSR value */
+ INT_COMMON 0x200, PACA_EXMC, 0, 0, 0, 1, 1
+
BEGIN_FTR_SECTION
- ori r11,r11,MSR_ME /* turn on ME bit */
+ bl enable_machine_check
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- ori r11,r11,MSR_RI /* turn on RI bit */
- LOAD_HANDLER(r12, machine_check_handle_early)
-1: mtspr SPRN_SRR0,r12
- mtspr SPRN_SRR1,r11
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-2:
- /* Stack overflow. Stay on emergency stack and panic.
- * Keep the ME bit off while panic-ing, so that if we hit
- * another machine check we checkstop.
- */
- addi r1,r1,INT_FRAME_SIZE /* go back to previous stack frame */
- ld r11,PACAKMSR(r13)
- LOAD_HANDLER(r12, unrecover_mce)
- li r10,MSR_ME
- andc r11,r11,r10 /* Turn off MSR_ME */
- b 1b
- b . /* prevent speculative execution */
+ li r10,MSR_RI
+ mtmsrd r10,1
-TRAMP_REAL_BEGIN(machine_check_pSeries)
- .globl machine_check_fwnmi
-machine_check_fwnmi:
- EXCEPTION_PROLOG_0 PACA_EXMC
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_early
+ std r3,RESULT(r1) /* Save result */
+ ld r12,_MSR(r1)
+
+#ifdef CONFIG_PPC_P7_NAP
+ /*
+ * Check if thread was in power saving mode. We come here when any
+ * of the following is true:
+ * a. thread wasn't in power saving mode
+ * b. thread was in power saving mode with no state loss,
+ * supervisor state loss or hypervisor state loss.
+ *
+ * Go back to nap/sleep/winkle mode again if (b) is true.
+ */
BEGIN_FTR_SECTION
- b machine_check_common_early
-END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
-machine_check_pSeries_0:
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0
+ rlwinm. r11,r12,47-31,30,31
+ bne machine_check_idle_common
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#endif
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
- * MSR_RI is not enabled, because PACA_EXMC is being used, so a
- * nested machine check corrupts it. machine_check_common enables
- * MSR_RI.
+ * Check if we are coming from guest. If yes, then run the normal
+ * exception handler which will take the
+ * machine_check_kvm->kvmppc_interrupt branch to deliver the MC event
+ * to guest.
+ */
+ lbz r11,HSTATE_IN_GUEST(r13)
+ cmpwi r11,0 /* Check if coming from guest */
+ bne mce_deliver /* continue if we are. */
+#endif
+
+ /*
+ * Check if we are coming from userspace. If yes, then run the normal
+ * exception handler which will deliver the MC event to this kernel.
+ */
+ andi. r11,r12,MSR_PR /* See if coming from user. */
+ bne mce_deliver /* continue in V mode if we are. */
+
+ /*
+ * At this point we are coming from kernel context.
+ * Queue up the MCE event and return from the interrupt.
+ * But before that, check if this is an un-recoverable exception.
+ * If yes, then stay on emergency stack and panic.
*/
- EXCEPTION_PROLOG_2_REAL machine_check_common, EXC_STD, 0
+ andi. r11,r12,MSR_RI
+ beq unrecoverable_mce
-TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
+ /*
+ * Check if we have successfully handled/recovered from error, if not
+ * then stay on emergency stack and panic.
+ */
+ ld r3,RESULT(r1) /* Load result */
+ cmpdi r3,0 /* see if we handled MCE successfully */
+ beq unrecoverable_mce /* if !handled then panic */
+
+ /*
+ * Return from MC interrupt.
+ * Queue up the MCE event so that we can log it later, while
+ * returning from kernel or opal call.
+ */
+ bl machine_check_queue_event
+ MACHINE_CHECK_HANDLER_WINDUP
+ RFI_TO_KERNEL
+
+mce_deliver:
+ /*
+ * This is a host user or guest MCE. Restore all registers, then
+ * run the "late" handler. For host user, this will run the
+ * machine_check_exception handler in virtual mode like a normal
+ * interrupt handler. For guest, this will trigger the KVM test
+ * and branch to the KVM interrupt similarly to other interrupts.
+ */
+BEGIN_FTR_SECTION
+ ld r10,ORIG_GPR3(r1)
+ mtspr SPRN_CFAR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ MACHINE_CHECK_HANDLER_WINDUP
+ /* See comment at machine_check exception, don't turn on RI */
+ INT_HANDLER machine_check, 0x200, area=PACA_EXMC, ri=0, dar=1, dsisr=1, kvm=1
EXC_COMMON_BEGIN(machine_check_common)
/*
* Machine check is different because we use a different
* save area: PACA_EXMC instead of PACA_EXGEN.
*/
- EXCEPTION_COMMON(PACA_EXMC, 0x200)
+ INT_COMMON 0x200, PACA_EXMC, 1, 1, 1, 1, 1
FINISH_NAP
- RECONCILE_IRQ_STATE(r10, r11)
- ld r3,PACA_EXMC+EX_DAR(r13)
- lwz r4,PACA_EXMC+EX_DSISR(r13)
/* Enable MSR_RI when finished with PACA_EXMC */
li r10,MSR_RI
mtmsrd r10,1
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception
b ret_from_except
-#define MACHINE_CHECK_HANDLER_WINDUP \
- /* Clear MSR_RI before setting SRR0 and SRR1. */\
- li r9,0; \
- mtmsrd r9,1; /* Clear MSR_RI */ \
- /* Decrement paca->in_mce now RI is clear. */ \
- lhz r12,PACA_IN_MCE(r13); \
- subi r12,r12,1; \
- sth r12,PACA_IN_MCE(r13); \
- EXCEPTION_RESTORE_REGS EXC_STD
-
#ifdef CONFIG_PPC_P7_NAP
/*
* This is an idle wakeup. Low level machine check has already been
@@ -1101,72 +1112,8 @@ EXC_COMMON_BEGIN(machine_check_idle_common)
bltlr cr1 /* no state loss, return to idle caller */
b idle_return_gpr_loss
#endif
- /*
- * Handle machine check early in real mode. We come here with
- * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
- */
-EXC_COMMON_BEGIN(machine_check_handle_early)
- std r0,GPR0(r1) /* Save r0 */
- EXCEPTION_PROLOG_COMMON_3(0x200)
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_early
- std r3,RESULT(r1) /* Save result */
- ld r12,_MSR(r1)
-BEGIN_FTR_SECTION
- b 4f
-END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
-
-#ifdef CONFIG_PPC_P7_NAP
- /*
- * Check if thread was in power saving mode. We come here when any
- * of the following is true:
- * a. thread wasn't in power saving mode
- * b. thread was in power saving mode with no state loss,
- * supervisor state loss or hypervisor state loss.
- *
- * Go back to nap/sleep/winkle mode again if (b) is true.
- */
-BEGIN_FTR_SECTION
- rlwinm. r11,r12,47-31,30,31
- bne machine_check_idle_common
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#endif
- /*
- * Check if we are coming from hypervisor userspace. If yes then we
- * continue in host kernel in V mode to deliver the MC event.
- */
- rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */
- beq 5f
-4: andi. r11,r12,MSR_PR /* See if coming from user. */
- bne 9f /* continue in V mode if we are. */
-
-5:
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-BEGIN_FTR_SECTION
- /*
- * We are coming from kernel context. Check if we are coming from
- * guest. if yes, then we can continue. We will fall through
- * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest.
- */
- lbz r11,HSTATE_IN_GUEST(r13)
- cmpwi r11,0 /* Check if coming from guest */
- bne 9f /* continue if we are. */
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
-#endif
- /*
- * At this point we are not sure about what context we come from.
- * Queue up the MCE event and return from the interrupt.
- * But before that, check if this is an un-recoverable exception.
- * If yes, then stay on emergency stack and panic.
- */
- andi. r11,r12,MSR_RI
- bne 2f
-1: mfspr r11,SPRN_SRR0
- LOAD_HANDLER(r10,unrecover_mce)
- mtspr SPRN_SRR0,r10
- ld r10,PACAKMSR(r13)
+EXC_COMMON_BEGIN(unrecoverable_mce)
/*
* We are going down. But there are chances that we might get hit by
* another MCE during panic path and we may run into unstable state
@@ -1174,84 +1121,36 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
* when another MCE is hit during panic path, system will checkstop
* and hypervisor will get restarted cleanly by SP.
*/
- li r3,MSR_ME
- andc r10,r10,r3 /* Turn off MSR_ME */
- mtspr SPRN_SRR1,r10
- RFI_TO_KERNEL
- b .
-2:
- /*
- * Check if we have successfully handled/recovered from error, if not
- * then stay on emergency stack and panic.
- */
- ld r3,RESULT(r1) /* Load result */
- cmpdi r3,0 /* see if we handled MCE successfully */
-
- beq 1b /* if !handled then panic */
BEGIN_FTR_SECTION
- /*
- * Return from MC interrupt.
- * Queue up the MCE event so that we can log it later, while
- * returning from kernel or opal call.
- */
- bl machine_check_queue_event
- MACHINE_CHECK_HANDLER_WINDUP
- RFI_TO_USER_OR_KERNEL
-FTR_SECTION_ELSE
- /*
- * pSeries: Return from MC interrupt. Before that stay on emergency
- * stack and call machine_check_exception to log the MCE event.
- */
- LOAD_HANDLER(r10,mce_return)
- mtspr SPRN_SRR0,r10
+ li r10,0 /* clear MSR_RI */
+ mtmsrd r10,1
+ bl disable_machine_check
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
ld r10,PACAKMSR(r13)
- mtspr SPRN_SRR1,r10
- RFI_TO_KERNEL
- b .
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
-9:
- /* Deliver the machine check to host kernel in V mode. */
- MACHINE_CHECK_HANDLER_WINDUP
- EXCEPTION_PROLOG_0 PACA_EXMC
- b machine_check_pSeries_0
+ li r3,MSR_ME
+ andc r10,r10,r3
+ mtmsrd r10
-EXC_COMMON_BEGIN(unrecover_mce)
/* Invoke machine_check_exception to print MCE event and panic. */
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception
+
/*
- * We will not reach here. Even if we did, there is no way out. Call
- * unrecoverable_exception and die.
+ * We will not reach here. Even if we did, there is no way out.
+ * Call unrecoverable_exception and die.
*/
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b 1b
-
-EXC_COMMON_BEGIN(mce_return)
- /* Invoke machine_check_exception to print MCE event and return. */
addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_exception
- MACHINE_CHECK_HANDLER_WINDUP
- RFI_TO_KERNEL
+ bl unrecoverable_exception
b .
+
EXC_REAL_BEGIN(data_access, 0x300, 0x80)
- EXCEPTION_PROLOG_0 PACA_EXGEN
- b tramp_real_data_access
+ INT_HANDLER data_access, 0x300, ool=1, dar=1, dsisr=1, kvm=1
EXC_REAL_END(data_access, 0x300, 0x80)
-
-TRAMP_REAL_BEGIN(tramp_real_data_access)
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x300, 1, 1, 0
- EXCEPTION_PROLOG_2_REAL data_access_common, EXC_STD, 1
-
EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
- EXCEPTION_PROLOG_0 PACA_EXGEN
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, 0x300, 1, 1, 0
-EXCEPTION_PROLOG_2_VIRT data_access_common, EXC_STD
+ INT_HANDLER data_access, 0x300, virt=1, dar=1, dsisr=1
EXC_VIRT_END(data_access, 0x4300, 0x80)
-
-TRAMP_KVM_SKIP(PACA_EXGEN, 0x300)
-
+INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1
EXC_COMMON_BEGIN(data_access_common)
/*
* Here r13 points to the paca, r9 contains the saved CR,
@@ -1259,15 +1158,12 @@ EXC_COMMON_BEGIN(data_access_common)
* r9 - r13 are saved in paca->exgen.
* EX_DAR and EX_DSISR have saved DAR/DSISR
*/
- EXCEPTION_COMMON(PACA_EXGEN, 0x300)
- RECONCILE_IRQ_STATE(r10, r11)
- ld r12,_MSR(r1)
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- li r5,0x300
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
+ INT_COMMON 0x300, PACA_EXGEN, 1, 1, 1, 1, 1
+ ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
BEGIN_MMU_FTR_SECTION
+ ld r6,_MSR(r1)
+ li r3,0x300
b do_hash_page /* Try to handle as hpte fault */
MMU_FTR_SECTION_ELSE
b handle_page_fault
@@ -1275,26 +1171,15 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
- EXCEPTION_PROLOG_0 PACA_EXSLB
- b tramp_real_data_access_slb
+ INT_HANDLER data_access_slb, 0x380, ool=1, area=PACA_EXSLB, dar=1, kvm=1
EXC_REAL_END(data_access_slb, 0x380, 0x80)
-
-TRAMP_REAL_BEGIN(tramp_real_data_access_slb)
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXSLB, 1, 0x380, 1, 0, 0
- EXCEPTION_PROLOG_2_REAL data_access_slb_common, EXC_STD, 1
-
EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
- EXCEPTION_PROLOG_0 PACA_EXSLB
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXSLB, 0, 0x380, 1, 0, 0
- EXCEPTION_PROLOG_2_VIRT data_access_slb_common, EXC_STD
+ INT_HANDLER data_access_slb, 0x380, virt=1, area=PACA_EXSLB, dar=1
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
-
-TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
-
+INT_KVM_HANDLER data_access_slb, 0x380, EXC_STD, PACA_EXSLB, 1
EXC_COMMON_BEGIN(data_access_slb_common)
- EXCEPTION_COMMON(PACA_EXSLB, 0x380)
- ld r4,PACA_EXSLB+EX_DAR(r13)
- std r4,_DAR(r1)
+ INT_COMMON 0x380, PACA_EXSLB, 1, 1, 0, 1, 0
+ ld r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
@@ -1317,33 +1202,36 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
b ret_from_except
-EXC_REAL(instruction_access, 0x400, 0x80)
-EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400)
-TRAMP_KVM(PACA_EXGEN, 0x400)
-
+EXC_REAL_BEGIN(instruction_access, 0x400, 0x80)
+ INT_HANDLER instruction_access, 0x400, kvm=1
+EXC_REAL_END(instruction_access, 0x400, 0x80)
+EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80)
+ INT_HANDLER instruction_access, 0x400, virt=1
+EXC_VIRT_END(instruction_access, 0x4400, 0x80)
+INT_KVM_HANDLER instruction_access, 0x400, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(instruction_access_common)
- EXCEPTION_COMMON(PACA_EXGEN, 0x400)
- RECONCILE_IRQ_STATE(r10, r11)
- ld r12,_MSR(r1)
- ld r3,_NIP(r1)
- andis. r4,r12,DSISR_SRR1_MATCH_64S@h
- li r5,0x400
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
+ INT_COMMON 0x400, PACA_EXGEN, 1, 1, 1, 2, 2
+ ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
BEGIN_MMU_FTR_SECTION
+ ld r6,_MSR(r1)
+ li r3,0x400
b do_hash_page /* Try to handle as hpte fault */
MMU_FTR_SECTION_ELSE
b handle_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
-__EXC_REAL(instruction_access_slb, 0x480, 0x80, PACA_EXSLB)
-__EXC_VIRT(instruction_access_slb, 0x4480, 0x80, 0x480, PACA_EXSLB)
-TRAMP_KVM(PACA_EXSLB, 0x480)
-
+EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
+ INT_HANDLER instruction_access_slb, 0x480, area=PACA_EXSLB, kvm=1
+EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
+EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
+ INT_HANDLER instruction_access_slb, 0x480, virt=1, area=PACA_EXSLB
+EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
+INT_KVM_HANDLER instruction_access_slb, 0x480, EXC_STD, PACA_EXSLB, 0
EXC_COMMON_BEGIN(instruction_access_slb_common)
- EXCEPTION_COMMON(PACA_EXSLB, 0x480)
- ld r4,_NIP(r1)
+ INT_COMMON 0x480, PACA_EXSLB, 1, 1, 0, 2, 0
+ ld r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
@@ -1359,69 +1247,44 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
- ld r4,_NIP(r1)
+ ld r4,_DAR(r1)
ld r5,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
b ret_from_except
-
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
- EXCEPTION_PROLOG_0 PACA_EXGEN
-BEGIN_FTR_SECTION
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0x500, 0, 0, IRQS_DISABLED
- EXCEPTION_PROLOG_2_REAL hardware_interrupt_common, EXC_HV, 1
-FTR_SECTION_ELSE
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x500, 0, 0, IRQS_DISABLED
- EXCEPTION_PROLOG_2_REAL hardware_interrupt_common, EXC_STD, 1
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ INT_HANDLER hardware_interrupt, 0x500, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1
EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
-
EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
- EXCEPTION_PROLOG_0 PACA_EXGEN
-BEGIN_FTR_SECTION
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0x500, 0, 0, IRQS_DISABLED
- EXCEPTION_PROLOG_2_VIRT hardware_interrupt_common, EXC_HV
-FTR_SECTION_ELSE
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x500, 0, 0, IRQS_DISABLED
- EXCEPTION_PROLOG_2_VIRT hardware_interrupt_common, EXC_STD
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+ INT_HANDLER hardware_interrupt, 0x500, virt=1, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1
EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
-
-TRAMP_KVM(PACA_EXGEN, 0x500)
-TRAMP_KVM_HV(PACA_EXGEN, 0x500)
+INT_KVM_HANDLER hardware_interrupt, 0x500, EXC_HV_OR_STD, PACA_EXGEN, 0
EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ)
EXC_REAL_BEGIN(alignment, 0x600, 0x100)
- EXCEPTION_PROLOG_0 PACA_EXGEN
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x600, 1, 1, 0
- EXCEPTION_PROLOG_2_REAL alignment_common, EXC_STD, 1
+ INT_HANDLER alignment, 0x600, dar=1, dsisr=1, kvm=1
EXC_REAL_END(alignment, 0x600, 0x100)
-
EXC_VIRT_BEGIN(alignment, 0x4600, 0x100)
- EXCEPTION_PROLOG_0 PACA_EXGEN
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, 0x600, 1, 1, 0
- EXCEPTION_PROLOG_2_VIRT alignment_common, EXC_STD
+ INT_HANDLER alignment, 0x600, virt=1, dar=1, dsisr=1
EXC_VIRT_END(alignment, 0x4600, 0x100)
-
-TRAMP_KVM(PACA_EXGEN, 0x600)
+INT_KVM_HANDLER alignment, 0x600, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(alignment_common)
- EXCEPTION_COMMON(PACA_EXGEN, 0x600)
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
+ INT_COMMON 0x600, PACA_EXGEN, 1, 1, 1, 1, 1
bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl alignment_exception
b ret_from_except
-EXC_REAL(program_check, 0x700, 0x100)
-EXC_VIRT(program_check, 0x4700, 0x100, 0x700)
-TRAMP_KVM(PACA_EXGEN, 0x700)
+EXC_REAL_BEGIN(program_check, 0x700, 0x100)
+ INT_HANDLER program_check, 0x700, kvm=1
+EXC_REAL_END(program_check, 0x700, 0x100)
+EXC_VIRT_BEGIN(program_check, 0x4700, 0x100)
+ INT_HANDLER program_check, 0x700, virt=1
+EXC_VIRT_END(program_check, 0x4700, 0x100)
+INT_KVM_HANDLER program_check, 0x700, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(program_check_common)
/*
* It's possible to receive a TM Bad Thing type program check with
@@ -1447,27 +1310,33 @@ EXC_COMMON_BEGIN(program_check_common)
mr r10,r1 /* Save r1 */
ld r1,PACAEMERGSP(r13) /* Use emergency stack */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- b 3f /* Jump into the macro !! */
+ INT_COMMON 0x700, PACA_EXGEN, 0, 1, 1, 0, 0
+ b 3f
2:
- EXCEPTION_COMMON(PACA_EXGEN, 0x700)
+ INT_COMMON 0x700, PACA_EXGEN, 1, 1, 1, 0, 0
+3:
bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl program_check_exception
b ret_from_except
-EXC_REAL(fp_unavailable, 0x800, 0x100)
-EXC_VIRT(fp_unavailable, 0x4800, 0x100, 0x800)
-TRAMP_KVM(PACA_EXGEN, 0x800)
+EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100)
+ INT_HANDLER fp_unavailable, 0x800, kvm=1
+EXC_REAL_END(fp_unavailable, 0x800, 0x100)
+EXC_VIRT_BEGIN(fp_unavailable, 0x4800, 0x100)
+ INT_HANDLER fp_unavailable, 0x800, virt=1
+EXC_VIRT_END(fp_unavailable, 0x4800, 0x100)
+INT_KVM_HANDLER fp_unavailable, 0x800, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(fp_unavailable_common)
- EXCEPTION_COMMON(PACA_EXGEN, 0x800)
+ INT_COMMON 0x800, PACA_EXGEN, 1, 1, 0, 0, 0
bne 1f /* if from user, just load it up */
bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl kernel_fp_unavailable_exception
- BUG_OPCODE
+0: trap
+ EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
1:
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BEGIN_FTR_SECTION
@@ -1490,21 +1359,33 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
-EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
-EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED)
-TRAMP_KVM(PACA_EXGEN, 0x900)
+EXC_REAL_BEGIN(decrementer, 0x900, 0x80)
+ INT_HANDLER decrementer, 0x900, ool=1, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(decrementer, 0x900, 0x80)
+EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
+ INT_HANDLER decrementer, 0x900, virt=1, bitmask=IRQS_DISABLED
+EXC_VIRT_END(decrementer, 0x4900, 0x80)
+INT_KVM_HANDLER decrementer, 0x900, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
-EXC_REAL_HV(hdecrementer, 0x980, 0x80)
-EXC_VIRT_HV(hdecrementer, 0x4980, 0x80, 0x980)
-TRAMP_KVM_HV(PACA_EXGEN, 0x980)
+EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80)
+ INT_HANDLER hdecrementer, 0x980, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(hdecrementer, 0x980, 0x80)
+EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80)
+ INT_HANDLER hdecrementer, 0x980, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(hdecrementer, 0x4980, 0x80)
+INT_KVM_HANDLER hdecrementer, 0x980, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
-EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100, IRQS_DISABLED)
-EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00, IRQS_DISABLED)
-TRAMP_KVM(PACA_EXGEN, 0xa00)
+EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100)
+ INT_HANDLER doorbell_super, 0xa00, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(doorbell_super, 0xa00, 0x100)
+EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
+ INT_HANDLER doorbell_super, 0xa00, virt=1, bitmask=IRQS_DISABLED
+EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
+INT_KVM_HANDLER doorbell_super, 0xa00, EXC_STD, PACA_EXGEN, 0
#ifdef CONFIG_PPC_DOORBELL
EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception)
#else
@@ -1512,17 +1393,13 @@ EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, unknown_exception)
#endif
-EXC_REAL(trap_0b, 0xb00, 0x100)
-EXC_VIRT(trap_0b, 0x4b00, 0x100, 0xb00)
-TRAMP_KVM(PACA_EXGEN, 0xb00)
-EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
+EXC_REAL_NONE(0xb00, 0x100)
+EXC_VIRT_NONE(0x4b00, 0x100)
/*
* system call / hypercall (0xc00, 0x4c00)
*
* The system call exception is invoked with "sc 0" and does not alter HV bit.
- * There is support for kernel code to invoke system calls but there are no
- * in-tree users.
*
* The hypercall is invoked with "sc 1" and sets HV=1.
*
@@ -1531,22 +1408,9 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
*
* Call convention:
*
- * syscall register convention is in Documentation/powerpc/syscall64-abi.rst
- *
- * For hypercalls, the register convention is as follows:
- * r0 volatile
- * r1-2 nonvolatile
- * r3 volatile parameter and return value for status
- * r4-r10 volatile input and output value
- * r11 volatile hypercall number and output value
- * r12 volatile input and output value
- * r13-r31 nonvolatile
- * LR nonvolatile
- * CTR volatile
- * XER volatile
- * CR0-1 CR5-7 volatile
- * CR2-4 nonvolatile
- * Other registers nonvolatile
+ * syscall and hypercalls register conventions are documented in
+ * Documentation/powerpc/syscall64-abi.rst and
+ * Documentation/powerpc/papr_hcalls.rst respectively.
*
* The intersection of volatile registers that don't contain possible
* inputs is: cr0, xer, ctr. We may use these as scratch regs upon entry
@@ -1567,7 +1431,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
GET_PACA(r13)
std r10,PACA_EXGEN+EX_R10(r13)
INTERRUPT_TO_KERNEL
- KVMTEST EXC_STD 0xc00 /* uses r10, branch to do_kvm_0xc00_system_call */
+ KVMTEST system_call EXC_STD 0xc00 /* uses r10, branch to system_call_kvm */
mfctr r9
#else
mr r9,r13
@@ -1621,7 +1485,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
EXC_REAL_BEGIN(system_call, 0xc00, 0x100)
SYSTEM_CALL 0
EXC_REAL_END(system_call, 0xc00, 0x100)
-
EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
SYSTEM_CALL 1
EXC_VIRT_END(system_call, 0x4c00, 0x100)
@@ -1634,7 +1497,7 @@ EXC_VIRT_END(system_call, 0x4c00, 0x100)
* ctr = orig r13
* orig r10 saved in PACA
*/
-TRAMP_KVM_BEGIN(do_kvm_0xc00)
+TRAMP_KVM_BEGIN(system_call_kvm)
/*
* Save the PPR (on systems that support it) before changing to
* HMT_MEDIUM. That allows the KVM code to save that value into the
@@ -1647,32 +1510,33 @@ TRAMP_KVM_BEGIN(do_kvm_0xc00)
SET_SCRATCH0(r10)
std r9,PACA_EXGEN+EX_R9(r13)
mfcr r9
- KVM_HANDLER PACA_EXGEN, EXC_STD, 0xc00, 0
+ KVM_HANDLER 0xc00, EXC_STD, PACA_EXGEN, 0
#endif
-EXC_REAL(single_step, 0xd00, 0x100)
-EXC_VIRT(single_step, 0x4d00, 0x100, 0xd00)
-TRAMP_KVM(PACA_EXGEN, 0xd00)
+EXC_REAL_BEGIN(single_step, 0xd00, 0x100)
+ INT_HANDLER single_step, 0xd00, kvm=1
+EXC_REAL_END(single_step, 0xd00, 0x100)
+EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100)
+ INT_HANDLER single_step, 0xd00, virt=1
+EXC_VIRT_END(single_step, 0x4d00, 0x100)
+INT_KVM_HANDLER single_step, 0xd00, EXC_STD, PACA_EXGEN, 0
EXC_COMMON(single_step_common, 0xd00, single_step_exception)
-EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0x20)
-EXC_VIRT_OOL_HV(h_data_storage, 0x4e00, 0x20, 0xe00)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00)
+
+EXC_REAL_BEGIN(h_data_storage, 0xe00, 0x20)
+ INT_HANDLER h_data_storage, 0xe00, ool=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1
+EXC_REAL_END(h_data_storage, 0xe00, 0x20)
+EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20)
+ INT_HANDLER h_data_storage, 0xe00, ool=1, virt=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1
+EXC_VIRT_END(h_data_storage, 0x4e00, 0x20)
+INT_KVM_HANDLER h_data_storage, 0xe00, EXC_HV, PACA_EXGEN, 1
EXC_COMMON_BEGIN(h_data_storage_common)
- mfspr r10,SPRN_HDAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_HDSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_COMMON(PACA_EXGEN, 0xe00)
+ INT_COMMON 0xe00, PACA_EXGEN, 1, 1, 1, 1, 1
bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
- ld r4,PACA_EXGEN+EX_DAR(r13)
- lwz r5,PACA_EXGEN+EX_DSISR(r13)
- std r4,_DAR(r1)
- std r5,_DSISR(r1)
+ ld r4,_DAR(r1)
li r5,SIGSEGV
bl bad_page_fault
MMU_FTR_SECTION_ELSE
@@ -1681,15 +1545,23 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
b ret_from_except
-EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0x20)
-EXC_VIRT_OOL_HV(h_instr_storage, 0x4e20, 0x20, 0xe20)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe20)
+EXC_REAL_BEGIN(h_instr_storage, 0xe20, 0x20)
+ INT_HANDLER h_instr_storage, 0xe20, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(h_instr_storage, 0xe20, 0x20)
+EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20)
+ INT_HANDLER h_instr_storage, 0xe20, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20)
+INT_KVM_HANDLER h_instr_storage, 0xe20, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception)
-EXC_REAL_OOL_HV(emulation_assist, 0xe40, 0x20)
-EXC_VIRT_OOL_HV(emulation_assist, 0x4e40, 0x20, 0xe40)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe40)
+EXC_REAL_BEGIN(emulation_assist, 0xe40, 0x20)
+ INT_HANDLER emulation_assist, 0xe40, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(emulation_assist, 0xe40, 0x20)
+EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20)
+ INT_HANDLER emulation_assist, 0xe40, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(emulation_assist, 0x4e40, 0x20)
+INT_KVM_HANDLER emulation_assist, 0xe40, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
@@ -1699,16 +1571,10 @@ EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
* mode.
*/
EXC_REAL_BEGIN(hmi_exception, 0xe60, 0x20)
- EXCEPTION_PROLOG_0 PACA_EXGEN
- b hmi_exception_early
+ INT_HANDLER hmi_exception, 0xe60, ool=1, early=1, hsrr=EXC_HV, ri=0, kvm=1
EXC_REAL_END(hmi_exception, 0xe60, 0x20)
EXC_VIRT_NONE(0x4e60, 0x20)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe60)
-TRAMP_REAL_BEGIN(hmi_exception_early)
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0xe60, 0, 0, 0
- mfctr r10 /* save ctr, even for !RELOCATABLE */
- BRANCH_TO_C000(r11, hmi_exception_early_common)
-
+INT_KVM_HANDLER hmi_exception, 0xe60, EXC_HV, PACA_EXGEN, 0
EXC_COMMON_BEGIN(hmi_exception_early_common)
mtctr r10 /* Restore ctr */
mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
@@ -1716,10 +1582,10 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
mr r10,r1 /* Save r1 */
ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- EXCEPTION_PROLOG_COMMON_1()
+
/* We don't touch AMR here, we never go to virtual mode */
- EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
- EXCEPTION_PROLOG_COMMON_3(0xe60)
+ INT_COMMON 0xe60, PACA_EXGEN, 0, 0, 0, 0, 0
+
addi r3,r1,STACK_FRAME_OVERHEAD
bl hmi_exception_realmode
cmpdi cr0,r3,0
@@ -1734,23 +1600,25 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
* firmware.
*/
EXCEPTION_RESTORE_REGS EXC_HV
- EXCEPTION_PROLOG_0 PACA_EXGEN
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0xe60, 0, 0, IRQS_DISABLED
- EXCEPTION_PROLOG_2_REAL hmi_exception_common, EXC_HV, 1
+ INT_HANDLER hmi_exception, 0xe60, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
EXC_COMMON_BEGIN(hmi_exception_common)
- EXCEPTION_COMMON(PACA_EXGEN, 0xe60)
+ INT_COMMON 0xe60, PACA_EXGEN, 1, 1, 1, 0, 0
FINISH_NAP
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
RUNLATCH_ON
+ bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl handle_hmi_exception
b ret_from_except
-EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20, IRQS_DISABLED)
-EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80, IRQS_DISABLED)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe80)
+
+EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20)
+ INT_HANDLER h_doorbell, 0xe80, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(h_doorbell, 0xe80, 0x20)
+EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
+ INT_HANDLER h_doorbell, 0xe80, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
+INT_KVM_HANDLER h_doorbell, 0xe80, EXC_HV, PACA_EXGEN, 0
#ifdef CONFIG_PPC_DOORBELL
EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception)
#else
@@ -1758,9 +1626,13 @@ EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception)
#endif
-EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20, IRQS_DISABLED)
-EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0, IRQS_DISABLED)
-TRAMP_KVM_HV(PACA_EXGEN, 0xea0)
+EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20)
+ INT_HANDLER h_virt_irq, 0xea0, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(h_virt_irq, 0xea0, 0x20)
+EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
+ INT_HANDLER h_virt_irq, 0xea0, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
+INT_KVM_HANDLER h_virt_irq, 0xea0, EXC_HV, PACA_EXGEN, 0
EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ)
@@ -1770,17 +1642,25 @@ EXC_REAL_NONE(0xee0, 0x20)
EXC_VIRT_NONE(0x4ee0, 0x20)
-EXC_REAL_OOL_MASKABLE(performance_monitor, 0xf00, 0x20, IRQS_PMI_DISABLED)
-EXC_VIRT_OOL_MASKABLE(performance_monitor, 0x4f00, 0x20, 0xf00, IRQS_PMI_DISABLED)
-TRAMP_KVM(PACA_EXGEN, 0xf00)
+EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20)
+ INT_HANDLER performance_monitor, 0xf00, ool=1, bitmask=IRQS_PMI_DISABLED, kvm=1
+EXC_REAL_END(performance_monitor, 0xf00, 0x20)
+EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
+ INT_HANDLER performance_monitor, 0xf00, ool=1, virt=1, bitmask=IRQS_PMI_DISABLED
+EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
+INT_KVM_HANDLER performance_monitor, 0xf00, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception)
-EXC_REAL_OOL(altivec_unavailable, 0xf20, 0x20)
-EXC_VIRT_OOL(altivec_unavailable, 0x4f20, 0x20, 0xf20)
-TRAMP_KVM(PACA_EXGEN, 0xf20)
+EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20)
+ INT_HANDLER altivec_unavailable, 0xf20, ool=1, kvm=1
+EXC_REAL_END(altivec_unavailable, 0xf20, 0x20)
+EXC_VIRT_BEGIN(altivec_unavailable, 0x4f20, 0x20)
+ INT_HANDLER altivec_unavailable, 0xf20, ool=1, virt=1
+EXC_VIRT_END(altivec_unavailable, 0x4f20, 0x20)
+INT_KVM_HANDLER altivec_unavailable, 0xf20, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(altivec_unavailable_common)
- EXCEPTION_COMMON(PACA_EXGEN, 0xf20)
+ INT_COMMON 0xf20, PACA_EXGEN, 1, 1, 0, 0, 0
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
beq 1f
@@ -1813,11 +1693,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
b ret_from_except
-EXC_REAL_OOL(vsx_unavailable, 0xf40, 0x20)
-EXC_VIRT_OOL(vsx_unavailable, 0x4f40, 0x20, 0xf40)
-TRAMP_KVM(PACA_EXGEN, 0xf40)
+EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20)
+ INT_HANDLER vsx_unavailable, 0xf40, ool=1, kvm=1
+EXC_REAL_END(vsx_unavailable, 0xf40, 0x20)
+EXC_VIRT_BEGIN(vsx_unavailable, 0x4f40, 0x20)
+ INT_HANDLER vsx_unavailable, 0xf40, ool=1, virt=1
+EXC_VIRT_END(vsx_unavailable, 0x4f40, 0x20)
+INT_KVM_HANDLER vsx_unavailable, 0xf40, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(vsx_unavailable_common)
- EXCEPTION_COMMON(PACA_EXGEN, 0xf40)
+ INT_COMMON 0xf40, PACA_EXGEN, 1, 1, 0, 0, 0
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
beq 1f
@@ -1849,15 +1733,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
b ret_from_except
-EXC_REAL_OOL(facility_unavailable, 0xf60, 0x20)
-EXC_VIRT_OOL(facility_unavailable, 0x4f60, 0x20, 0xf60)
-TRAMP_KVM(PACA_EXGEN, 0xf60)
+EXC_REAL_BEGIN(facility_unavailable, 0xf60, 0x20)
+ INT_HANDLER facility_unavailable, 0xf60, ool=1, kvm=1
+EXC_REAL_END(facility_unavailable, 0xf60, 0x20)
+EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20)
+ INT_HANDLER facility_unavailable, 0xf60, ool=1, virt=1
+EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20)
+INT_KVM_HANDLER facility_unavailable, 0xf60, EXC_STD, PACA_EXGEN, 0
EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception)
-EXC_REAL_OOL_HV(h_facility_unavailable, 0xf80, 0x20)
-EXC_VIRT_OOL_HV(h_facility_unavailable, 0x4f80, 0x20, 0xf80)
-TRAMP_KVM_HV(PACA_EXGEN, 0xf80)
+EXC_REAL_BEGIN(h_facility_unavailable, 0xf80, 0x20)
+ INT_HANDLER h_facility_unavailable, 0xf80, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(h_facility_unavailable, 0xf80, 0x20)
+EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20)
+ INT_HANDLER h_facility_unavailable, 0xf80, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20)
+INT_KVM_HANDLER h_facility_unavailable, 0xf80, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception)
@@ -1874,9 +1766,11 @@ EXC_REAL_NONE(0x1100, 0x100)
EXC_VIRT_NONE(0x5100, 0x100)
#ifdef CONFIG_CBE_RAS
-EXC_REAL_HV(cbe_system_error, 0x1200, 0x100)
+EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100)
+ INT_HANDLER cbe_system_error, 0x1200, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(cbe_system_error, 0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1200)
+INT_KVM_HANDLER cbe_system_error, 0x1200, EXC_HV, PACA_EXGEN, 1
EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception)
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1200, 0x100)
@@ -1884,37 +1778,43 @@ EXC_VIRT_NONE(0x5200, 0x100)
#endif
-EXC_REAL(instruction_breakpoint, 0x1300, 0x100)
-EXC_VIRT(instruction_breakpoint, 0x5300, 0x100, 0x1300)
-TRAMP_KVM_SKIP(PACA_EXGEN, 0x1300)
+EXC_REAL_BEGIN(instruction_breakpoint, 0x1300, 0x100)
+ INT_HANDLER instruction_breakpoint, 0x1300, kvm=1
+EXC_REAL_END(instruction_breakpoint, 0x1300, 0x100)
+EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100)
+ INT_HANDLER instruction_breakpoint, 0x1300, virt=1
+EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100)
+INT_KVM_HANDLER instruction_breakpoint, 0x1300, EXC_STD, PACA_EXGEN, 1
EXC_COMMON(instruction_breakpoint_common, 0x1300, instruction_breakpoint_exception)
+
EXC_REAL_NONE(0x1400, 0x100)
EXC_VIRT_NONE(0x5400, 0x100)
EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100)
- EXCEPTION_PROLOG_0 PACA_EXGEN
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 0, 0x1500, 0, 0, 0
-
+ INT_HANDLER denorm_exception_hv, 0x1500, early=2, hsrr=EXC_HV
#ifdef CONFIG_PPC_DENORMALISATION
mfspr r10,SPRN_HSRR1
andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
bne+ denorm_assist
#endif
-
- KVMTEST EXC_HV 0x1500
- EXCEPTION_PROLOG_2_REAL denorm_common, EXC_HV, 1
+ KVMTEST denorm_exception_hv, EXC_HV 0x1500
+ INT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV, 1
EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100)
#ifdef CONFIG_PPC_DENORMALISATION
EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100)
- b exc_real_0x1500_denorm_exception_hv
+ INT_HANDLER denorm_exception, 0x1500, 0, 2, 1, EXC_HV, PACA_EXGEN, 1, 0, 0, 0, 0
+ mfspr r10,SPRN_HSRR1
+ andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
+ bne+ denorm_assist
+ INT_VIRT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV
EXC_VIRT_END(denorm_exception, 0x5500, 0x100)
#else
EXC_VIRT_NONE(0x5500, 0x100)
#endif
-TRAMP_KVM_HV(PACA_EXGEN, 0x1500)
+INT_KVM_HANDLER denorm_exception_hv, 0x1500, EXC_HV, PACA_EXGEN, 0
#ifdef CONFIG_PPC_DENORMALISATION
TRAMP_REAL_BEGIN(denorm_assist)
@@ -1989,9 +1889,11 @@ EXC_COMMON(denorm_common, 0x1500, unknown_exception)
#ifdef CONFIG_CBE_RAS
-EXC_REAL_HV(cbe_maintenance, 0x1600, 0x100)
+EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100)
+ INT_HANDLER cbe_maintenance, 0x1600, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(cbe_maintenance, 0x1600, 0x100)
EXC_VIRT_NONE(0x5600, 0x100)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1600)
+INT_KVM_HANDLER cbe_maintenance, 0x1600, EXC_HV, PACA_EXGEN, 1
EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception)
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1600, 0x100)
@@ -1999,9 +1901,13 @@ EXC_VIRT_NONE(0x5600, 0x100)
#endif
-EXC_REAL(altivec_assist, 0x1700, 0x100)
-EXC_VIRT(altivec_assist, 0x5700, 0x100, 0x1700)
-TRAMP_KVM(PACA_EXGEN, 0x1700)
+EXC_REAL_BEGIN(altivec_assist, 0x1700, 0x100)
+ INT_HANDLER altivec_assist, 0x1700, kvm=1
+EXC_REAL_END(altivec_assist, 0x1700, 0x100)
+EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100)
+ INT_HANDLER altivec_assist, 0x1700, virt=1
+EXC_VIRT_END(altivec_assist, 0x5700, 0x100)
+INT_KVM_HANDLER altivec_assist, 0x1700, EXC_STD, PACA_EXGEN, 0
#ifdef CONFIG_ALTIVEC
EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception)
#else
@@ -2010,15 +1916,18 @@ EXC_COMMON(altivec_assist_common, 0x1700, unknown_exception)
#ifdef CONFIG_CBE_RAS
-EXC_REAL_HV(cbe_thermal, 0x1800, 0x100)
+EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100)
+ INT_HANDLER cbe_thermal, 0x1800, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(cbe_thermal, 0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1800)
+INT_KVM_HANDLER cbe_thermal, 0x1800, EXC_HV, PACA_EXGEN, 1
EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception)
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
#endif
+
#ifdef CONFIG_PPC_WATCHDOG
#define MASKED_DEC_HANDLER_LABEL 3f
@@ -2028,7 +1937,7 @@ EXC_VIRT_NONE(0x5800, 0x100)
std r12,PACA_EXGEN+EX_R12(r13); \
GET_SCRATCH0(r10); \
std r10,PACA_EXGEN+EX_R13(r13); \
- EXCEPTION_PROLOG_2_REAL soft_nmi_common, _H, 1
+ INT_SAVE_SRR_AND_JUMP soft_nmi_common, _H, 1
/*
* Branch to soft_nmi_interrupt using the emergency stack. The emergency
@@ -2043,9 +1952,8 @@ EXC_COMMON_BEGIN(soft_nmi_common)
mr r10,r1
ld r1,PACAEMERGSP(r13)
subi r1,r1,INT_FRAME_SIZE
- EXCEPTION_COMMON_STACK(PACA_EXGEN, 0x900)
+ INT_COMMON 0x900, PACA_EXGEN, 0, 1, 1, 0, 0
bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl soft_nmi_interrupt
b ret_from_except
@@ -2287,11 +2195,20 @@ __end_interrupts:
DEFINE_FIXED_SYMBOL(__end_interrupts)
#ifdef CONFIG_PPC_970_NAP
+ /*
+ * Called by exception entry code if _TLF_NAPPING was set, this clears
+ * the NAPPING flag, and redirects the exception exit to
+ * power4_fixup_nap_return.
+ */
+ .globl power4_fixup_nap
EXC_COMMON_BEGIN(power4_fixup_nap)
andc r9,r9,r10
std r9,TI_LOCAL_FLAGS(r11)
- ld r10,_LINK(r1) /* make idle task do the */
- std r10,_NIP(r1) /* equivalent of a blr */
+ LOAD_REG_ADDR(r10, power4_idle_nap_return)
+ std r10,_NIP(r1)
+ blr
+
+power4_idle_nap_return:
blr
#endif
@@ -2302,6 +2219,35 @@ CLOSE_FIXED_SECTION(virt_trampolines);
USE_TEXT_SECTION()
+/* MSR[RI] should be clear because this uses SRR[01] */
+enable_machine_check:
+ mflr r0
+ bcl 20,31,$+4
+0: mflr r3
+ addi r3,r3,(1f - 0b)
+ mtspr SPRN_SRR0,r3
+ mfmsr r3
+ ori r3,r3,MSR_ME
+ mtspr SPRN_SRR1,r3
+ RFI_TO_KERNEL
+1: mtlr r0
+ blr
+
+/* MSR[RI] should be clear because this uses SRR[01] */
+disable_machine_check:
+ mflr r0
+ bcl 20,31,$+4
+0: mflr r3
+ addi r3,r3,(1f - 0b)
+ mtspr SPRN_SRR0,r3
+ mfmsr r3
+ li r4,MSR_ME
+ andc r3,r3,r4
+ mtspr SPRN_SRR1,r3
+ RFI_TO_KERNEL
+1: mtlr r0
+ blr
+
/*
* Hash table stuff
*/
@@ -2310,7 +2256,7 @@ do_hash_page:
#ifdef CONFIG_PPC_BOOK3S_64
lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h
ori r0,r0,DSISR_BAD_FAULT_64S@l
- and. r0,r4,r0 /* weird error? */
+ and. r0,r5,r0 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
ld r11, PACA_THREAD_INFO(r13)
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
@@ -2318,15 +2264,13 @@ do_hash_page:
bne 77f /* then don't call hash_page now */
/*
- * r3 contains the faulting address
- * r4 msr
- * r5 contains the trap number
- * r6 contains dsisr
+ * r3 contains the trap number
+ * r4 contains the faulting address
+ * r5 contains dsisr
+ * r6 msr
*
* at return r3 = 0 for success, 1 for page fault, negative for error
*/
- mr r4,r12
- ld r6,_DSISR(r1)
bl __hash_page /* build HPTE if possible */
cmpdi r3,0 /* see if __hash_page succeeded */
@@ -2336,16 +2280,15 @@ do_hash_page:
/* Error */
blt- 13f
- /* Reload DSISR into r4 for the DABR check below */
- ld r4,_DSISR(r1)
+ /* Reload DAR/DSISR into r4/r5 for the DABR check below */
+ ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
#endif /* CONFIG_PPC_BOOK3S_64 */
/* Here we have a page fault that hash_page can't handle. */
handle_page_fault:
-11: andis. r0,r4,DSISR_DABRMATCH@h
+11: andis. r0,r5,DSISR_DABRMATCH@h
bne- handle_dabr_fault
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_page_fault
cmpdi r3,0
@@ -2353,7 +2296,7 @@ handle_page_fault:
bl save_nvgprs
mr r5,r3
addi r3,r1,STACK_FRAME_OVERHEAD
- lwz r4,_DAR(r1)
+ ld r4,_DAR(r1)
bl bad_page_fault
b ret_from_except
@@ -2392,7 +2335,6 @@ handle_dabr_fault:
* the access, or panic if there isn't a handler.
*/
77: bl save_nvgprs
- mr r4,r3
addi r3,r1,STACK_FRAME_OVERHEAD
li r5,SIGSEGV
bl bad_page_fault
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 4eab97292cc2..ff0114aeba9b 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -28,24 +28,22 @@
#include <asm/debugfs.h>
#include <asm/page.h>
#include <asm/prom.h>
-#include <asm/rtas.h>
#include <asm/fadump.h>
+#include <asm/fadump-internal.h>
#include <asm/setup.h>
static struct fw_dump fw_dump;
-static struct fadump_mem_struct fdm;
-static const struct fadump_mem_struct *fdm_active;
-#ifdef CONFIG_CMA
-static struct cma *fadump_cma;
-#endif
+static void __init fadump_reserve_crash_area(u64 base);
+
+#ifndef CONFIG_PRESERVE_FA_DUMP
static DEFINE_MUTEX(fadump_mutex);
-struct fad_crash_memory_ranges *crash_memory_ranges;
-int crash_memory_ranges_size;
-int crash_mem_ranges;
-int max_crash_mem_ranges;
+struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 };
+struct fadump_mrange_info reserved_mrange_info = { "reserved", NULL, 0, 0, 0 };
#ifdef CONFIG_CMA
+static struct cma *fadump_cma;
+
/*
* fadump_cma_init() - Initialize CMA area from a fadump reserved memory
*
@@ -107,84 +105,45 @@ static int __init fadump_cma_init(void) { return 1; }
#endif /* CONFIG_CMA */
/* Scan the Firmware Assisted dump configuration details. */
-int __init early_init_dt_scan_fw_dump(unsigned long node,
- const char *uname, int depth, void *data)
+int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+ int depth, void *data)
{
- const __be32 *sections;
- int i, num_sections;
- int size;
- const __be32 *token;
-
- if (depth != 1 || strcmp(uname, "rtas") != 0)
+ if (depth != 1)
return 0;
- /*
- * Check if Firmware Assisted dump is supported. if yes, check
- * if dump has been initiated on last reboot.
- */
- token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
- if (!token)
+ if (strcmp(uname, "rtas") == 0) {
+ rtas_fadump_dt_scan(&fw_dump, node);
return 1;
+ }
- fw_dump.fadump_supported = 1;
- fw_dump.ibm_configure_kernel_dump = be32_to_cpu(*token);
-
- /*
- * The 'ibm,kernel-dump' rtas node is present only if there is
- * dump data waiting for us.
- */
- fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
- if (fdm_active)
- fw_dump.dump_active = 1;
-
- /* Get the sizes required to store dump data for the firmware provided
- * dump sections.
- * For each dump section type supported, a 32bit cell which defines
- * the ID of a supported section followed by two 32 bit cells which
- * gives teh size of the section in bytes.
- */
- sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
- &size);
-
- if (!sections)
+ if (strcmp(uname, "ibm,opal") == 0) {
+ opal_fadump_dt_scan(&fw_dump, node);
return 1;
-
- num_sections = size / (3 * sizeof(u32));
-
- for (i = 0; i < num_sections; i++, sections += 3) {
- u32 type = (u32)of_read_number(sections, 1);
-
- switch (type) {
- case FADUMP_CPU_STATE_DATA:
- fw_dump.cpu_state_data_size =
- of_read_ulong(&sections[1], 2);
- break;
- case FADUMP_HPTE_REGION:
- fw_dump.hpte_region_size =
- of_read_ulong(&sections[1], 2);
- break;
- }
}
- return 1;
+ return 0;
}
/*
* If fadump is registered, check if the memory provided
* falls within boot memory area and reserved memory area.
*/
-int is_fadump_memory_area(u64 addr, ulong size)
+int is_fadump_memory_area(u64 addr, unsigned long size)
{
- u64 d_start = fw_dump.reserve_dump_area_start;
- u64 d_end = d_start + fw_dump.reserve_dump_area_size;
+ u64 d_start, d_end;
if (!fw_dump.dump_registered)
return 0;
+ if (!size)
+ return 0;
+
+ d_start = fw_dump.reserve_dump_area_start;
+ d_end = d_start + fw_dump.reserve_dump_area_size;
if (((addr + size) > d_start) && (addr <= d_end))
return 1;
- return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size;
+ return (addr <= fw_dump.boot_mem_top);
}
int should_fadump_crash(void)
@@ -200,31 +159,29 @@ int is_fadump_active(void)
}
/*
- * Returns 1, if there are no holes in boot memory area,
- * 0 otherwise.
+ * Returns true, if there are no holes in memory area between d_start to d_end,
+ * false otherwise.
*/
-static int is_boot_memory_area_contiguous(void)
+static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
{
struct memblock_region *reg;
- unsigned long tstart, tend;
- unsigned long start_pfn = PHYS_PFN(RMA_START);
- unsigned long end_pfn = PHYS_PFN(RMA_START + fw_dump.boot_memory_size);
- unsigned int ret = 0;
+ bool ret = false;
+ u64 start, end;
for_each_memblock(memory, reg) {
- tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
- tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
- if (tstart < tend) {
- /* Memory hole from start_pfn to tstart */
- if (tstart > start_pfn)
+ start = max_t(u64, d_start, reg->base);
+ end = min_t(u64, d_end, (reg->base + reg->size));
+ if (d_start < end) {
+ /* Memory hole from d_start to start */
+ if (start > d_start)
break;
- if (tend == end_pfn) {
- ret = 1;
+ if (end == d_end) {
+ ret = true;
break;
}
- start_pfn = tend + 1;
+ d_start = end + 1;
}
}
@@ -232,37 +189,45 @@ static int is_boot_memory_area_contiguous(void)
}
/*
- * Returns true, if there are no holes in reserved memory area,
+ * Returns true, if there are no holes in boot memory area,
* false otherwise.
*/
-static bool is_reserved_memory_area_contiguous(void)
+bool is_fadump_boot_mem_contiguous(void)
{
- struct memblock_region *reg;
- unsigned long start, end;
- unsigned long d_start = fw_dump.reserve_dump_area_start;
- unsigned long d_end = d_start + fw_dump.reserve_dump_area_size;
-
- for_each_memblock(memory, reg) {
- start = max(d_start, (unsigned long)reg->base);
- end = min(d_end, (unsigned long)(reg->base + reg->size));
- if (d_start < end) {
- /* Memory hole from d_start to start */
- if (start > d_start)
- break;
+ unsigned long d_start, d_end;
+ bool ret = false;
+ int i;
- if (end == d_end)
- return true;
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ d_start = fw_dump.boot_mem_addr[i];
+ d_end = d_start + fw_dump.boot_mem_sz[i];
- d_start = end + 1;
- }
+ ret = is_fadump_mem_area_contiguous(d_start, d_end);
+ if (!ret)
+ break;
}
- return false;
+ return ret;
+}
+
+/*
+ * Returns true, if there are no holes in reserved memory area,
+ * false otherwise.
+ */
+bool is_fadump_reserved_mem_contiguous(void)
+{
+ u64 d_start, d_end;
+
+ d_start = fw_dump.reserve_dump_area_start;
+ d_end = d_start + fw_dump.reserve_dump_area_size;
+ return is_fadump_mem_area_contiguous(d_start, d_end);
}
/* Print firmware assisted dump configurations for debugging purpose. */
static void fadump_show_config(void)
{
+ int i;
+
pr_debug("Support for firmware-assisted dump (fadump): %s\n",
(fw_dump.fadump_supported ? "present" : "no support"));
@@ -276,62 +241,13 @@ static void fadump_show_config(void)
pr_debug("Dump section sizes:\n");
pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size);
- pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size);
-}
-
-static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
- unsigned long addr)
-{
- if (!fdm)
- return 0;
-
- memset(fdm, 0, sizeof(struct fadump_mem_struct));
- addr = addr & PAGE_MASK;
-
- fdm->header.dump_format_version = cpu_to_be32(0x00000001);
- fdm->header.dump_num_sections = cpu_to_be16(3);
- fdm->header.dump_status_flag = 0;
- fdm->header.offset_first_dump_section =
- cpu_to_be32((u32)offsetof(struct fadump_mem_struct, cpu_state_data));
-
- /*
- * Fields for disk dump option.
- * We are not using disk dump option, hence set these fields to 0.
- */
- fdm->header.dd_block_size = 0;
- fdm->header.dd_block_offset = 0;
- fdm->header.dd_num_blocks = 0;
- fdm->header.dd_offset_disk_path = 0;
-
- /* set 0 to disable an automatic dump-reboot. */
- fdm->header.max_time_auto = 0;
-
- /* Kernel dump sections */
- /* cpu state data section. */
- fdm->cpu_state_data.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
- fdm->cpu_state_data.source_data_type = cpu_to_be16(FADUMP_CPU_STATE_DATA);
- fdm->cpu_state_data.source_address = 0;
- fdm->cpu_state_data.source_len = cpu_to_be64(fw_dump.cpu_state_data_size);
- fdm->cpu_state_data.destination_address = cpu_to_be64(addr);
- addr += fw_dump.cpu_state_data_size;
-
- /* hpte region section */
- fdm->hpte_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
- fdm->hpte_region.source_data_type = cpu_to_be16(FADUMP_HPTE_REGION);
- fdm->hpte_region.source_address = 0;
- fdm->hpte_region.source_len = cpu_to_be64(fw_dump.hpte_region_size);
- fdm->hpte_region.destination_address = cpu_to_be64(addr);
- addr += fw_dump.hpte_region_size;
-
- /* RMA region section */
- fdm->rmr_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
- fdm->rmr_region.source_data_type = cpu_to_be16(FADUMP_REAL_MODE_REGION);
- fdm->rmr_region.source_address = cpu_to_be64(RMA_START);
- fdm->rmr_region.source_len = cpu_to_be64(fw_dump.boot_memory_size);
- fdm->rmr_region.destination_address = cpu_to_be64(addr);
- addr += fw_dump.boot_memory_size;
-
- return addr;
+ pr_debug(" Boot memory size : %lx\n", fw_dump.boot_memory_size);
+ pr_debug(" Boot memory top : %llx\n", fw_dump.boot_mem_top);
+ pr_debug("Boot memory regions cnt: %llx\n", fw_dump.boot_mem_regs_cnt);
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ pr_debug("[%03d] base = %llx, size = %llx\n", i,
+ fw_dump.boot_mem_addr[i], fw_dump.boot_mem_sz[i]);
+ }
}
/**
@@ -349,10 +265,10 @@ static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
* that is required for a kernel to boot successfully.
*
*/
-static inline unsigned long fadump_calculate_reserve_size(void)
+static inline u64 fadump_calculate_reserve_size(void)
{
+ u64 base, size, bootmem_min;
int ret;
- unsigned long long base, size;
if (fw_dump.reserve_bootvar)
pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");
@@ -402,7 +318,8 @@ static inline unsigned long fadump_calculate_reserve_size(void)
if (memory_limit && size > memory_limit)
size = memory_limit;
- return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
+ bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
+ return (size > bootmem_min ? size : bootmem_min);
}
/*
@@ -423,57 +340,136 @@ static unsigned long get_fadump_area_size(void)
size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
size = PAGE_ALIGN(size);
+
+ /* This is to hold kernel metadata on platforms that support it */
+ size += (fw_dump.ops->fadump_get_metadata_size ?
+ fw_dump.ops->fadump_get_metadata_size() : 0);
return size;
}
-static void __init fadump_reserve_crash_area(unsigned long base,
- unsigned long size)
+static int __init add_boot_mem_region(unsigned long rstart,
+ unsigned long rsize)
+{
+ int i = fw_dump.boot_mem_regs_cnt++;
+
+ if (fw_dump.boot_mem_regs_cnt > FADUMP_MAX_MEM_REGS) {
+ fw_dump.boot_mem_regs_cnt = FADUMP_MAX_MEM_REGS;
+ return 0;
+ }
+
+ pr_debug("Added boot memory range[%d] [%#016lx-%#016lx)\n",
+ i, rstart, (rstart + rsize));
+ fw_dump.boot_mem_addr[i] = rstart;
+ fw_dump.boot_mem_sz[i] = rsize;
+ return 1;
+}
+
+/*
+ * Firmware usually has a hard limit on the data it can copy per region.
+ * Honour that by splitting a memory range into multiple regions.
+ */
+static int __init add_boot_mem_regions(unsigned long mstart,
+ unsigned long msize)
+{
+ unsigned long rstart, rsize, max_size;
+ int ret = 1;
+
+ rstart = mstart;
+ max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : msize;
+ while (msize) {
+ if (msize > max_size)
+ rsize = max_size;
+ else
+ rsize = msize;
+
+ ret = add_boot_mem_region(rstart, rsize);
+ if (!ret)
+ break;
+
+ msize -= rsize;
+ rstart += rsize;
+ }
+
+ return ret;
+}
+
+static int __init fadump_get_boot_mem_regions(void)
{
+ unsigned long base, size, cur_size, hole_size, last_end;
+ unsigned long mem_size = fw_dump.boot_memory_size;
struct memblock_region *reg;
- unsigned long mstart, mend, msize;
+ int ret = 1;
+
+ fw_dump.boot_mem_regs_cnt = 0;
+ last_end = 0;
+ hole_size = 0;
+ cur_size = 0;
for_each_memblock(memory, reg) {
- mstart = max_t(unsigned long, base, reg->base);
- mend = reg->base + reg->size;
- mend = min(base + size, mend);
-
- if (mstart < mend) {
- msize = mend - mstart;
- memblock_reserve(mstart, msize);
- pr_info("Reserved %ldMB of memory at %#016lx for saving crash dump\n",
- (msize >> 20), mstart);
+ base = reg->base;
+ size = reg->size;
+ hole_size += (base - last_end);
+
+ if ((cur_size + size) >= mem_size) {
+ size = (mem_size - cur_size);
+ ret = add_boot_mem_regions(base, size);
+ break;
}
+
+ mem_size -= size;
+ cur_size += size;
+ ret = add_boot_mem_regions(base, size);
+ if (!ret)
+ break;
+
+ last_end = base + size;
}
+ fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);
+
+ return ret;
}
int __init fadump_reserve_mem(void)
{
- unsigned long base, size, memory_boundary;
+ u64 base, size, mem_boundary, bootmem_min, align = PAGE_SIZE;
+ bool is_memblock_bottom_up = memblock_bottom_up();
+ int ret = 1;
if (!fw_dump.fadump_enabled)
return 0;
if (!fw_dump.fadump_supported) {
- printk(KERN_INFO "Firmware-assisted dump is not supported on"
- " this hardware\n");
- fw_dump.fadump_enabled = 0;
- return 0;
+ pr_info("Firmware-Assisted Dump is not supported on this hardware\n");
+ goto error_out;
}
+
/*
* Initialize boot memory size
* If dump is active then we have already calculated the size during
* first kernel.
*/
- if (fdm_active)
- fw_dump.boot_memory_size = be64_to_cpu(fdm_active->rmr_region.source_len);
- else {
- fw_dump.boot_memory_size = fadump_calculate_reserve_size();
+ if (!fw_dump.dump_active) {
+ fw_dump.boot_memory_size =
+ PAGE_ALIGN(fadump_calculate_reserve_size());
#ifdef CONFIG_CMA
- if (!fw_dump.nocma)
+ if (!fw_dump.nocma) {
+ align = FADUMP_CMA_ALIGNMENT;
fw_dump.boot_memory_size =
- ALIGN(fw_dump.boot_memory_size,
- FADUMP_CMA_ALIGNMENT);
+ ALIGN(fw_dump.boot_memory_size, align);
+ }
#endif
+
+ bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
+ if (fw_dump.boot_memory_size < bootmem_min) {
+ pr_err("Can't enable fadump with boot memory size (0x%lx) less than 0x%llx\n",
+ fw_dump.boot_memory_size, bootmem_min);
+ goto error_out;
+ }
+
+ if (!fadump_get_boot_mem_regions()) {
+ pr_err("Too many holes in boot memory area to enable fadump\n");
+ goto error_out;
+ }
}
/*
@@ -493,10 +489,13 @@ int __init fadump_reserve_mem(void)
" dump, now %#016llx\n", memory_limit);
}
if (memory_limit)
- memory_boundary = memory_limit;
+ mem_boundary = memory_limit;
else
- memory_boundary = memblock_end_of_DRAM();
+ mem_boundary = memblock_end_of_DRAM();
+ base = fw_dump.boot_mem_top;
+ size = get_fadump_area_size();
+ fw_dump.reserve_dump_area_size = size;
if (fw_dump.dump_active) {
pr_info("Firmware-assisted dump is active.\n");
@@ -510,58 +509,55 @@ int __init fadump_reserve_mem(void)
#endif
/*
* If last boot has crashed then reserve all the memory
- * above boot_memory_size so that we don't touch it until
+ * above boot memory size so that we don't touch it until
* dump is written to disk by userspace tool. This memory
- * will be released for general use once the dump is saved.
+ * can be released for general use by invalidating fadump.
*/
- base = fw_dump.boot_memory_size;
- size = memory_boundary - base;
- fadump_reserve_crash_area(base, size);
-
- fw_dump.fadumphdr_addr =
- be64_to_cpu(fdm_active->rmr_region.destination_address) +
- be64_to_cpu(fdm_active->rmr_region.source_len);
- pr_debug("fadumphdr_addr = %pa\n", &fw_dump.fadumphdr_addr);
- fw_dump.reserve_dump_area_start = base;
- fw_dump.reserve_dump_area_size = size;
- } else {
- size = get_fadump_area_size();
+ fadump_reserve_crash_area(base);
+ pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr);
+ pr_debug("Reserve dump area start address: 0x%lx\n",
+ fw_dump.reserve_dump_area_start);
+ } else {
/*
* Reserve memory at an offset closer to bottom of the RAM to
- * minimize the impact of memory hot-remove operation. We can't
- * use memblock_find_in_range() here since it doesn't allocate
- * from bottom to top.
+ * minimize the impact of memory hot-remove operation.
*/
- for (base = fw_dump.boot_memory_size;
- base <= (memory_boundary - size);
- base += size) {
- if (memblock_is_region_memory(base, size) &&
- !memblock_is_region_reserved(base, size))
- break;
+ memblock_set_bottom_up(true);
+ base = memblock_find_in_range(base, mem_boundary, size, align);
+
+ /* Restore the previous allocation mode */
+ memblock_set_bottom_up(is_memblock_bottom_up);
+
+ if (!base) {
+ pr_err("Failed to find memory chunk for reservation!\n");
+ goto error_out;
}
- if ((base > (memory_boundary - size)) ||
- memblock_reserve(base, size)) {
- pr_err("Failed to reserve memory\n");
- return 0;
+ fw_dump.reserve_dump_area_start = base;
+
+ /*
+ * Calculate the kernel metadata address and register it with
+ * f/w if the platform supports.
+ */
+ if (fw_dump.ops->fadump_setup_metadata &&
+ (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
+ goto error_out;
+
+ if (memblock_reserve(base, size)) {
+ pr_err("Failed to reserve memory!\n");
+ goto error_out;
}
- pr_info("Reserved %ldMB of memory at %ldMB for firmware-"
- "assisted dump (System RAM: %ldMB)\n",
- (unsigned long)(size >> 20),
- (unsigned long)(base >> 20),
- (unsigned long)(memblock_phys_mem_size() >> 20));
+ pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n",
+ (size >> 20), base, (memblock_phys_mem_size() >> 20));
- fw_dump.reserve_dump_area_start = base;
- fw_dump.reserve_dump_area_size = size;
- return fadump_cma_init();
+ ret = fadump_cma_init();
}
- return 1;
-}
-unsigned long __init arch_reserved_kernel_pages(void)
-{
- return memblock_reserved_size() / PAGE_SIZE;
+ return ret;
+error_out:
+ fw_dump.fadump_enabled = 0;
+ return 0;
}
/* Look for fadump= cmdline option. */
@@ -596,61 +592,6 @@ static int __init early_fadump_reserve_mem(char *p)
}
early_param("fadump_reserve_mem", early_fadump_reserve_mem);
-static int register_fw_dump(struct fadump_mem_struct *fdm)
-{
- int rc, err;
- unsigned int wait_time;
-
- pr_debug("Registering for firmware-assisted kernel dump...\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_REGISTER, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
-
- } while (wait_time);
-
- err = -EIO;
- switch (rc) {
- default:
- pr_err("Failed to register. Unknown Error(%d).\n", rc);
- break;
- case -1:
- printk(KERN_ERR "Failed to register firmware-assisted kernel"
- " dump. Hardware Error(%d).\n", rc);
- break;
- case -3:
- if (!is_boot_memory_area_contiguous())
- pr_err("Can't have holes in boot memory area while registering fadump\n");
- else if (!is_reserved_memory_area_contiguous())
- pr_err("Can't have holes in reserved memory area while"
- " registering fadump\n");
-
- printk(KERN_ERR "Failed to register firmware-assisted kernel"
- " dump. Parameter Error(%d).\n", rc);
- err = -EINVAL;
- break;
- case -9:
- printk(KERN_ERR "firmware-assisted kernel dump is already "
- " registered.");
- fw_dump.dump_registered = 1;
- err = -EEXIST;
- break;
- case 0:
- printk(KERN_INFO "firmware-assisted kernel dump registration"
- " is successful\n");
- fw_dump.dump_registered = 1;
- err = 0;
- break;
- }
- return err;
-}
-
void crash_fadump(struct pt_regs *regs, const char *str)
{
struct fadump_crash_info_header *fdh = NULL;
@@ -693,71 +634,10 @@ void crash_fadump(struct pt_regs *regs, const char *str)
fdh->online_mask = *cpu_online_mask;
- /* Call ibm,os-term rtas call to trigger firmware assisted dump */
- rtas_os_term((char *)str);
-}
-
-#define GPR_MASK 0xffffff0000000000
-static inline int fadump_gpr_index(u64 id)
-{
- int i = -1;
- char str[3];
-
- if ((id & GPR_MASK) == REG_ID("GPR")) {
- /* get the digits at the end */
- id &= ~GPR_MASK;
- id >>= 24;
- str[2] = '\0';
- str[1] = id & 0xff;
- str[0] = (id >> 8) & 0xff;
- sscanf(str, "%d", &i);
- if (i > 31)
- i = -1;
- }
- return i;
-}
-
-static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id,
- u64 reg_val)
-{
- int i;
-
- i = fadump_gpr_index(reg_id);
- if (i >= 0)
- regs->gpr[i] = (unsigned long)reg_val;
- else if (reg_id == REG_ID("NIA"))
- regs->nip = (unsigned long)reg_val;
- else if (reg_id == REG_ID("MSR"))
- regs->msr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("CTR"))
- regs->ctr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("LR"))
- regs->link = (unsigned long)reg_val;
- else if (reg_id == REG_ID("XER"))
- regs->xer = (unsigned long)reg_val;
- else if (reg_id == REG_ID("CR"))
- regs->ccr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("DAR"))
- regs->dar = (unsigned long)reg_val;
- else if (reg_id == REG_ID("DSISR"))
- regs->dsisr = (unsigned long)reg_val;
-}
-
-static struct fadump_reg_entry*
-fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
-{
- memset(regs, 0, sizeof(struct pt_regs));
-
- while (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUEND")) {
- fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id),
- be64_to_cpu(reg_entry->reg_value));
- reg_entry++;
- }
- reg_entry++;
- return reg_entry;
+ fw_dump.ops->fadump_trigger(fdh, str);
}
-static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
+u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
{
struct elf_prstatus prstatus;
@@ -772,7 +652,7 @@ static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
return buf;
}
-static void fadump_update_elfcore_header(char *bufp)
+void fadump_update_elfcore_header(char *bufp)
{
struct elfhdr *elf;
struct elf_phdr *phdr;
@@ -784,7 +664,7 @@ static void fadump_update_elfcore_header(char *bufp)
phdr = (struct elf_phdr *)bufp;
if (phdr->p_type == PT_NOTE) {
- phdr->p_paddr = fw_dump.cpu_notes_buf;
+ phdr->p_paddr = __pa(fw_dump.cpu_notes_buf_vaddr);
phdr->p_offset = phdr->p_paddr;
phdr->p_filesz = fw_dump.cpu_notes_buf_size;
phdr->p_memsz = fw_dump.cpu_notes_buf_size;
@@ -792,228 +672,100 @@ static void fadump_update_elfcore_header(char *bufp)
return;
}
-static void *fadump_cpu_notes_buf_alloc(unsigned long size)
+static void *fadump_alloc_buffer(unsigned long size)
{
- void *vaddr;
+ unsigned long count, i;
struct page *page;
- unsigned long order, count, i;
+ void *vaddr;
- order = get_order(size);
- vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+ vaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
if (!vaddr)
return NULL;
- count = 1 << order;
+ count = PAGE_ALIGN(size) / PAGE_SIZE;
page = virt_to_page(vaddr);
for (i = 0; i < count; i++)
- SetPageReserved(page + i);
+ mark_page_reserved(page + i);
return vaddr;
}
-static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
+static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
{
- struct page *page;
- unsigned long order, count, i;
-
- order = get_order(size);
- count = 1 << order;
- page = virt_to_page(vaddr);
- for (i = 0; i < count; i++)
- ClearPageReserved(page + i);
- __free_pages(page, order);
+ free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
}
-/*
- * Read CPU state dump data and convert it into ELF notes.
- * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
- * used to access the data to allow for additional fields to be added without
- * affecting compatibility. Each list of registers for a CPU starts with
- * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
- * 8 Byte ASCII identifier and 8 Byte register value. The register entry
- * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
- * of register value. For more details refer to PAPR document.
- *
- * Only for the crashing cpu we ignore the CPU dump data and get exact
- * state from fadump crash info structure populated by first kernel at the
- * time of crash.
- */
-static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
+s32 fadump_setup_cpu_notes_buf(u32 num_cpus)
{
- struct fadump_reg_save_area_header *reg_header;
- struct fadump_reg_entry *reg_entry;
- struct fadump_crash_info_header *fdh = NULL;
- void *vaddr;
- unsigned long addr;
- u32 num_cpus, *note_buf;
- struct pt_regs regs;
- int i, rc = 0, cpu = 0;
-
- if (!fdm->cpu_state_data.bytes_dumped)
- return -EINVAL;
-
- addr = be64_to_cpu(fdm->cpu_state_data.destination_address);
- vaddr = __va(addr);
-
- reg_header = vaddr;
- if (be64_to_cpu(reg_header->magic_number) != REGSAVE_AREA_MAGIC) {
- printk(KERN_ERR "Unable to read register save area.\n");
- return -ENOENT;
- }
- pr_debug("--------CPU State Data------------\n");
- pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number));
- pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset));
-
- vaddr += be32_to_cpu(reg_header->num_cpu_offset);
- num_cpus = be32_to_cpu(*((__be32 *)(vaddr)));
- pr_debug("NumCpus : %u\n", num_cpus);
- vaddr += sizeof(u32);
- reg_entry = (struct fadump_reg_entry *)vaddr;
-
/* Allocate buffer to hold cpu crash notes. */
fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
- note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
- if (!note_buf) {
- printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
- "cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
+ fw_dump.cpu_notes_buf_vaddr =
+ (unsigned long)fadump_alloc_buffer(fw_dump.cpu_notes_buf_size);
+ if (!fw_dump.cpu_notes_buf_vaddr) {
+ pr_err("Failed to allocate %ld bytes for CPU notes buffer\n",
+ fw_dump.cpu_notes_buf_size);
return -ENOMEM;
}
- fw_dump.cpu_notes_buf = __pa(note_buf);
-
- pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
- (num_cpus * sizeof(note_buf_t)), note_buf);
-
- if (fw_dump.fadumphdr_addr)
- fdh = __va(fw_dump.fadumphdr_addr);
-
- for (i = 0; i < num_cpus; i++) {
- if (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUSTRT")) {
- printk(KERN_ERR "Unable to read CPU state data\n");
- rc = -ENOENT;
- goto error_out;
- }
- /* Lower 4 bytes of reg_value contains logical cpu id */
- cpu = be64_to_cpu(reg_entry->reg_value) & FADUMP_CPU_ID_MASK;
- if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) {
- SKIP_TO_NEXT_CPU(reg_entry);
- continue;
- }
- pr_debug("Reading register data for cpu %d...\n", cpu);
- if (fdh && fdh->crashing_cpu == cpu) {
- regs = fdh->regs;
- note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
- SKIP_TO_NEXT_CPU(reg_entry);
- } else {
- reg_entry++;
- reg_entry = fadump_read_registers(reg_entry, &regs);
- note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
- }
- }
- final_note(note_buf);
- if (fdh) {
- pr_debug("Updating elfcore header (%llx) with cpu notes\n",
- fdh->elfcorehdr_addr);
- fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
- }
+ pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n",
+ fw_dump.cpu_notes_buf_size,
+ fw_dump.cpu_notes_buf_vaddr);
return 0;
-
-error_out:
- fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
- fw_dump.cpu_notes_buf_size);
- fw_dump.cpu_notes_buf = 0;
- fw_dump.cpu_notes_buf_size = 0;
- return rc;
-
}
-/*
- * Validate and process the dump data stored by firmware before exporting
- * it through '/proc/vmcore'.
- */
-static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
+void fadump_free_cpu_notes_buf(void)
{
- struct fadump_crash_info_header *fdh;
- int rc = 0;
-
- if (!fdm_active || !fw_dump.fadumphdr_addr)
- return -EINVAL;
-
- /* Check if the dump data is valid. */
- if ((be16_to_cpu(fdm_active->header.dump_status_flag) == FADUMP_ERROR_FLAG) ||
- (fdm_active->cpu_state_data.error_flags != 0) ||
- (fdm_active->rmr_region.error_flags != 0)) {
- printk(KERN_ERR "Dump taken by platform is not valid\n");
- return -EINVAL;
- }
- if ((fdm_active->rmr_region.bytes_dumped !=
- fdm_active->rmr_region.source_len) ||
- !fdm_active->cpu_state_data.bytes_dumped) {
- printk(KERN_ERR "Dump taken by platform is incomplete\n");
- return -EINVAL;
- }
-
- /* Validate the fadump crash info header */
- fdh = __va(fw_dump.fadumphdr_addr);
- if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
- printk(KERN_ERR "Crash info header is not valid.\n");
- return -EINVAL;
- }
-
- rc = fadump_build_cpu_notes(fdm_active);
- if (rc)
- return rc;
-
- /*
- * We are done validating dump info and elfcore header is now ready
- * to be exported. set elfcorehdr_addr so that vmcore module will
- * export the elfcore header through '/proc/vmcore'.
- */
- elfcorehdr_addr = fdh->elfcorehdr_addr;
+ if (!fw_dump.cpu_notes_buf_vaddr)
+ return;
- return 0;
+ fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr,
+ fw_dump.cpu_notes_buf_size);
+ fw_dump.cpu_notes_buf_vaddr = 0;
+ fw_dump.cpu_notes_buf_size = 0;
}
-static void free_crash_memory_ranges(void)
+static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info)
{
- kfree(crash_memory_ranges);
- crash_memory_ranges = NULL;
- crash_memory_ranges_size = 0;
- max_crash_mem_ranges = 0;
+ kfree(mrange_info->mem_ranges);
+ mrange_info->mem_ranges = NULL;
+ mrange_info->mem_ranges_sz = 0;
+ mrange_info->max_mem_ranges = 0;
}
/*
- * Allocate or reallocate crash memory ranges array in incremental units
+ * Allocate or reallocate mem_ranges array in incremental units
* of PAGE_SIZE.
*/
-static int allocate_crash_memory_ranges(void)
+static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info)
{
- struct fad_crash_memory_ranges *new_array;
+ struct fadump_memory_range *new_array;
u64 new_size;
- new_size = crash_memory_ranges_size + PAGE_SIZE;
- pr_debug("Allocating %llu bytes of memory for crash memory ranges\n",
- new_size);
+ new_size = mrange_info->mem_ranges_sz + PAGE_SIZE;
+ pr_debug("Allocating %llu bytes of memory for %s memory ranges\n",
+ new_size, mrange_info->name);
- new_array = krealloc(crash_memory_ranges, new_size, GFP_KERNEL);
+ new_array = krealloc(mrange_info->mem_ranges, new_size, GFP_KERNEL);
if (new_array == NULL) {
- pr_err("Insufficient memory for setting up crash memory ranges\n");
- free_crash_memory_ranges();
+ pr_err("Insufficient memory for setting up %s memory ranges\n",
+ mrange_info->name);
+ fadump_free_mem_ranges(mrange_info);
return -ENOMEM;
}
- crash_memory_ranges = new_array;
- crash_memory_ranges_size = new_size;
- max_crash_mem_ranges = (new_size /
- sizeof(struct fad_crash_memory_ranges));
+ mrange_info->mem_ranges = new_array;
+ mrange_info->mem_ranges_sz = new_size;
+ mrange_info->max_mem_ranges = (new_size /
+ sizeof(struct fadump_memory_range));
return 0;
}
-static inline int fadump_add_crash_memory(unsigned long long base,
- unsigned long long end)
+static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
+ u64 base, u64 end)
{
- u64 start, size;
+ struct fadump_memory_range *mem_ranges = mrange_info->mem_ranges;
bool is_adjacent = false;
+ u64 start, size;
if (base == end)
return 0;
@@ -1022,38 +774,41 @@ static inline int fadump_add_crash_memory(unsigned long long base,
* Fold adjacent memory ranges to bring down the memory ranges/
* PT_LOAD segments count.
*/
- if (crash_mem_ranges) {
- start = crash_memory_ranges[crash_mem_ranges - 1].base;
- size = crash_memory_ranges[crash_mem_ranges - 1].size;
+ if (mrange_info->mem_range_cnt) {
+ start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
+ size = mem_ranges[mrange_info->mem_range_cnt - 1].size;
if ((start + size) == base)
is_adjacent = true;
}
if (!is_adjacent) {
/* resize the array on reaching the limit */
- if (crash_mem_ranges == max_crash_mem_ranges) {
+ if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) {
int ret;
- ret = allocate_crash_memory_ranges();
+ ret = fadump_alloc_mem_ranges(mrange_info);
if (ret)
return ret;
+
+ /* Update to the new resized array */
+ mem_ranges = mrange_info->mem_ranges;
}
start = base;
- crash_memory_ranges[crash_mem_ranges].base = start;
- crash_mem_ranges++;
+ mem_ranges[mrange_info->mem_range_cnt].base = start;
+ mrange_info->mem_range_cnt++;
}
- crash_memory_ranges[crash_mem_ranges - 1].size = (end - start);
- pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
- (crash_mem_ranges - 1), start, end - 1, (end - start));
+ mem_ranges[mrange_info->mem_range_cnt - 1].size = (end - start);
+ pr_debug("%s_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
+ mrange_info->name, (mrange_info->mem_range_cnt - 1),
+ start, end - 1, (end - start));
return 0;
}
-static int fadump_exclude_reserved_area(unsigned long long start,
- unsigned long long end)
+static int fadump_exclude_reserved_area(u64 start, u64 end)
{
- unsigned long long ra_start, ra_end;
+ u64 ra_start, ra_end;
int ret = 0;
ra_start = fw_dump.reserve_dump_area_start;
@@ -1061,18 +816,22 @@ static int fadump_exclude_reserved_area(unsigned long long start,
if ((ra_start < end) && (ra_end > start)) {
if ((start < ra_start) && (end > ra_end)) {
- ret = fadump_add_crash_memory(start, ra_start);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ start, ra_start);
if (ret)
return ret;
- ret = fadump_add_crash_memory(ra_end, end);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ ra_end, end);
} else if (start < ra_start) {
- ret = fadump_add_crash_memory(start, ra_start);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ start, ra_start);
} else if (ra_end < end) {
- ret = fadump_add_crash_memory(ra_end, end);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ ra_end, end);
}
} else
- ret = fadump_add_crash_memory(start, end);
+ ret = fadump_add_mem_range(&crash_mrange_info, start, end);
return ret;
}
@@ -1117,36 +876,36 @@ static int fadump_init_elfcore_header(char *bufp)
static int fadump_setup_crash_memory_ranges(void)
{
struct memblock_region *reg;
- unsigned long long start, end;
- int ret;
+ u64 start, end;
+ int i, ret;
pr_debug("Setup crash memory ranges.\n");
- crash_mem_ranges = 0;
+ crash_mrange_info.mem_range_cnt = 0;
/*
- * add the first memory chunk (RMA_START through boot_memory_size) as
- * a separate memory chunk. The reason is, at the time crash firmware
- * will move the content of this memory chunk to different location
- * specified during fadump registration. We need to create a separate
- * program header for this chunk with the correct offset.
+ * Boot memory region(s) registered with firmware are moved to
+ * different location at the time of crash. Create separate program
+ * header(s) for this memory chunk(s) with the correct offset.
*/
- ret = fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
- if (ret)
- return ret;
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ start = fw_dump.boot_mem_addr[i];
+ end = start + fw_dump.boot_mem_sz[i];
+ ret = fadump_add_mem_range(&crash_mrange_info, start, end);
+ if (ret)
+ return ret;
+ }
for_each_memblock(memory, reg) {
- start = (unsigned long long)reg->base;
- end = start + (unsigned long long)reg->size;
+ start = (u64)reg->base;
+ end = start + (u64)reg->size;
/*
- * skip the first memory chunk that is already added (RMA_START
- * through boot_memory_size). This logic needs a relook if and
- * when RMA_START changes to a non-zero value.
+ * skip the memory chunk that is already added
+ * (0 through boot_memory_top).
*/
- BUILD_BUG_ON(RMA_START != 0);
- if (start < fw_dump.boot_memory_size) {
- if (end > fw_dump.boot_memory_size)
- start = fw_dump.boot_memory_size;
+ if (start < fw_dump.boot_mem_top) {
+ if (end > fw_dump.boot_mem_top)
+ start = fw_dump.boot_mem_top;
else
continue;
}
@@ -1167,17 +926,35 @@ static int fadump_setup_crash_memory_ranges(void)
*/
static inline unsigned long fadump_relocate(unsigned long paddr)
{
- if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
- return be64_to_cpu(fdm.rmr_region.destination_address) + paddr;
- else
- return paddr;
+ unsigned long raddr, rstart, rend, rlast, hole_size;
+ int i;
+
+ hole_size = 0;
+ rlast = 0;
+ raddr = paddr;
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ rstart = fw_dump.boot_mem_addr[i];
+ rend = rstart + fw_dump.boot_mem_sz[i];
+ hole_size += (rstart - rlast);
+
+ if (paddr >= rstart && paddr < rend) {
+ raddr += fw_dump.boot_mem_dest_addr - hole_size;
+ break;
+ }
+
+ rlast = rend;
+ }
+
+ pr_debug("vmcoreinfo: paddr = 0x%lx, raddr = 0x%lx\n", paddr, raddr);
+ return raddr;
}
static int fadump_create_elfcore_headers(char *bufp)
{
- struct elfhdr *elf;
+ unsigned long long raddr, offset;
struct elf_phdr *phdr;
- int i;
+ struct elfhdr *elf;
+ int i, j;
fadump_init_elfcore_header(bufp);
elf = (struct elfhdr *)bufp;
@@ -1220,12 +997,14 @@ static int fadump_create_elfcore_headers(char *bufp)
(elf->e_phnum)++;
/* setup PT_LOAD sections. */
-
- for (i = 0; i < crash_mem_ranges; i++) {
- unsigned long long mbase, msize;
- mbase = crash_memory_ranges[i].base;
- msize = crash_memory_ranges[i].size;
-
+ j = 0;
+ offset = 0;
+ raddr = fw_dump.boot_mem_addr[0];
+ for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) {
+ u64 mbase, msize;
+
+ mbase = crash_mrange_info.mem_ranges[i].base;
+ msize = crash_mrange_info.mem_ranges[i].size;
if (!msize)
continue;
@@ -1235,13 +1014,17 @@ static int fadump_create_elfcore_headers(char *bufp)
phdr->p_flags = PF_R|PF_W|PF_X;
phdr->p_offset = mbase;
- if (mbase == RMA_START) {
+ if (mbase == raddr) {
/*
- * The entire RMA region will be moved by firmware
- * to the specified destination_address. Hence set
- * the correct offset.
+ * The entire real memory region will be moved by
+ * firmware to the specified destination_address.
+ * Hence set the correct offset.
*/
- phdr->p_offset = be64_to_cpu(fdm.rmr_region.destination_address);
+ phdr->p_offset = fw_dump.boot_mem_dest_addr + offset;
+ if (j < (fw_dump.boot_mem_regs_cnt - 1)) {
+ offset += fw_dump.boot_mem_sz[j];
+ raddr = fw_dump.boot_mem_addr[++j];
+ }
}
phdr->p_paddr = mbase;
@@ -1263,7 +1046,6 @@ static unsigned long init_fadump_header(unsigned long addr)
if (!addr)
return 0;
- fw_dump.fadumphdr_addr = addr;
fdh = __va(addr);
addr += sizeof(struct fadump_crash_info_header);
@@ -1271,7 +1053,7 @@ static unsigned long init_fadump_header(unsigned long addr)
fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
fdh->elfcorehdr_addr = addr;
/* We will set the crashing cpu id in crash_fadump() during crash. */
- fdh->crashing_cpu = CPU_UNKNOWN;
+ fdh->crashing_cpu = FADUMP_CPU_UNKNOWN;
return addr;
}
@@ -1293,7 +1075,8 @@ static int register_fadump(void)
if (ret)
return ret;
- addr = be64_to_cpu(fdm.rmr_region.destination_address) + be64_to_cpu(fdm.rmr_region.source_len);
+ addr = fw_dump.fadumphdr_addr;
+
/* Initialize fadump crash info header. */
addr = init_fadump_header(addr);
vaddr = __va(addr);
@@ -1302,74 +1085,27 @@ static int register_fadump(void)
fadump_create_elfcore_headers(vaddr);
/* register the future kernel dump with firmware. */
- return register_fw_dump(&fdm);
-}
-
-static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
-{
- int rc = 0;
- unsigned int wait_time;
-
- pr_debug("Un-register firmware-assisted dump\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_UNREGISTER, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
- } while (wait_time);
-
- if (rc) {
- printk(KERN_ERR "Failed to un-register firmware-assisted dump."
- " unexpected error(%d).\n", rc);
- return rc;
- }
- fw_dump.dump_registered = 0;
- return 0;
-}
-
-static int fadump_invalidate_dump(const struct fadump_mem_struct *fdm)
-{
- int rc = 0;
- unsigned int wait_time;
-
- pr_debug("Invalidating firmware-assisted dump registration\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_INVALIDATE, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
- } while (wait_time);
-
- if (rc) {
- pr_err("Failed to invalidate firmware-assisted dump registration. Unexpected error (%d).\n", rc);
- return rc;
- }
- fw_dump.dump_active = 0;
- fdm_active = NULL;
- return 0;
+ pr_debug("Registering for firmware-assisted kernel dump...\n");
+ return fw_dump.ops->fadump_register(&fw_dump);
}
void fadump_cleanup(void)
{
+ if (!fw_dump.fadump_supported)
+ return;
+
/* Invalidate the registration only if dump is active. */
if (fw_dump.dump_active) {
- /* pass the same memory dump structure provided by platform */
- fadump_invalidate_dump(fdm_active);
+ pr_debug("Invalidating firmware-assisted dump registration\n");
+ fw_dump.ops->fadump_invalidate(&fw_dump);
} else if (fw_dump.dump_registered) {
/* Un-register Firmware-assisted dump if it was registered. */
- fadump_unregister_dump(&fdm);
- free_crash_memory_ranges();
+ fw_dump.ops->fadump_unregister(&fw_dump);
+ fadump_free_mem_ranges(&crash_mrange_info);
}
+
+ if (fw_dump.ops->fadump_cleanup)
+ fw_dump.ops->fadump_cleanup(&fw_dump);
}
static void fadump_free_reserved_memory(unsigned long start_pfn,
@@ -1394,90 +1130,197 @@ static void fadump_free_reserved_memory(unsigned long start_pfn,
/*
* Skip memory holes and free memory that was actually reserved.
*/
-static void fadump_release_reserved_area(unsigned long start, unsigned long end)
+static void fadump_release_reserved_area(u64 start, u64 end)
{
+ u64 tstart, tend, spfn, epfn;
struct memblock_region *reg;
- unsigned long tstart, tend;
- unsigned long start_pfn = PHYS_PFN(start);
- unsigned long end_pfn = PHYS_PFN(end);
+ spfn = PHYS_PFN(start);
+ epfn = PHYS_PFN(end);
for_each_memblock(memory, reg) {
- tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
- tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
+ tstart = max_t(u64, spfn, memblock_region_memory_base_pfn(reg));
+ tend = min_t(u64, epfn, memblock_region_memory_end_pfn(reg));
if (tstart < tend) {
fadump_free_reserved_memory(tstart, tend);
- if (tend == end_pfn)
+ if (tend == epfn)
break;
- start_pfn = tend + 1;
+ spfn = tend;
+ }
+ }
+}
+
+/*
+ * Sort the mem ranges in-place and merge adjacent ranges
+ * to minimize the memory ranges count.
+ */
+static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
+{
+ struct fadump_memory_range *mem_ranges;
+ struct fadump_memory_range tmp_range;
+ u64 base, size;
+ int i, j, idx;
+
+ if (!reserved_mrange_info.mem_range_cnt)
+ return;
+
+ /* Sort the memory ranges */
+ mem_ranges = mrange_info->mem_ranges;
+ for (i = 0; i < mrange_info->mem_range_cnt; i++) {
+ idx = i;
+ for (j = (i + 1); j < mrange_info->mem_range_cnt; j++) {
+ if (mem_ranges[idx].base > mem_ranges[j].base)
+ idx = j;
+ }
+ if (idx != i) {
+ tmp_range = mem_ranges[idx];
+ mem_ranges[idx] = mem_ranges[i];
+ mem_ranges[i] = tmp_range;
+ }
+ }
+
+ /* Merge adjacent reserved ranges */
+ idx = 0;
+ for (i = 1; i < mrange_info->mem_range_cnt; i++) {
+ base = mem_ranges[i-1].base;
+ size = mem_ranges[i-1].size;
+ if (mem_ranges[i].base == (base + size))
+ mem_ranges[idx].size += mem_ranges[i].size;
+ else {
+ idx++;
+ if (i == idx)
+ continue;
+
+ mem_ranges[idx] = mem_ranges[i];
}
}
+ mrange_info->mem_range_cnt = idx + 1;
}
/*
- * Release the memory that was reserved in early boot to preserve the memory
- * contents. The released memory will be available for general use.
+ * Scan reserved-ranges to consider them while reserving/releasing
+ * memory for FADump.
*/
-static void fadump_release_memory(unsigned long begin, unsigned long end)
+static inline int fadump_scan_reserved_mem_ranges(void)
{
- unsigned long ra_start, ra_end;
+ struct device_node *root;
+ const __be32 *prop;
+ int len, ret = -1;
+ unsigned long i;
+
+ root = of_find_node_by_path("/");
+ if (!root)
+ return ret;
+
+ prop = of_get_property(root, "reserved-ranges", &len);
+ if (!prop)
+ return ret;
+
+ /*
+ * Each reserved range is an (address,size) pair, 2 cells each,
+ * totalling 4 cells per range.
+ */
+ for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
+ u64 base, size;
+
+ base = of_read_number(prop + (i * 4) + 0, 2);
+ size = of_read_number(prop + (i * 4) + 2, 2);
+
+ if (size) {
+ ret = fadump_add_mem_range(&reserved_mrange_info,
+ base, base + size);
+ if (ret < 0) {
+ pr_warn("some reserved ranges are ignored!\n");
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Release the memory that was reserved during early boot to preserve the
+ * crash'ed kernel's memory contents except reserved dump area (permanent
+ * reservation) and reserved ranges used by F/W. The released memory will
+ * be available for general use.
+ */
+static void fadump_release_memory(u64 begin, u64 end)
+{
+ u64 ra_start, ra_end, tstart;
+ int i, ret;
+
+ fadump_scan_reserved_mem_ranges();
ra_start = fw_dump.reserve_dump_area_start;
ra_end = ra_start + fw_dump.reserve_dump_area_size;
/*
- * exclude the dump reserve area. Will reuse it for next
- * fadump registration.
+ * Add reserved dump area to reserved ranges list
+ * and exclude all these ranges while releasing memory.
*/
- if (begin < ra_end && end > ra_start) {
- if (begin < ra_start)
- fadump_release_reserved_area(begin, ra_start);
- if (end > ra_end)
- fadump_release_reserved_area(ra_end, end);
- } else
- fadump_release_reserved_area(begin, end);
+ ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
+ if (ret != 0) {
+ /*
+ * Not enough memory to setup reserved ranges but the system is
+ * running shortage of memory. So, release all the memory except
+ * Reserved dump area (reused for next fadump registration).
+ */
+ if (begin < ra_end && end > ra_start) {
+ if (begin < ra_start)
+ fadump_release_reserved_area(begin, ra_start);
+ if (end > ra_end)
+ fadump_release_reserved_area(ra_end, end);
+ } else
+ fadump_release_reserved_area(begin, end);
+
+ return;
+ }
+
+ /* Get the reserved ranges list in order first. */
+ sort_and_merge_mem_ranges(&reserved_mrange_info);
+
+ /* Exclude reserved ranges and release remaining memory */
+ tstart = begin;
+ for (i = 0; i < reserved_mrange_info.mem_range_cnt; i++) {
+ ra_start = reserved_mrange_info.mem_ranges[i].base;
+ ra_end = ra_start + reserved_mrange_info.mem_ranges[i].size;
+
+ if (tstart >= ra_end)
+ continue;
+
+ if (tstart < ra_start)
+ fadump_release_reserved_area(tstart, ra_start);
+ tstart = ra_end;
+ }
+
+ if (tstart < end)
+ fadump_release_reserved_area(tstart, end);
}
static void fadump_invalidate_release_mem(void)
{
- unsigned long reserved_area_start, reserved_area_end;
- unsigned long destination_address;
-
mutex_lock(&fadump_mutex);
if (!fw_dump.dump_active) {
mutex_unlock(&fadump_mutex);
return;
}
- destination_address = be64_to_cpu(fdm_active->cpu_state_data.destination_address);
fadump_cleanup();
mutex_unlock(&fadump_mutex);
+ fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM());
+ fadump_free_cpu_notes_buf();
+
/*
- * Save the current reserved memory bounds we will require them
- * later for releasing the memory for general use.
- */
- reserved_area_start = fw_dump.reserve_dump_area_start;
- reserved_area_end = reserved_area_start +
- fw_dump.reserve_dump_area_size;
- /*
- * Setup reserve_dump_area_start and its size so that we can
- * reuse this reserved memory for Re-registration.
+ * Setup kernel metadata and initialize the kernel dump
+ * memory structure for FADump re-registration.
*/
- fw_dump.reserve_dump_area_start = destination_address;
- fw_dump.reserve_dump_area_size = get_fadump_area_size();
-
- fadump_release_memory(reserved_area_start, reserved_area_end);
- if (fw_dump.cpu_notes_buf) {
- fadump_cpu_notes_buf_free(
- (unsigned long)__va(fw_dump.cpu_notes_buf),
- fw_dump.cpu_notes_buf_size);
- fw_dump.cpu_notes_buf = 0;
- fw_dump.cpu_notes_buf_size = 0;
- }
- /* Initialize the kernel dump memory structure for FAD registration. */
- init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
+ if (fw_dump.ops->fadump_setup_metadata &&
+ (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
+ pr_warn("Failed to setup kernel metadata!\n");
+ fw_dump.ops->fadump_init_mem_struct(&fw_dump);
}
static ssize_t fadump_release_memory_store(struct kobject *kobj,
@@ -1528,7 +1371,7 @@ static ssize_t fadump_register_store(struct kobject *kobj,
int ret = 0;
int input = -1;
- if (!fw_dump.fadump_enabled || fdm_active)
+ if (!fw_dump.fadump_enabled || fw_dump.dump_active)
return -EPERM;
if (kstrtoint(buf, 0, &input))
@@ -1541,13 +1384,15 @@ static ssize_t fadump_register_store(struct kobject *kobj,
if (fw_dump.dump_registered == 0) {
goto unlock_out;
}
+
/* Un-register Firmware-assisted dump */
- fadump_unregister_dump(&fdm);
+ pr_debug("Un-register firmware-assisted dump\n");
+ fw_dump.ops->fadump_unregister(&fw_dump);
break;
case 1:
if (fw_dump.dump_registered == 1) {
/* Un-register Firmware-assisted dump */
- fadump_unregister_dump(&fdm);
+ fw_dump.ops->fadump_unregister(&fw_dump);
}
/* Register Firmware-assisted dump */
ret = register_fadump();
@@ -1564,62 +1409,12 @@ unlock_out:
static int fadump_region_show(struct seq_file *m, void *private)
{
- const struct fadump_mem_struct *fdm_ptr;
-
if (!fw_dump.fadump_enabled)
return 0;
mutex_lock(&fadump_mutex);
- if (fdm_active)
- fdm_ptr = fdm_active;
- else {
- mutex_unlock(&fadump_mutex);
- fdm_ptr = &fdm;
- }
-
- seq_printf(m,
- "CPU : [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address),
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) +
- be64_to_cpu(fdm_ptr->cpu_state_data.source_len) - 1,
- be64_to_cpu(fdm_ptr->cpu_state_data.source_len),
- be64_to_cpu(fdm_ptr->cpu_state_data.bytes_dumped));
- seq_printf(m,
- "HPTE: [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->hpte_region.destination_address),
- be64_to_cpu(fdm_ptr->hpte_region.destination_address) +
- be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1,
- be64_to_cpu(fdm_ptr->hpte_region.source_len),
- be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped));
- seq_printf(m,
- "DUMP: [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->rmr_region.destination_address),
- be64_to_cpu(fdm_ptr->rmr_region.destination_address) +
- be64_to_cpu(fdm_ptr->rmr_region.source_len) - 1,
- be64_to_cpu(fdm_ptr->rmr_region.source_len),
- be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped));
-
- if (!fdm_active ||
- (fw_dump.reserve_dump_area_start ==
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address)))
- goto out;
-
- /* Dump is active. Show reserved memory region. */
- seq_printf(m,
- " : [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- (unsigned long long)fw_dump.reserve_dump_area_start,
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - 1,
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
- fw_dump.reserve_dump_area_start,
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
- fw_dump.reserve_dump_area_start);
-out:
- if (fdm_active)
- mutex_unlock(&fadump_mutex);
+ fw_dump.ops->fadump_region_show(&fw_dump, m);
+ mutex_unlock(&fadump_mutex);
return 0;
}
@@ -1671,16 +1466,15 @@ static void fadump_init_files(void)
*/
int __init setup_fadump(void)
{
- if (!fw_dump.fadump_enabled)
- return 0;
-
- if (!fw_dump.fadump_supported) {
- printk(KERN_ERR "Firmware-assisted dump is not supported on"
- " this hardware\n");
+ if (!fw_dump.fadump_supported)
return 0;
- }
+ fadump_init_files();
fadump_show_config();
+
+ if (!fw_dump.fadump_enabled)
+ return 1;
+
/*
* If dump data is available then see if it is valid and prepare for
* saving it to the disk.
@@ -1690,14 +1484,75 @@ int __init setup_fadump(void)
* if dump process fails then invalidate the registration
* and release memory before proceeding for re-registration.
*/
- if (process_fadump(fdm_active) < 0)
+ if (fw_dump.ops->fadump_process(&fw_dump) < 0)
fadump_invalidate_release_mem();
}
/* Initialize the kernel dump memory structure for FAD registration. */
else if (fw_dump.reserve_dump_area_size)
- init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
- fadump_init_files();
+ fw_dump.ops->fadump_init_mem_struct(&fw_dump);
return 1;
}
subsys_initcall(setup_fadump);
+#else /* !CONFIG_PRESERVE_FA_DUMP */
+
+/* Scan the Firmware Assisted dump configuration details. */
+int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0))
+ return 0;
+
+ opal_fadump_dt_scan(&fw_dump, node);
+ return 1;
+}
+
+/*
+ * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
+ * preserve crash data. The subsequent memory preserving kernel boot
+ * is likely to process this crash data.
+ */
+int __init fadump_reserve_mem(void)
+{
+ if (fw_dump.dump_active) {
+ /*
+ * If last boot has crashed then reserve all the memory
+ * above boot memory to preserve crash data.
+ */
+ pr_info("Preserving crash data for processing in next boot.\n");
+ fadump_reserve_crash_area(fw_dump.boot_mem_top);
+ } else
+ pr_debug("FADump-aware kernel..\n");
+
+ return 1;
+}
+#endif /* CONFIG_PRESERVE_FA_DUMP */
+
+/* Preserve everything above the base address */
+static void __init fadump_reserve_crash_area(u64 base)
+{
+ struct memblock_region *reg;
+ u64 mstart, msize;
+
+ for_each_memblock(memory, reg) {
+ mstart = reg->base;
+ msize = reg->size;
+
+ if ((mstart + msize) < base)
+ continue;
+
+ if (mstart < base) {
+ msize -= (base - mstart);
+ mstart = base;
+ }
+
+ pr_info("Reserving %lluMB of memory at %#016llx for preserving crash data",
+ (msize >> 20), mstart);
+ memblock_reserve(mstart, msize);
+ }
+}
+
+unsigned long __init arch_reserved_kernel_pages(void)
+{
+ return memblock_reserved_size() / PAGE_SIZE;
+}
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 0bb991ddd264..3235a8da6af7 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -94,6 +94,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
/* enable use of FP after return */
#ifdef CONFIG_PPC32
mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
+#ifdef CONFIG_VMAP_STACK
+ tovirt(r5, r5)
+#endif
lwz r4,THREAD_FPEXC_MODE(r5)
ori r9,r9,MSR_FP /* enable FP for current */
or r9,r9,r4
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
index ea065282b303..8bccce6544b5 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -153,35 +153,24 @@ skpinv: addi r6,r6,1 /* Increment */
tlbivax 0,r9
TLBSYNC
-/*
- * The mapping only needs to be cache-coherent on SMP, except on
- * Freescale e500mc derivatives where it's also needed for coherent DMA.
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-#define M_IF_NEEDED MAS2_M
-#else
-#define M_IF_NEEDED 0
-#endif
-
#if defined(ENTRY_MAPPING_BOOT_SETUP)
-/* 6. Setup KERNELBASE mapping in TLB1[0] */
+/* 6. Setup kernstart_virt_addr mapping in TLB1[0] */
lis r6,0x1000 /* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */
mtspr SPRN_MAS0,r6
lis r6,(MAS1_VALID|MAS1_IPROT)@h
ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
mtspr SPRN_MAS1,r6
- lis r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_NEEDED)@h
- ori r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_NEEDED)@l
+ lis r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h
+ ori r6,r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l
+ and r6,r6,r20
+ ori r6,r6,MAS2_M_IF_NEEDED@l
mtspr SPRN_MAS2,r6
mtspr SPRN_MAS3,r8
tlbwe
-/* 7. Jump to KERNELBASE mapping */
- lis r6,(KERNELBASE & ~0xfff)@h
- ori r6,r6,(KERNELBASE & ~0xfff)@l
- rlwinm r7,r25,0,0x03ffffff
- add r6,r7,r6
+/* 7. Jump to kernstart_virt_addr mapping */
+ mr r6,r20
#elif defined(ENTRY_MAPPING_KEXEC_SETUP)
/*
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index f255e22184b4..0493fcac6409 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -34,7 +34,16 @@
#include "head_32.h"
-/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
+/* 601 only have IBAT */
+#ifdef CONFIG_PPC_BOOK3S_601
+#define LOAD_BAT(n, reg, RA, RB) \
+ li RA,0; \
+ mtspr SPRN_IBAT##n##U,RA; \
+ lwz RA,(n*16)+0(reg); \
+ lwz RB,(n*16)+4(reg); \
+ mtspr SPRN_IBAT##n##U,RA; \
+ mtspr SPRN_IBAT##n##L,RB
+#else
#define LOAD_BAT(n, reg, RA, RB) \
/* see the comment for clear_bats() -- Cort */ \
li RA,0; \
@@ -44,12 +53,11 @@
lwz RB,(n*16)+4(reg); \
mtspr SPRN_IBAT##n##U,RA; \
mtspr SPRN_IBAT##n##L,RB; \
- beq 1f; \
lwz RA,(n*16)+8(reg); \
lwz RB,(n*16)+12(reg); \
mtspr SPRN_DBAT##n##U,RA; \
- mtspr SPRN_DBAT##n##L,RB; \
-1:
+ mtspr SPRN_DBAT##n##L,RB
+#endif
__HEAD
.stabs "arch/powerpc/kernel/",N_SO,0,0,0f
@@ -264,16 +272,21 @@ __secondary_hold_acknowledge:
*/
. = 0x200
DO_KVM 0x200
- mtspr SPRN_SPRG_SCRATCH0,r10
- mtspr SPRN_SPRG_SCRATCH1,r11
- mfcr r10
+MachineCheck:
+ EXCEPTION_PROLOG_0
+#ifdef CONFIG_VMAP_STACK
+ li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
+ mtmsr r11
+ isync
+#endif
#ifdef CONFIG_PPC_CHRP
mfspr r11, SPRN_SPRG_THREAD
+ tovirt_vmstack r11, r11
lwz r11, RTAS_SP(r11)
cmpwi cr1, r11, 0
bne cr1, 7f
#endif /* CONFIG_PPC_CHRP */
- EXCEPTION_PROLOG_1
+ EXCEPTION_PROLOG_1 for_rtas=1
7: EXCEPTION_PROLOG_2
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_CHRP
@@ -288,24 +301,21 @@ __secondary_hold_acknowledge:
. = 0x300
DO_KVM 0x300
DataAccess:
- EXCEPTION_PROLOG
- mfspr r10,SPRN_DSISR
- stw r10,_DSISR(r11)
+ EXCEPTION_PROLOG handle_dar_dsisr=1
+ get_and_save_dar_dsisr_on_stack r4, r5, r11
+BEGIN_MMU_FTR_SECTION
#ifdef CONFIG_PPC_KUAP
- andis. r0,r10,(DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h
+ andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h
#else
- andis. r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h
+ andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h
#endif
- bne 1f /* if not, try to put a PTE */
- mfspr r4,SPRN_DAR /* into the hash table */
- rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */
-BEGIN_MMU_FTR_SECTION
+ bne handle_page_fault_tramp_2 /* if not, try to put a PTE */
+ rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */
bl hash_page
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
-1: lwz r5,_DSISR(r11) /* get DSISR value */
- mfspr r4,SPRN_DAR
- EXC_XFER_LITE(0x300, handle_page_fault)
-
+ b handle_page_fault_tramp_1
+FTR_SECTION_ELSE
+ b handle_page_fault_tramp_2
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
/* Instruction access exception. */
. = 0x400
@@ -321,6 +331,7 @@ BEGIN_MMU_FTR_SECTION
END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
1: mr r4,r12
andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
+ stw r4, _DAR(r11)
EXC_XFER_LITE(0x400, handle_page_fault)
/* External interrupt */
@@ -330,11 +341,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
. = 0x600
DO_KVM 0x600
Alignment:
- EXCEPTION_PROLOG
- mfspr r4,SPRN_DAR
- stw r4,_DAR(r11)
- mfspr r5,SPRN_DSISR
- stw r5,_DSISR(r11)
+ EXCEPTION_PROLOG handle_dar_dsisr=1
+ save_dar_dsisr_on_stack r4, r5, r11
addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_STD(0x600, alignment_exception)
@@ -557,9 +565,9 @@ DataStoreTLBMiss:
cmplw 0,r1,r3
mfspr r2, SPRN_SPRG_PGDIR
#ifdef CONFIG_SWAP
- li r1, _PAGE_RW | _PAGE_PRESENT | _PAGE_ACCESSED
+ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
#else
- li r1, _PAGE_RW | _PAGE_PRESENT
+ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT
#endif
bge- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
@@ -637,6 +645,16 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
. = 0x3000
+handle_page_fault_tramp_1:
+ lwz r4, _DAR(r11)
+ lwz r5, _DSISR(r11)
+ /* fall through */
+handle_page_fault_tramp_2:
+ EXC_XFER_LITE(0x300, handle_page_fault)
+
+stack_overflow:
+ vmap_stack_overflow_exception
+
AltiVecUnavailable:
EXCEPTION_PROLOG
#ifdef CONFIG_ALTIVEC
@@ -820,9 +838,6 @@ load_up_mmu:
/* Load the BAT registers with the values set up by MMU_init.
MMU_init takes care of whether we're on a 601 or not. */
- mfpvr r3
- srwi r3,r3,16
- cmpwi r3,1
lis r3,BATS@ha
addi r3,r3,BATS@l
tophys(r3,r3)
@@ -897,9 +912,11 @@ start_here:
bl machine_init
bl __save_cpu_setup
bl MMU_init
+#ifdef CONFIG_KASAN
BEGIN_MMU_FTR_SECTION
bl MMU_init_hw_patch
END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
/*
* Go back to running unmapped so we can load up new values
@@ -910,6 +927,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
ori r4,r4,2f@l
tophys(r4,r4)
li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR)
+
+ .align 4
mtspr SPRN_SRR0,r4
mtspr SPRN_SRR1,r3
SYNC
@@ -996,11 +1015,8 @@ EXPORT_SYMBOL(switch_mmu_context)
*/
clear_bats:
li r10,0
- mfspr r9,SPRN_PVR
- rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
- cmpwi r9, 1
- beq 1f
+#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT0U,r10
mtspr SPRN_DBAT0L,r10
mtspr SPRN_DBAT1U,r10
@@ -1009,7 +1025,7 @@ clear_bats:
mtspr SPRN_DBAT2L,r10
mtspr SPRN_DBAT3U,r10
mtspr SPRN_DBAT3L,r10
-1:
+#endif
mtspr SPRN_IBAT0U,r10
mtspr SPRN_IBAT0L,r10
mtspr SPRN_IBAT1U,r10
@@ -1054,6 +1070,8 @@ _ENTRY(update_bats)
rlwinm r0, r6, 0, ~MSR_RI
rlwinm r0, r0, 0, ~MSR_EE
mtmsr r0
+
+ .align 4
mtspr SPRN_SRR0, r4
mtspr SPRN_SRR1, r3
SYNC
@@ -1093,6 +1111,8 @@ mmu_off:
andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? */
beqlr
andc r3,r3,r0
+
+ .align 4
mtspr SPRN_SRR0,r4
mtspr SPRN_SRR1,r3
sync
@@ -1104,10 +1124,7 @@ mmu_off:
*/
initial_bats:
lis r11,PAGE_OFFSET@h
- mfspr r9,SPRN_PVR
- rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
- cmpwi 0,r9,1
- bne 4f
+#ifdef CONFIG_PPC_BOOK3S_601
ori r11,r11,4 /* set up BAT registers for 601 */
li r8,0x7f /* valid, block length = 8MB */
mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */
@@ -1120,10 +1137,8 @@ initial_bats:
addis r8,r8,0x800000@h
mtspr SPRN_IBAT2U,r11
mtspr SPRN_IBAT2L,r8
- isync
- blr
-
-4: tophys(r8,r11)
+#else
+ tophys(r8,r11)
#ifdef CONFIG_SMP
ori r8,r8,0x12 /* R/W access, M=1 */
#else
@@ -1135,10 +1150,10 @@ initial_bats:
mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */
mtspr SPRN_IBAT0L,r8
mtspr SPRN_IBAT0U,r11
+#endif
isync
blr
-
#ifdef CONFIG_BOOTX_TEXT
setup_disp_bat:
/*
@@ -1153,15 +1168,13 @@ setup_disp_bat:
beqlr
lwz r11,0(r8)
lwz r8,4(r8)
- mfspr r9,SPRN_PVR
- rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
- cmpwi 0,r9,1
- beq 1f
+#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT3L,r8
mtspr SPRN_DBAT3U,r11
- blr
-1: mtspr SPRN_IBAT3L,r8
+#else
+ mtspr SPRN_IBAT3L,r8
mtspr SPRN_IBAT3U,r11
+#endif
blr
#endif /* CONFIG_BOOTX_TEXT */
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 4a692553651f..a6a5fbbf8504 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -5,46 +5,65 @@
#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
/*
- * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE.
- */
-.macro __LOAD_MSR_KERNEL r, x
-.if \x >= 0x8000
- lis \r, (\x)@h
- ori \r, \r, (\x)@l
-.else
- li \r, (\x)
-.endif
-.endm
-#define LOAD_MSR_KERNEL(r, x) __LOAD_MSR_KERNEL r, x
-
-/*
* Exception entry code. This code runs with address translation
* turned off, i.e. using physical addresses.
* We assume sprg3 has the physical address of the current
* task's thread_struct.
*/
+.macro EXCEPTION_PROLOG handle_dar_dsisr=0
+ EXCEPTION_PROLOG_0 handle_dar_dsisr=\handle_dar_dsisr
+ EXCEPTION_PROLOG_1
+ EXCEPTION_PROLOG_2 handle_dar_dsisr=\handle_dar_dsisr
+.endm
-.macro EXCEPTION_PROLOG
+.macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0
mtspr SPRN_SPRG_SCRATCH0,r10
mtspr SPRN_SPRG_SCRATCH1,r11
+#ifdef CONFIG_VMAP_STACK
+ mfspr r10, SPRN_SPRG_THREAD
+ .if \handle_dar_dsisr
+ mfspr r11, SPRN_DAR
+ stw r11, DAR(r10)
+ mfspr r11, SPRN_DSISR
+ stw r11, DSISR(r10)
+ .endif
+ mfspr r11, SPRN_SRR0
+ stw r11, SRR0(r10)
+#endif
+ mfspr r11, SPRN_SRR1 /* check whether user or kernel */
+#ifdef CONFIG_VMAP_STACK
+ stw r11, SRR1(r10)
+#endif
mfcr r10
- EXCEPTION_PROLOG_1
- EXCEPTION_PROLOG_2
+ andi. r11, r11, MSR_PR
.endm
-.macro EXCEPTION_PROLOG_1
- mfspr r11,SPRN_SRR1 /* check whether user or kernel */
- andi. r11,r11,MSR_PR
+.macro EXCEPTION_PROLOG_1 for_rtas=0
+#ifdef CONFIG_VMAP_STACK
+ .ifeq \for_rtas
+ li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
+ mtmsr r11
+ isync
+ .endif
+ subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */
+#else
tophys(r11,r1) /* use tophys(r1) if kernel */
+ subi r11, r11, INT_FRAME_SIZE /* alloc exc. frame */
+#endif
beq 1f
mfspr r11,SPRN_SPRG_THREAD
+ tovirt_vmstack r11, r11
lwz r11,TASK_STACK-THREAD(r11)
- addi r11,r11,THREAD_SIZE
- tophys(r11,r11)
-1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
+ addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE
+ tophys_novmstack r11, r11
+1:
+#ifdef CONFIG_VMAP_STACK
+ mtcrf 0x7f, r11
+ bt 32 - THREAD_ALIGN_SHIFT, stack_overflow
+#endif
.endm
-.macro EXCEPTION_PROLOG_2
+.macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0
stw r10,_CCR(r11) /* save registers */
stw r12,GPR12(r11)
stw r9,GPR9(r11)
@@ -54,16 +73,33 @@
stw r12,GPR11(r11)
mflr r10
stw r10,_LINK(r11)
+#ifdef CONFIG_VMAP_STACK
+ mfspr r12, SPRN_SPRG_THREAD
+ tovirt(r12, r12)
+ .if \handle_dar_dsisr
+ lwz r10, DAR(r12)
+ stw r10, _DAR(r11)
+ lwz r10, DSISR(r12)
+ stw r10, _DSISR(r11)
+ .endif
+ lwz r9, SRR1(r12)
+ lwz r12, SRR0(r12)
+#else
mfspr r12,SPRN_SRR0
mfspr r9,SPRN_SRR1
+#endif
stw r1,GPR1(r11)
stw r1,0(r11)
- tovirt(r1,r11) /* set new kernel sp */
+ tovirt_novmstack r1, r11 /* set new kernel sp */
#ifdef CONFIG_40x
rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
#else
+#ifdef CONFIG_VMAP_STACK
+ li r10, MSR_KERNEL & ~MSR_IR /* can take exceptions */
+#else
li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */
- MTMSRD(r10) /* (except for mach check in rtas) */
+#endif
+ mtmsr r10 /* (except for mach check in rtas) */
#endif
stw r0,GPR0(r11)
lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
@@ -75,25 +111,46 @@
.macro SYSCALL_ENTRY trapno
mfspr r12,SPRN_SPRG_THREAD
+#ifdef CONFIG_VMAP_STACK
+ mfspr r9, SPRN_SRR0
+ mfspr r11, SPRN_SRR1
+ stw r9, SRR0(r12)
+ stw r11, SRR1(r12)
+#endif
mfcr r10
lwz r11,TASK_STACK-THREAD(r12)
- mflr r9
- addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE
rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */
- tophys(r11,r11)
+ addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE
+#ifdef CONFIG_VMAP_STACK
+ li r9, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
+ mtmsr r9
+ isync
+#endif
+ tovirt_vmstack r12, r12
+ tophys_novmstack r11, r11
+ mflr r9
stw r10,_CCR(r11) /* save registers */
+ stw r9, _LINK(r11)
+#ifdef CONFIG_VMAP_STACK
+ lwz r10, SRR0(r12)
+ lwz r9, SRR1(r12)
+#else
mfspr r10,SPRN_SRR0
- stw r9,_LINK(r11)
mfspr r9,SPRN_SRR1
+#endif
stw r1,GPR1(r11)
stw r1,0(r11)
- tovirt(r1,r11) /* set new kernel sp */
+ tovirt_novmstack r1, r11 /* set new kernel sp */
stw r10,_NIP(r11)
#ifdef CONFIG_40x
rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
#else
- LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
- MTMSRD(r10) /* (except for mach check in rtas) */
+#ifdef CONFIG_VMAP_STACK
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~MSR_IR) /* can take exceptions */
+#else
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
+#endif
+ mtmsr r10 /* (except for mach check in rtas) */
#endif
lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
stw r2,GPR2(r11)
@@ -131,7 +188,7 @@
#endif
3:
- tovirt(r2, r2) /* set r2 to current */
+ tovirt_novmstack r2, r2 /* set r2 to current */
lis r11, transfer_to_syscall@h
ori r11, r11, transfer_to_syscall@l
#ifdef CONFIG_TRACE_IRQFLAGS
@@ -140,10 +197,10 @@
* otherwise we might risk taking an interrupt before we tell lockdep
* they are enabled.
*/
- LOAD_MSR_KERNEL(r10, MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL)
rlwimi r10, r9, 0, MSR_EE
#else
- LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
#endif
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
@@ -154,6 +211,54 @@
RFI /* jump to handler, enable MMU */
.endm
+.macro save_dar_dsisr_on_stack reg1, reg2, sp
+#ifndef CONFIG_VMAP_STACK
+ mfspr \reg1, SPRN_DAR
+ mfspr \reg2, SPRN_DSISR
+ stw \reg1, _DAR(\sp)
+ stw \reg2, _DSISR(\sp)
+#endif
+.endm
+
+.macro get_and_save_dar_dsisr_on_stack reg1, reg2, sp
+#ifdef CONFIG_VMAP_STACK
+ lwz \reg1, _DAR(\sp)
+ lwz \reg2, _DSISR(\sp)
+#else
+ save_dar_dsisr_on_stack \reg1, \reg2, \sp
+#endif
+.endm
+
+.macro tovirt_vmstack dst, src
+#ifdef CONFIG_VMAP_STACK
+ tovirt(\dst, \src)
+#else
+ .ifnc \dst, \src
+ mr \dst, \src
+ .endif
+#endif
+.endm
+
+.macro tovirt_novmstack dst, src
+#ifndef CONFIG_VMAP_STACK
+ tovirt(\dst, \src)
+#else
+ .ifnc \dst, \src
+ mr \dst, \src
+ .endif
+#endif
+.endm
+
+.macro tophys_novmstack dst, src
+#ifndef CONFIG_VMAP_STACK
+ tophys(\dst, \src)
+#else
+ .ifnc \dst, \src
+ mr \dst, \src
+ .endif
+#endif
+.endm
+
/*
* Note: code which follows this uses cr0.eq (set if from kernel),
* r11, r12 (SRR0), and r9 (SRR1).
@@ -187,7 +292,7 @@ label:
#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
li r10,trap; \
stw r10,_TRAP(r11); \
- LOAD_MSR_KERNEL(r10, msr); \
+ LOAD_REG_IMMEDIATE(r10, msr); \
bl tfer; \
.long hdlr; \
.long ret
@@ -200,4 +305,28 @@ label:
EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
ret_from_except)
+.macro vmap_stack_overflow_exception
+#ifdef CONFIG_VMAP_STACK
+#ifdef CONFIG_SMP
+ mfspr r11, SPRN_SPRG_THREAD
+ tovirt(r11, r11)
+ lwz r11, TASK_CPU - THREAD(r11)
+ slwi r11, r11, 3
+ addis r11, r11, emergency_ctx@ha
+#else
+ lis r11, emergency_ctx@ha
+#endif
+ lwz r11, emergency_ctx@l(r11)
+ cmpwi cr1, r11, 0
+ bne cr1, 1f
+ lis r11, init_thread_union@ha
+ addi r11, r11, init_thread_union@l
+1: addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE
+ EXCEPTION_PROLOG_2
+ SAVE_NVGPRS(r11)
+ addi r3, r1, STACK_FRAME_OVERHEAD
+ EXC_XFER_STD(0, stack_overflow_exception)
+#endif
+.endm
+
#endif /* __HEAD_32_H__ */
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 585ea1976550..9bb663977e84 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -313,6 +313,7 @@ _ENTRY(saved_ksp_limit)
START_EXCEPTION(0x0400, InstructionAccess)
EXCEPTION_PROLOG
mr r4,r12 /* Pass SRR0 as arg2 */
+ stw r4, _DEAR(r11)
li r5,0 /* Pass zero as arg3 */
EXC_XFER_LITE(0x400, handle_page_fault)
@@ -676,6 +677,7 @@ DataAccess:
mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
stw r5,_ESR(r11)
mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
+ stw r4, _DEAR(r11)
EXC_XFER_LITE(0x300, handle_page_fault)
/* Other PowerPC processors, namely those derived from the 6xx-series
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 91d297e696dd..ad79fddb974d 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -182,7 +182,8 @@ __secondary_hold:
isync
bctr
#else
- BUG_OPCODE
+0: trap
+ EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
#endif
CLOSE_FIXED_SECTION(first_256B)
@@ -635,7 +636,7 @@ __after_prom_start:
sub r5,r5,r11
#else
/* just copy interrupts */
- LOAD_REG_IMMEDIATE(r5, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
+ LOAD_REG_IMMEDIATE_SYM(r5, r11, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
#endif
b 5f
3:
@@ -998,7 +999,8 @@ start_here_common:
bl start_kernel
/* Not reached */
- BUG_OPCODE
+ trap
+ EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
/*
* We put a few things here that have to be page-aligned.
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 5ab9178c2347..9922306ae512 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -15,6 +15,7 @@
*/
#include <linux/init.h>
+#include <linux/magic.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -126,56 +127,36 @@ instruction_counter:
/* Machine check */
. = 0x200
MachineCheck:
- EXCEPTION_PROLOG
- mfspr r4,SPRN_DAR
- stw r4,_DAR(r11)
- li r5,RPN_PATTERN
- mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */
- mfspr r5,SPRN_DSISR
- stw r5,_DSISR(r11)
+ EXCEPTION_PROLOG handle_dar_dsisr=1
+ save_dar_dsisr_on_stack r4, r5, r11
+ li r6, RPN_PATTERN
+ mtspr SPRN_DAR, r6 /* Tag DAR, to be used in DTLB Error */
addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_STD(0x200, machine_check_exception)
-/* Data access exception.
- * This is "never generated" by the MPC8xx.
- */
- . = 0x300
-DataAccess:
-
-/* Instruction access exception.
- * This is "never generated" by the MPC8xx.
- */
- . = 0x400
-InstructionAccess:
-
/* External interrupt */
EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
/* Alignment exception */
. = 0x600
Alignment:
- EXCEPTION_PROLOG
- mfspr r4,SPRN_DAR
- stw r4,_DAR(r11)
- li r5,RPN_PATTERN
- mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */
- mfspr r5,SPRN_DSISR
- stw r5,_DSISR(r11)
+ EXCEPTION_PROLOG handle_dar_dsisr=1
+ save_dar_dsisr_on_stack r4, r5, r11
+ li r6, RPN_PATTERN
+ mtspr SPRN_DAR, r6 /* Tag DAR, to be used in DTLB Error */
addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_STD(0x600, alignment_exception)
+ b .Lalignment_exception_ool
/* Program check exception */
EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
-/* No FPU on MPC8xx. This exception is not supposed to happen.
-*/
- EXCEPTION(0x800, FPUnavailable, unknown_exception, EXC_XFER_STD)
-
/* Decrementer */
EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
- EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD)
+ /* With VMAP_STACK there's not enough room for this at 0x600 */
+ . = 0xa00
+.Lalignment_exception_ool:
+ EXC_XFER_STD(0x600, alignment_exception)
/* System call */
. = 0xc00
@@ -184,25 +165,12 @@ SystemCall:
/* Single step - not used on 601 */
EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
- EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_STD)
/* On the MPC8xx, this is a software emulation interrupt. It occurs
* for all unimplemented and illegal instructions.
*/
EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD)
-/* Called from DataStoreTLBMiss when perf TLB misses events are activated */
-#ifdef CONFIG_PERF_EVENTS
- patch_site 0f, patch__dtlbmiss_perf
-0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
- addi r10, r10, 1
- stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
- mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
- rfi
-#endif
-
. = 0x1100
/*
* For the MPC8xx, this is a software tablewalk to load the instruction
@@ -342,8 +310,8 @@ ITLBMissLinear:
. = 0x1200
DataStoreTLBMiss:
- mtspr SPRN_SPRG_SCRATCH0, r10
- mtspr SPRN_SPRG_SCRATCH1, r11
+ mtspr SPRN_DAR, r10
+ mtspr SPRN_M_TW, r11
mfcr r11
/* If we are faulting a kernel address, we have to use the
@@ -408,10 +376,10 @@ DataStoreTLBMiss:
mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
/* Restore registers */
- mtspr SPRN_DAR, r11 /* Tag DAR */
-0: mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
+0: mfspr r10, SPRN_DAR
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+ mfspr r11, SPRN_M_TW
rfi
patch_site 0b, patch__dtlbmiss_exit_1
@@ -427,10 +395,10 @@ DTLBMissIMMR:
mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
li r11, RPN_PATTERN
- mtspr SPRN_DAR, r11 /* Tag DAR */
-0: mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
+0: mfspr r10, SPRN_DAR
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+ mfspr r11, SPRN_M_TW
rfi
patch_site 0b, patch__dtlbmiss_exit_2
@@ -464,10 +432,10 @@ DTLBMissLinear:
mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
li r11, RPN_PATTERN
- mtspr SPRN_DAR, r11 /* Tag DAR */
-0: mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
+0: mfspr r10, SPRN_DAR
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+ mfspr r11, SPRN_M_TW
rfi
patch_site 0b, patch__dtlbmiss_exit_3
@@ -485,6 +453,7 @@ InstructionTLBError:
tlbie r4
/* 0x400 is InstructionAccess exception, needed by bad_page_fault() */
.Litlbie:
+ stw r4, _DAR(r11)
EXC_XFER_LITE(0x400, handle_page_fault)
/* This is the data TLB error on the MPC8xx. This could be due to
@@ -493,58 +462,69 @@ InstructionTLBError:
*/
. = 0x1400
DataTLBError:
- mtspr SPRN_SPRG_SCRATCH0, r10
- mtspr SPRN_SPRG_SCRATCH1, r11
- mfcr r10
-
+ EXCEPTION_PROLOG_0 handle_dar_dsisr=1
mfspr r11, SPRN_DAR
- cmpwi cr0, r11, RPN_PATTERN
- beq- FixupDAR /* must be a buggy dcbX, icbi insn. */
+ cmpwi cr1, r11, RPN_PATTERN
+ beq- cr1, FixupDAR /* must be a buggy dcbX, icbi insn. */
DARFixed:/* Return from dcbx instruction bug workaround */
+#ifdef CONFIG_VMAP_STACK
+ li r11, RPN_PATTERN
+ mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */
+#endif
EXCEPTION_PROLOG_1
- EXCEPTION_PROLOG_2
- mfspr r5,SPRN_DSISR
- stw r5,_DSISR(r11)
- mfspr r4,SPRN_DAR
+ EXCEPTION_PROLOG_2 handle_dar_dsisr=1
+ get_and_save_dar_dsisr_on_stack r4, r5, r11
andis. r10,r5,DSISR_NOHPTE@h
beq+ .Ldtlbie
tlbie r4
.Ldtlbie:
+#ifndef CONFIG_VMAP_STACK
li r10,RPN_PATTERN
mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */
+#endif
/* 0x300 is DataAccess exception, needed by bad_page_fault() */
EXC_XFER_LITE(0x300, handle_page_fault)
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD)
+/* Called from DataStoreTLBMiss when perf TLB misses events are activated */
+#ifdef CONFIG_PERF_EVENTS
+ patch_site 0f, patch__dtlbmiss_perf
+0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+ addi r10, r10, 1
+ stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+ mfspr r10, SPRN_DAR
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+ mfspr r11, SPRN_M_TW
+ rfi
+#endif
+
+stack_overflow:
+ vmap_stack_overflow_exception
/* On the MPC8xx, these next four traps are used for development
* support of breakpoints and such. Someday I will get around to
* using them.
*/
- . = 0x1c00
-DataBreakpoint:
- mtspr SPRN_SPRG_SCRATCH0, r10
- mtspr SPRN_SPRG_SCRATCH1, r11
- mfcr r10
- mfspr r11, SPRN_SRR0
- cmplwi cr0, r11, (.Ldtlbie - PAGE_OFFSET)@l
- cmplwi cr7, r11, (.Litlbie - PAGE_OFFSET)@l
- beq- cr0, 11f
- beq- cr7, 11f
+do_databreakpoint:
EXCEPTION_PROLOG_1
- EXCEPTION_PROLOG_2
+ EXCEPTION_PROLOG_2 handle_dar_dsisr=1
addi r3,r1,STACK_FRAME_OVERHEAD
mfspr r4,SPRN_BAR
stw r4,_DAR(r11)
+#ifdef CONFIG_VMAP_STACK
+ lwz r5,_DSISR(r11)
+#else
mfspr r5,SPRN_DSISR
+#endif
EXC_XFER_STD(0x1c00, do_break)
-11:
+
+ . = 0x1c00
+DataBreakpoint:
+ EXCEPTION_PROLOG_0 handle_dar_dsisr=1
+ mfspr r11, SPRN_SRR0
+ cmplwi cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l
+ cmplwi cr7, r11, (.Litlbie - PAGE_OFFSET)@l
+ cror 4*cr1+eq, 4*cr1+eq, 4*cr7+eq
+ bne cr1, do_databreakpoint
mtcr r10
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
@@ -574,17 +554,15 @@ InstructionBreakpoint:
* by decoding the registers used by the dcbx instruction and adding them.
* DAR is set to the calculated address.
*/
- /* define if you don't want to use self modifying code */
-#define NO_SELF_MODIFYING_CODE
FixupDAR:/* Entry point for dcbx workaround. */
mtspr SPRN_M_TW, r10
/* fetch instruction from memory. */
mfspr r10, SPRN_SRR0
mtspr SPRN_MD_EPN, r10
rlwinm r11, r10, 16, 0xfff8
- cmpli cr0, r11, PAGE_OFFSET@h
+ cmpli cr1, r11, PAGE_OFFSET@h
mfspr r11, SPRN_M_TWB /* Get level 1 table */
- blt+ 3f
+ blt+ cr1, 3f
rlwinm r11, r10, 16, 0xfff8
0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
@@ -599,7 +577,7 @@ FixupDAR:/* Entry point for dcbx workaround. */
3:
lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */
mtspr SPRN_MD_TWC, r11
- mtcr r11
+ mtcrf 0x01, r11
mfspr r11, SPRN_MD_TWC
lwz r11, 0(r11) /* Get the pte */
bt 28,200f /* bit 28 = Large page (8M) */
@@ -612,16 +590,16 @@ FixupDAR:/* Entry point for dcbx workaround. */
* no need to include them here */
xoris r10, r11, 0x7c00 /* check if major OP code is 31 */
rlwinm r10, r10, 0, 21, 5
- cmpwi cr0, r10, 2028 /* Is dcbz? */
- beq+ 142f
- cmpwi cr0, r10, 940 /* Is dcbi? */
- beq+ 142f
- cmpwi cr0, r10, 108 /* Is dcbst? */
- beq+ 144f /* Fix up store bit! */
- cmpwi cr0, r10, 172 /* Is dcbf? */
- beq+ 142f
- cmpwi cr0, r10, 1964 /* Is icbi? */
- beq+ 142f
+ cmpwi cr1, r10, 2028 /* Is dcbz? */
+ beq+ cr1, 142f
+ cmpwi cr1, r10, 940 /* Is dcbi? */
+ beq+ cr1, 142f
+ cmpwi cr1, r10, 108 /* Is dcbst? */
+ beq+ cr1, 144f /* Fix up store bit! */
+ cmpwi cr1, r10, 172 /* Is dcbf? */
+ beq+ cr1, 142f
+ cmpwi cr1, r10, 1964 /* Is icbi? */
+ beq+ cr1, 142f
141: mfspr r10,SPRN_M_TW
b DARFixed /* Nope, go back to normal TLB processing */
@@ -639,27 +617,6 @@ FixupDAR:/* Entry point for dcbx workaround. */
rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */
mtspr SPRN_DSISR, r10
142: /* continue, it was a dcbx, dcbi instruction. */
-#ifndef NO_SELF_MODIFYING_CODE
- andis. r10,r11,0x1f /* test if reg RA is r0 */
- li r10,modified_instr@l
- dcbtst r0,r10 /* touch for store */
- rlwinm r11,r11,0,0,20 /* Zero lower 10 bits */
- oris r11,r11,640 /* Transform instr. to a "add r10,RA,RB" */
- ori r11,r11,532
- stw r11,0(r10) /* store add/and instruction */
- dcbf 0,r10 /* flush new instr. to memory. */
- icbi 0,r10 /* invalidate instr. cache line */
- mfspr r11, SPRN_SPRG_SCRATCH1 /* restore r11 */
- mfspr r10, SPRN_SPRG_SCRATCH0 /* restore r10 */
- isync /* Wait until new instr is loaded from memory */
-modified_instr:
- .space 4 /* this is where the add instr. is stored */
- bne+ 143f
- subf r10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */
-143: mtdar r10 /* store faulting EA in DAR */
- mfspr r10,SPRN_M_TW
- b DARFixed /* Go back to normal TLB handling */
-#else
mfctr r10
mtdar r10 /* save ctr reg in DAR */
rlwinm r10, r11, 24, 24, 28 /* offset into jump table for reg RB */
@@ -701,8 +658,9 @@ modified_instr:
add r10, r10, r30 ;b 151f
add r10, r10, r31
151:
- rlwinm. r11,r11,19,24,28 /* offset into jump table for reg RA */
- beq 152f /* if reg RA is zero, don't add it */
+ rlwinm r11,r11,19,24,28 /* offset into jump table for reg RA */
+ cmpwi cr1, r11, 0
+ beq cr1, 152f /* if reg RA is zero, don't add it */
addi r11, r11, 150b@l /* add start of table */
mtctr r11 /* load ctr with jump address */
rlwinm r11,r11,0,16,10 /* make sure we don't execute this more than once */
@@ -710,7 +668,14 @@ modified_instr:
152:
mfdar r11
mtctr r11 /* restore ctr reg from DAR */
+#ifdef CONFIG_VMAP_STACK
+ mfspr r11, SPRN_SPRG_THREAD
+ stw r10, DAR(r11)
+ mfspr r10, SPRN_DSISR
+ stw r10, DSISR(r11)
+#else
mtdar r10 /* save fault EA to DAR */
+#endif
mfspr r10,SPRN_M_TW
b DARFixed /* Go back to normal TLB handling */
@@ -723,7 +688,6 @@ modified_instr:
add r10, r10, r11 /* add it */
mfctr r11 /* restore r11 */
b 151b
-#endif
/*
* This is where the main kernel code starts.
@@ -741,6 +705,9 @@ start_here:
/* stack */
lis r1,init_thread_union@ha
addi r1,r1,init_thread_union@l
+ lis r0, STACK_END_MAGIC@h
+ ori r0, r0, STACK_END_MAGIC@l
+ stw r0, 0(r1)
li r0,0
stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 2ae635df9026..37fc84ed90e3 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -467,6 +467,7 @@ label:
mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
stw r5,_ESR(r11); \
mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \
+ stw r4, _DEAR(r11); \
EXC_XFER_LITE(0x0300, handle_page_fault)
#define INSTRUCTION_STORAGE_EXCEPTION \
@@ -475,6 +476,7 @@ label:
mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
stw r5,_ESR(r11); \
mr r4,r12; /* Pass SRR0 as arg2 */ \
+ stw r4, _DEAR(r11); \
li r5,0; /* Pass zero as arg3 */ \
EXC_XFER_LITE(0x0400, handle_page_fault)
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index adf0505dbe02..840af004041e 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -155,6 +155,8 @@ _ENTRY(_start);
*/
_ENTRY(__early_start)
+ LOAD_REG_ADDR_PIC(r20, kernstart_virt_addr)
+ lwz r20,0(r20)
#define ENTRY_MAPPING_BOOT_SETUP
#include "fsl_booke_entry_mapping.S"
@@ -238,6 +240,9 @@ set_ivor:
bl early_init
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
#ifdef CONFIG_RELOCATABLE
mr r3,r30
mr r4,r31
@@ -264,9 +269,6 @@ set_ivor:
/*
* Decide what sort of machine this is and initialize the MMU.
*/
-#ifdef CONFIG_KASAN
- bl kasan_early_init
-#endif
mr r3,r30
mr r4,r31
bl machine_init
@@ -277,8 +279,8 @@ set_ivor:
ori r6, r6, swapper_pg_dir@l
lis r5, abatron_pteptrs@h
ori r5, r5, abatron_pteptrs@l
- lis r4, KERNELBASE@h
- ori r4, r4, KERNELBASE@l
+ lis r3, kernstart_virt_addr@ha
+ lwz r4, kernstart_virt_addr@l(r3)
stw r5, 0(r4) /* Save abatron_pteptrs at a fixed location */
stw r6, 0(r5)
@@ -376,6 +378,7 @@ interrupt_base:
mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
andis. r10,r5,(ESR_ILK|ESR_DLK)@h
bne 1f
+ stw r4, _DEAR(r11)
EXC_XFER_LITE(0x0300, handle_page_fault)
1:
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -1067,7 +1070,12 @@ __secondary_start:
mr r5,r25 /* phys kernel start */
rlwinm r5,r5,0,~0x3ffffff /* aligned 64M */
subf r4,r5,r4 /* memstart_addr - phys kernel start */
- li r5,0 /* no device tree */
+ lis r7,KERNELBASE@h
+ ori r7,r7,KERNELBASE@l
+ cmpw r20,r7 /* if kernstart_virt_addr != KERNELBASE, randomized */
+ beq 2f
+ li r4,0
+2: li r5,0 /* no device tree */
li r6,0 /* not boot cpu */
bl restore_to_as0
@@ -1115,6 +1123,54 @@ __secondary_hold_acknowledge:
#endif
/*
+ * Create a 64M tlb by address and entry
+ * r3 - entry
+ * r4 - virtual address
+ * r5/r6 - physical address
+ */
+_GLOBAL(create_kaslr_tlb_entry)
+ lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */
+ rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r6) */
+ mtspr SPRN_MAS0,r7 /* Write MAS0 */
+
+ lis r3,(MAS1_VALID|MAS1_IPROT)@h
+ ori r3,r3,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
+ mtspr SPRN_MAS1,r3 /* Write MAS1 */
+
+ lis r3,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h
+ ori r3,r3,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l
+ and r3,r3,r4
+ ori r3,r3,MAS2_M_IF_NEEDED@l
+ mtspr SPRN_MAS2,r3 /* Write MAS2(EPN) */
+
+#ifdef CONFIG_PHYS_64BIT
+ ori r8,r6,(MAS3_SW|MAS3_SR|MAS3_SX)
+ mtspr SPRN_MAS3,r8 /* Write MAS3(RPN) */
+ mtspr SPRN_MAS7,r5
+#else
+ ori r8,r5,(MAS3_SW|MAS3_SR|MAS3_SX)
+ mtspr SPRN_MAS3,r8 /* Write MAS3(RPN) */
+#endif
+
+ tlbwe /* Write TLB */
+ isync
+ sync
+ blr
+
+/*
+ * Return to the start of the relocated kernel and run again
+ * r3 - virtual address of fdt
+ * r4 - entry of the kernel
+ */
+_GLOBAL(reloc_kernel_entry)
+ mfmsr r7
+ rlwinm r7, r7, 0, ~(MSR_IS | MSR_DS)
+
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r7
+ rfi
+
+/*
* Create a tlb entry with the same effective and physical address as
* the tlb entry used by the current running code. But set the TS to 1.
* Then switch to the address space 1. It will return with the r3 set to
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index c8d1fa2e9d53..2462cd7c565c 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -127,15 +127,61 @@ int arch_bp_generic_fields(int type, int *gen_bp_type)
}
/*
+ * Watchpoint match range is always doubleword(8 bytes) aligned on
+ * powerpc. If the given range is crossing doubleword boundary, we
+ * need to increase the length such that next doubleword also get
+ * covered. Ex,
+ *
+ * address len = 6 bytes
+ * |=========.
+ * |------------v--|------v--------|
+ * | | | | | | | | | | | | | | | | |
+ * |---------------|---------------|
+ * <---8 bytes--->
+ *
+ * In this case, we should configure hw as:
+ * start_addr = address & ~HW_BREAKPOINT_ALIGN
+ * len = 16 bytes
+ *
+ * @start_addr and @end_addr are inclusive.
+ */
+static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
+{
+ u16 max_len = DABR_MAX_LEN;
+ u16 hw_len;
+ unsigned long start_addr, end_addr;
+
+ start_addr = hw->address & ~HW_BREAKPOINT_ALIGN;
+ end_addr = (hw->address + hw->len - 1) | HW_BREAKPOINT_ALIGN;
+ hw_len = end_addr - start_addr + 1;
+
+ if (dawr_enabled()) {
+ max_len = DAWR_MAX_LEN;
+ /* DAWR region can't cross 512 bytes boundary */
+ if ((start_addr >> 9) != (end_addr >> 9))
+ return -EINVAL;
+ } else if (IS_ENABLED(CONFIG_PPC_8xx)) {
+ /* 8xx can setup a range without limitation */
+ max_len = U16_MAX;
+ }
+
+ if (hw_len > max_len)
+ return -EINVAL;
+
+ hw->hw_len = hw_len;
+ return 0;
+}
+
+/*
* Validate the arch-specific HW Breakpoint register settings
*/
int hw_breakpoint_arch_parse(struct perf_event *bp,
const struct perf_event_attr *attr,
struct arch_hw_breakpoint *hw)
{
- int ret = -EINVAL, length_max;
+ int ret = -EINVAL;
- if (!bp)
+ if (!bp || !attr->bp_len)
return ret;
hw->type = HW_BRK_TYPE_TRANSLATE;
@@ -155,26 +201,10 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
hw->address = attr->bp_addr;
hw->len = attr->bp_len;
- /*
- * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8)
- * and breakpoint addresses are aligned to nearest double-word
- * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the
- * 'symbolsize' should satisfy the check below.
- */
if (!ppc_breakpoint_available())
return -ENODEV;
- length_max = 8; /* DABR */
- if (dawr_enabled()) {
- length_max = 512 ; /* 64 doublewords */
- /* DAWR region can't cross 512 boundary */
- if ((attr->bp_addr >> 9) !=
- ((attr->bp_addr + attr->bp_len - 1) >> 9))
- return -EINVAL;
- }
- if (hw->len >
- (length_max - (hw->address & HW_BREAKPOINT_ALIGN)))
- return -EINVAL;
- return 0;
+
+ return hw_breakpoint_validate_len(hw);
}
/*
@@ -195,20 +225,80 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
tsk->thread.last_hit_ubp = NULL;
}
+static bool dar_within_range(unsigned long dar, struct arch_hw_breakpoint *info)
+{
+ return ((info->address <= dar) && (dar - info->address < info->len));
+}
+
+static bool
+dar_range_overlaps(unsigned long dar, int size, struct arch_hw_breakpoint *info)
+{
+ return ((dar <= info->address + info->len - 1) &&
+ (dar + size - 1 >= info->address));
+}
+
/*
* Handle debug exception notifications.
*/
+static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp,
+ struct arch_hw_breakpoint *info)
+{
+ unsigned int instr = 0;
+ int ret, type, size;
+ struct instruction_op op;
+ unsigned long addr = info->address;
+
+ if (__get_user_inatomic(instr, (unsigned int *)regs->nip))
+ goto fail;
+
+ ret = analyse_instr(&op, regs, instr);
+ type = GETTYPE(op.type);
+ size = GETSIZE(op.type);
+
+ if (!ret && (type == LARX || type == STCX)) {
+ printk_ratelimited("Breakpoint hit on instruction that can't be emulated."
+ " Breakpoint at 0x%lx will be disabled.\n", addr);
+ goto disable;
+ }
+
+ /*
+ * If it's extraneous event, we still need to emulate/single-
+ * step the instruction, but we don't generate an event.
+ */
+ if (size && !dar_range_overlaps(regs->dar, size, info))
+ info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+
+ /* Do not emulate user-space instructions, instead single-step them */
+ if (user_mode(regs)) {
+ current->thread.last_hit_ubp = bp;
+ regs->msr |= MSR_SE;
+ return false;
+ }
+
+ if (!emulate_step(regs, instr))
+ goto fail;
+
+ return true;
+
+fail:
+ /*
+ * We've failed in reliably handling the hw-breakpoint. Unregister
+ * it and throw a warning message to let the user know about it.
+ */
+ WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
+ "0x%lx will be disabled.", addr);
+
+disable:
+ perf_event_disable_inatomic(bp);
+ return false;
+}
+
int hw_breakpoint_handler(struct die_args *args)
{
int rc = NOTIFY_STOP;
struct perf_event *bp;
struct pt_regs *regs = args->regs;
-#ifndef CONFIG_PPC_8xx
- int stepped = 1;
- unsigned int instr;
-#endif
struct arch_hw_breakpoint *info;
- unsigned long dar = regs->dar;
/* Disable breakpoints during exception handling */
hw_breakpoint_disable();
@@ -240,43 +330,14 @@ int hw_breakpoint_handler(struct die_args *args)
goto out;
}
- /*
- * Verify if dar lies within the address range occupied by the symbol
- * being watched to filter extraneous exceptions. If it doesn't,
- * we still need to single-step the instruction, but we don't
- * generate an event.
- */
info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
- if (!((bp->attr.bp_addr <= dar) &&
- (dar - bp->attr.bp_addr < bp->attr.bp_len)))
+ if (!dar_within_range(regs->dar, info))
info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
-#ifndef CONFIG_PPC_8xx
- /* Do not emulate user-space instructions, instead single-step them */
- if (user_mode(regs)) {
- current->thread.last_hit_ubp = bp;
- regs->msr |= MSR_SE;
+ if (!IS_ENABLED(CONFIG_PPC_8xx) && !stepping_handler(regs, bp, info))
goto out;
- }
-
- stepped = 0;
- instr = 0;
- if (!__get_user_inatomic(instr, (unsigned int *) regs->nip))
- stepped = emulate_step(regs, instr);
/*
- * emulate_step() could not execute it. We've failed in reliably
- * handling the hw-breakpoint. Unregister it and throw a warning
- * message to let the user know about it.
- */
- if (!stepped) {
- WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
- "0x%lx will be disabled.", info->address);
- perf_event_disable_inatomic(bp);
- goto out;
- }
-#endif
- /*
* As a policy, the callback is invoked in a 'trigger-after-execute'
* fashion
*/
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index a36fd053c3db..422e31d2f5a2 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -77,6 +77,31 @@ void arch_cpu_idle(void)
int powersave_nap;
+#ifdef CONFIG_PPC_970_NAP
+void power4_idle(void)
+{
+ if (!cpu_has_feature(CPU_FTR_CAN_NAP))
+ return;
+
+ if (!powersave_nap)
+ return;
+
+ if (!prep_irq_for_idle())
+ return;
+
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ asm volatile("DSSALL ; sync" ::: "memory");
+
+ power4_idle_nap();
+
+ /*
+ * power4_idle_nap returns with interrupts enabled (soft and hard).
+ * to our caller with interrupts enabled (soft and hard). Our caller
+ * can cope with either interrupts disabled or enabled upon return.
+ */
+}
+#endif
+
#ifdef CONFIG_SYSCTL
/*
* Register the sysctl to set/clear powersave_nap.
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index d32751994a62..22f249b6f58d 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -15,7 +15,9 @@
#include <asm/asm-offsets.h>
#include <asm/ppc-opcode.h>
#include <asm/cpuidle.h>
+#include <asm/thread_info.h> /* TLF_NAPPING */
+#ifdef CONFIG_PPC_P7_NAP
/*
* Desired PSSCR in r3
*
@@ -181,4 +183,22 @@ _GLOBAL(isa206_idle_insn_mayloss)
bne 2f
IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
2: IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
+#endif
+#ifdef CONFIG_PPC_970_NAP
+_GLOBAL(power4_idle_nap)
+ LOAD_REG_IMMEDIATE(r7, MSR_KERNEL|MSR_EE|MSR_POW)
+ ld r9,PACA_THREAD_INFO(r13)
+ ld r8,TI_LOCAL_FLAGS(r9)
+ ori r8,r8,_TLF_NAPPING
+ std r8,TI_LOCAL_FLAGS(r9)
+ /*
+ * NAPPING bit is set, from this point onward power4_fixup_nap
+ * will cause exceptions to return to power4_idle_nap_return.
+ */
+1: sync
+ isync
+ mtmsrd r7
+ isync
+ b 1b
+#endif
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
deleted file mode 100644
index 33c625329078..000000000000
--- a/arch/powerpc/kernel/idle_power4.S
+++ /dev/null
@@ -1,83 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * This file contains the power_save function for 970-family CPUs.
- */
-
-#include <linux/threads.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/cputable.h>
-#include <asm/thread_info.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/irqflags.h>
-#include <asm/hw_irq.h>
-#include <asm/feature-fixups.h>
-
-#undef DEBUG
-
- .text
-
-_GLOBAL(power4_idle)
-BEGIN_FTR_SECTION
- blr
-END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
- /* Now check if user or arch enabled NAP mode */
- LOAD_REG_ADDRBASE(r3,powersave_nap)
- lwz r4,ADDROFF(powersave_nap)(r3)
- cmpwi 0,r4,0
- beqlr
-
- /* This sequence is similar to prep_irq_for_idle() */
-
- /* Hard disable interrupts */
- mfmsr r7
- rldicl r0,r7,48,1
- rotldi r0,r0,16
- mtmsrd r0,1
-
- /* Check if something happened while soft-disabled */
- lbz r0,PACAIRQHAPPENED(r13)
- cmpwi cr0,r0,0
- bne- 2f
-
- /*
- * Soft-enable interrupts. This will make power4_fixup_nap return
- * to our caller with interrupts enabled (soft and hard). The caller
- * can cope with either interrupts disabled or enabled upon return.
- */
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* Tell the tracer interrupts are on, because idle responds to them. */
- mflr r0
- std r0,16(r1)
- stdu r1,-128(r1)
- bl trace_hardirqs_on
- addi r1,r1,128
- ld r0,16(r1)
- mtlr r0
- mfmsr r7
-#endif /* CONFIG_TRACE_IRQFLAGS */
-
- li r0,IRQS_ENABLED
- stb r0,PACAIRQSOFTMASK(r13) /* we'll hard-enable shortly */
-BEGIN_FTR_SECTION
- DSSALL
- sync
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
- ld r9, PACA_THREAD_INFO(r13)
- ld r8,TI_LOCAL_FLAGS(r9) /* set napping bit */
- ori r8,r8,_TLF_NAPPING /* so when we take an exception */
- std r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */
- ori r7,r7,MSR_EE
- oris r7,r7,MSR_POW@h
-1: sync
- isync
- mtmsrd r7
- isync
- b 1b
-
-2: /* Return if an interrupt had happened while soft disabled */
- /* Set the HARD_DIS flag because interrupts are now hard disabled */
- ori r0,r0,PACA_IRQ_HARD_DIS
- stb r0,PACAIRQHAPPENED(r13)
- blr
diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c
new file mode 100644
index 000000000000..e34116255ced
--- /dev/null
+++ b/arch/powerpc/kernel/ima_arch.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ */
+
+#include <linux/ima.h>
+#include <asm/secure_boot.h>
+
+bool arch_ima_get_secureboot(void)
+{
+ return is_ppc_secureboot_enabled();
+}
+
+/*
+ * The "secure_rules" are enabled only on "secureboot" enabled systems.
+ * These rules verify the file signatures against known good values.
+ * The "appraise_type=imasig|modsig" option allows the known good signature
+ * to be stored as an xattr or as an appended signature.
+ *
+ * To avoid duplicate signature verification as much as possible, the IMA
+ * policy rule for module appraisal is added only if CONFIG_MODULE_SIG_FORCE
+ * is not enabled.
+ */
+static const char *const secure_rules[] = {
+ "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+#ifndef CONFIG_MODULE_SIG_FORCE
+ "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+#endif
+ NULL
+};
+
+/*
+ * The "trusted_rules" are enabled only on "trustedboot" enabled systems.
+ * These rules add the kexec kernel image and kernel modules file hashes to
+ * the IMA measurement list.
+ */
+static const char *const trusted_rules[] = {
+ "measure func=KEXEC_KERNEL_CHECK",
+ "measure func=MODULE_CHECK",
+ NULL
+};
+
+/*
+ * The "secure_and_trusted_rules" contains rules for both the secure boot and
+ * trusted boot. The "template=ima-modsig" option includes the appended
+ * signature, when available, in the IMA measurement list.
+ */
+static const char *const secure_and_trusted_rules[] = {
+ "measure func=KEXEC_KERNEL_CHECK template=ima-modsig",
+ "measure func=MODULE_CHECK template=ima-modsig",
+ "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+#ifndef CONFIG_MODULE_SIG_FORCE
+ "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+#endif
+ NULL
+};
+
+/*
+ * Returns the relevant IMA arch-specific policies based on the system secure
+ * boot state.
+ */
+const char *const *arch_get_ima_policy(void)
+{
+ if (is_ppc_secureboot_enabled()) {
+ if (IS_ENABLED(CONFIG_MODULE_SIG))
+ set_module_sig_enforced();
+
+ if (is_ppc_trustedboot_enabled())
+ return secure_and_trusted_rules;
+ else
+ return secure_rules;
+ } else if (is_ppc_trustedboot_enabled()) {
+ return trusted_rules;
+ }
+
+ return NULL;
+}
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index fbd2d0007c52..0276bc8c8969 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -149,8 +149,8 @@ static const struct ppc_pci_io iowa_pci_io = {
};
#ifdef CONFIG_PPC_INDIRECT_MMIO
-static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
- pgprot_t prot, void *caller)
+void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
+ pgprot_t prot, void *caller)
{
struct iowa_bus *bus;
void __iomem *res = __ioremap_caller(addr, size, prot, caller);
@@ -163,20 +163,17 @@ static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
}
return res;
}
-#else /* CONFIG_PPC_INDIRECT_MMIO */
-#define iowa_ioremap NULL
#endif /* !CONFIG_PPC_INDIRECT_MMIO */
+bool io_workaround_inited;
+
/* Enable IO workaround */
static void io_workaround_init(void)
{
- static int io_workaround_inited;
-
if (io_workaround_inited)
return;
ppc_pci_io = iowa_pci_io;
- ppc_md.ioremap = iowa_ioremap;
- io_workaround_inited = 1;
+ io_workaround_inited = true;
}
/* Register new bus to support workaround */
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 0a67ce9f827e..9704f3f76e63 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -633,11 +633,54 @@ static void iommu_table_clear(struct iommu_table *tbl)
#endif
}
+static void iommu_table_reserve_pages(struct iommu_table *tbl,
+ unsigned long res_start, unsigned long res_end)
+{
+ int i;
+
+ WARN_ON_ONCE(res_end < res_start);
+ /*
+ * Reserve page 0 so it will not be used for any mappings.
+ * This avoids buggy drivers that consider page 0 to be invalid
+ * to crash the machine or even lose data.
+ */
+ if (tbl->it_offset == 0)
+ set_bit(0, tbl->it_map);
+
+ tbl->it_reserved_start = res_start;
+ tbl->it_reserved_end = res_end;
+
+ /* Check if res_start..res_end isn't empty and overlaps the table */
+ if (res_start && res_end &&
+ (tbl->it_offset + tbl->it_size < res_start ||
+ res_end < tbl->it_offset))
+ return;
+
+ for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
+ set_bit(i - tbl->it_offset, tbl->it_map);
+}
+
+static void iommu_table_release_pages(struct iommu_table *tbl)
+{
+ int i;
+
+ /*
+ * In case we have reserved the first bit, we should not emit
+ * the warning below.
+ */
+ if (tbl->it_offset == 0)
+ clear_bit(0, tbl->it_map);
+
+ for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
+ clear_bit(i - tbl->it_offset, tbl->it_map);
+}
+
/*
* Build a iommu_table structure. This contains a bit map which
* is used to manage allocation of the tce space.
*/
-struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
+struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
+ unsigned long res_start, unsigned long res_end)
{
unsigned long sz;
static int welcomed = 0;
@@ -656,13 +699,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
tbl->it_map = page_address(page);
memset(tbl->it_map, 0, sz);
- /*
- * Reserve page 0 so it will not be used for any mappings.
- * This avoids buggy drivers that consider page 0 to be invalid
- * to crash the machine or even lose data.
- */
- if (tbl->it_offset == 0)
- set_bit(0, tbl->it_map);
+ iommu_table_reserve_pages(tbl, res_start, res_end);
/* We only split the IOMMU table if we have 1GB or more of space */
if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))
@@ -714,12 +751,7 @@ static void iommu_table_free(struct kref *kref)
return;
}
- /*
- * In case we have reserved the first bit, we should not emit
- * the warning below.
- */
- if (tbl->it_offset == 0)
- clear_bit(0, tbl->it_map);
+ iommu_table_release_pages(tbl);
/* verify that table contains no entries */
if (!bitmap_empty(tbl->it_map, tbl->it_size))
@@ -981,29 +1013,32 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
}
EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
-long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
+extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
+ struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction)
{
long ret;
unsigned long size = 0;
- ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
-
+ ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, false);
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
(*direction == DMA_BIDIRECTIONAL)) &&
!mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
&size))
SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
- /* if (unlikely(ret))
- pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
- __func__, hwaddr, entry << tbl->it_page_shift,
- hwaddr, ret); */
-
return ret;
}
-EXPORT_SYMBOL_GPL(iommu_tce_xchg);
+EXPORT_SYMBOL_GPL(iommu_tce_xchg_no_kill);
+
+void iommu_tce_kill(struct iommu_table *tbl,
+ unsigned long entry, unsigned long pages)
+{
+ if (tbl->it_ops->tce_kill)
+ tbl->it_ops->tce_kill(tbl, entry, pages, false);
+}
+EXPORT_SYMBOL_GPL(iommu_tce_kill);
int iommu_take_ownership(struct iommu_table *tbl)
{
@@ -1017,22 +1052,21 @@ int iommu_take_ownership(struct iommu_table *tbl)
* requires exchange() callback defined so if it is not
* implemented, we disallow taking ownership over the table.
*/
- if (!tbl->it_ops->exchange)
+ if (!tbl->it_ops->xchg_no_kill)
return -EINVAL;
spin_lock_irqsave(&tbl->large_pool.lock, flags);
for (i = 0; i < tbl->nr_pools; i++)
spin_lock(&tbl->pools[i].lock);
- if (tbl->it_offset == 0)
- clear_bit(0, tbl->it_map);
+ iommu_table_release_pages(tbl);
if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
pr_err("iommu_tce: it_map is not empty");
ret = -EBUSY;
- /* Restore bit#0 set by iommu_init_table() */
- if (tbl->it_offset == 0)
- set_bit(0, tbl->it_map);
+ /* Undo iommu_table_release_pages, i.e. restore bit#0, etc */
+ iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
+ tbl->it_reserved_end);
} else {
memset(tbl->it_map, 0xff, sz);
}
@@ -1055,9 +1089,8 @@ void iommu_release_ownership(struct iommu_table *tbl)
memset(tbl->it_map, 0, sz);
- /* Restore bit#0 set by iommu_init_table() */
- if (tbl->it_offset == 0)
- set_bit(0, tbl->it_map);
+ iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
+ tbl->it_reserved_end);
for (i = 0; i < tbl->nr_pools; i++)
spin_unlock(&tbl->pools[i].lock);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5645bc9cbc09..5c9b11878555 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -50,6 +50,7 @@
#include <linux/debugfs.h>
#include <linux/of.h>
#include <linux/of_irq.h>
+#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <asm/io.h>
@@ -619,8 +620,6 @@ void __do_irq(struct pt_regs *regs)
trace_irq_entry(regs);
- check_stack_overflow();
-
/*
* Query the platform PIC for the interrupt & ack it.
*
@@ -652,6 +651,8 @@ void do_IRQ(struct pt_regs *regs)
irqsp = hardirq_ctx[raw_smp_processor_id()];
sirqsp = softirq_ctx[raw_smp_processor_id()];
+ check_stack_overflow();
+
/* Already there ? */
if (unlikely(cursp == irqsp || cursp == sirqsp)) {
__do_irq(regs);
@@ -664,8 +665,29 @@ void do_IRQ(struct pt_regs *regs)
set_irq_regs(old_regs);
}
+static void *__init alloc_vm_stack(void)
+{
+ return __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, VMALLOC_START,
+ VMALLOC_END, THREADINFO_GFP, PAGE_KERNEL,
+ 0, NUMA_NO_NODE, (void*)_RET_IP_);
+}
+
+static void __init vmap_irqstack_init(void)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ softirq_ctx[i] = alloc_vm_stack();
+ hardirq_ctx[i] = alloc_vm_stack();
+ }
+}
+
+
void __init init_IRQ(void)
{
+ if (IS_ENABLED(CONFIG_VMAP_STACK))
+ vmap_irqstack_init();
+
if (ppc_md.init_IRQ)
ppc_md.init_IRQ();
}
diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c
deleted file mode 100644
index 83cf7b852876..000000000000
--- a/arch/powerpc/kernel/kexec_elf_64.c
+++ /dev/null
@@ -1,660 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Load ELF vmlinux file for the kexec_file_load syscall.
- *
- * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
- * Copyright (C) 2004 IBM Corp.
- * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
- * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
- * Copyright (C) 2016 IBM Corporation
- *
- * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c.
- * Heavily modified for the kernel by
- * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
- */
-
-#define pr_fmt(fmt) "kexec_elf: " fmt
-
-#include <linux/elf.h>
-#include <linux/kexec.h>
-#include <linux/libfdt.h>
-#include <linux/module.h>
-#include <linux/of_fdt.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-
-#define PURGATORY_STACK_SIZE (16 * 1024)
-
-#define elf_addr_to_cpu elf64_to_cpu
-
-#ifndef Elf_Rel
-#define Elf_Rel Elf64_Rel
-#endif /* Elf_Rel */
-
-struct elf_info {
- /*
- * Where the ELF binary contents are kept.
- * Memory managed by the user of the struct.
- */
- const char *buffer;
-
- const struct elfhdr *ehdr;
- const struct elf_phdr *proghdrs;
- struct elf_shdr *sechdrs;
-};
-
-static inline bool elf_is_elf_file(const struct elfhdr *ehdr)
-{
- return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0;
-}
-
-static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, uint64_t value)
-{
- if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
- value = le64_to_cpu(value);
- else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
- value = be64_to_cpu(value);
-
- return value;
-}
-
-static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value)
-{
- if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
- value = le16_to_cpu(value);
- else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
- value = be16_to_cpu(value);
-
- return value;
-}
-
-static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value)
-{
- if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
- value = le32_to_cpu(value);
- else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
- value = be32_to_cpu(value);
-
- return value;
-}
-
-/**
- * elf_is_ehdr_sane - check that it is safe to use the ELF header
- * @buf_len: size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_ehdr_sane(const struct elfhdr *ehdr, size_t buf_len)
-{
- if (ehdr->e_phnum > 0 && ehdr->e_phentsize != sizeof(struct elf_phdr)) {
- pr_debug("Bad program header size.\n");
- return false;
- } else if (ehdr->e_shnum > 0 &&
- ehdr->e_shentsize != sizeof(struct elf_shdr)) {
- pr_debug("Bad section header size.\n");
- return false;
- } else if (ehdr->e_ident[EI_VERSION] != EV_CURRENT ||
- ehdr->e_version != EV_CURRENT) {
- pr_debug("Unknown ELF version.\n");
- return false;
- }
-
- if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
- size_t phdr_size;
-
- /*
- * e_phnum is at most 65535 so calculating the size of the
- * program header cannot overflow.
- */
- phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
-
- /* Sanity check the program header table location. */
- if (ehdr->e_phoff + phdr_size < ehdr->e_phoff) {
- pr_debug("Program headers at invalid location.\n");
- return false;
- } else if (ehdr->e_phoff + phdr_size > buf_len) {
- pr_debug("Program headers truncated.\n");
- return false;
- }
- }
-
- if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
- size_t shdr_size;
-
- /*
- * e_shnum is at most 65536 so calculating
- * the size of the section header cannot overflow.
- */
- shdr_size = sizeof(struct elf_shdr) * ehdr->e_shnum;
-
- /* Sanity check the section header table location. */
- if (ehdr->e_shoff + shdr_size < ehdr->e_shoff) {
- pr_debug("Section headers at invalid location.\n");
- return false;
- } else if (ehdr->e_shoff + shdr_size > buf_len) {
- pr_debug("Section headers truncated.\n");
- return false;
- }
- }
-
- return true;
-}
-
-static int elf_read_ehdr(const char *buf, size_t len, struct elfhdr *ehdr)
-{
- struct elfhdr *buf_ehdr;
-
- if (len < sizeof(*buf_ehdr)) {
- pr_debug("Buffer is too small to hold ELF header.\n");
- return -ENOEXEC;
- }
-
- memset(ehdr, 0, sizeof(*ehdr));
- memcpy(ehdr->e_ident, buf, sizeof(ehdr->e_ident));
- if (!elf_is_elf_file(ehdr)) {
- pr_debug("No ELF header magic.\n");
- return -ENOEXEC;
- }
-
- if (ehdr->e_ident[EI_CLASS] != ELF_CLASS) {
- pr_debug("Not a supported ELF class.\n");
- return -ENOEXEC;
- } else if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB &&
- ehdr->e_ident[EI_DATA] != ELFDATA2MSB) {
- pr_debug("Not a supported ELF data format.\n");
- return -ENOEXEC;
- }
-
- buf_ehdr = (struct elfhdr *) buf;
- if (elf16_to_cpu(ehdr, buf_ehdr->e_ehsize) != sizeof(*buf_ehdr)) {
- pr_debug("Bad ELF header size.\n");
- return -ENOEXEC;
- }
-
- ehdr->e_type = elf16_to_cpu(ehdr, buf_ehdr->e_type);
- ehdr->e_machine = elf16_to_cpu(ehdr, buf_ehdr->e_machine);
- ehdr->e_version = elf32_to_cpu(ehdr, buf_ehdr->e_version);
- ehdr->e_entry = elf_addr_to_cpu(ehdr, buf_ehdr->e_entry);
- ehdr->e_phoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_phoff);
- ehdr->e_shoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_shoff);
- ehdr->e_flags = elf32_to_cpu(ehdr, buf_ehdr->e_flags);
- ehdr->e_phentsize = elf16_to_cpu(ehdr, buf_ehdr->e_phentsize);
- ehdr->e_phnum = elf16_to_cpu(ehdr, buf_ehdr->e_phnum);
- ehdr->e_shentsize = elf16_to_cpu(ehdr, buf_ehdr->e_shentsize);
- ehdr->e_shnum = elf16_to_cpu(ehdr, buf_ehdr->e_shnum);
- ehdr->e_shstrndx = elf16_to_cpu(ehdr, buf_ehdr->e_shstrndx);
-
- return elf_is_ehdr_sane(ehdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_is_phdr_sane - check that it is safe to use the program header
- * @buf_len: size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_phdr_sane(const struct elf_phdr *phdr, size_t buf_len)
-{
-
- if (phdr->p_offset + phdr->p_filesz < phdr->p_offset) {
- pr_debug("ELF segment location wraps around.\n");
- return false;
- } else if (phdr->p_offset + phdr->p_filesz > buf_len) {
- pr_debug("ELF segment not in file.\n");
- return false;
- } else if (phdr->p_paddr + phdr->p_memsz < phdr->p_paddr) {
- pr_debug("ELF segment address wraps around.\n");
- return false;
- }
-
- return true;
-}
-
-static int elf_read_phdr(const char *buf, size_t len, struct elf_info *elf_info,
- int idx)
-{
- /* Override the const in proghdrs, we are the ones doing the loading. */
- struct elf_phdr *phdr = (struct elf_phdr *) &elf_info->proghdrs[idx];
- const char *pbuf;
- struct elf_phdr *buf_phdr;
-
- pbuf = buf + elf_info->ehdr->e_phoff + (idx * sizeof(*buf_phdr));
- buf_phdr = (struct elf_phdr *) pbuf;
-
- phdr->p_type = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_type);
- phdr->p_offset = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_offset);
- phdr->p_paddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_paddr);
- phdr->p_vaddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_vaddr);
- phdr->p_flags = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_flags);
-
- /*
- * The following fields have a type equivalent to Elf_Addr
- * both in 32 bit and 64 bit ELF.
- */
- phdr->p_filesz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_filesz);
- phdr->p_memsz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_memsz);
- phdr->p_align = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_align);
-
- return elf_is_phdr_sane(phdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_read_phdrs - read the program headers from the buffer
- *
- * This function assumes that the program header table was checked for sanity.
- * Use elf_is_ehdr_sane() if it wasn't.
- */
-static int elf_read_phdrs(const char *buf, size_t len,
- struct elf_info *elf_info)
-{
- size_t phdr_size, i;
- const struct elfhdr *ehdr = elf_info->ehdr;
-
- /*
- * e_phnum is at most 65535 so calculating the size of the
- * program header cannot overflow.
- */
- phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
-
- elf_info->proghdrs = kzalloc(phdr_size, GFP_KERNEL);
- if (!elf_info->proghdrs)
- return -ENOMEM;
-
- for (i = 0; i < ehdr->e_phnum; i++) {
- int ret;
-
- ret = elf_read_phdr(buf, len, elf_info, i);
- if (ret) {
- kfree(elf_info->proghdrs);
- elf_info->proghdrs = NULL;
- return ret;
- }
- }
-
- return 0;
-}
-
-/**
- * elf_is_shdr_sane - check that it is safe to use the section header
- * @buf_len: size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_shdr_sane(const struct elf_shdr *shdr, size_t buf_len)
-{
- bool size_ok;
-
- /* SHT_NULL headers have undefined values, so we can't check them. */
- if (shdr->sh_type == SHT_NULL)
- return true;
-
- /* Now verify sh_entsize */
- switch (shdr->sh_type) {
- case SHT_SYMTAB:
- size_ok = shdr->sh_entsize == sizeof(Elf_Sym);
- break;
- case SHT_RELA:
- size_ok = shdr->sh_entsize == sizeof(Elf_Rela);
- break;
- case SHT_DYNAMIC:
- size_ok = shdr->sh_entsize == sizeof(Elf_Dyn);
- break;
- case SHT_REL:
- size_ok = shdr->sh_entsize == sizeof(Elf_Rel);
- break;
- case SHT_NOTE:
- case SHT_PROGBITS:
- case SHT_HASH:
- case SHT_NOBITS:
- default:
- /*
- * This is a section whose entsize requirements
- * I don't care about. If I don't know about
- * the section I can't care about it's entsize
- * requirements.
- */
- size_ok = true;
- break;
- }
-
- if (!size_ok) {
- pr_debug("ELF section with wrong entry size.\n");
- return false;
- } else if (shdr->sh_addr + shdr->sh_size < shdr->sh_addr) {
- pr_debug("ELF section address wraps around.\n");
- return false;
- }
-
- if (shdr->sh_type != SHT_NOBITS) {
- if (shdr->sh_offset + shdr->sh_size < shdr->sh_offset) {
- pr_debug("ELF section location wraps around.\n");
- return false;
- } else if (shdr->sh_offset + shdr->sh_size > buf_len) {
- pr_debug("ELF section not in file.\n");
- return false;
- }
- }
-
- return true;
-}
-
-static int elf_read_shdr(const char *buf, size_t len, struct elf_info *elf_info,
- int idx)
-{
- struct elf_shdr *shdr = &elf_info->sechdrs[idx];
- const struct elfhdr *ehdr = elf_info->ehdr;
- const char *sbuf;
- struct elf_shdr *buf_shdr;
-
- sbuf = buf + ehdr->e_shoff + idx * sizeof(*buf_shdr);
- buf_shdr = (struct elf_shdr *) sbuf;
-
- shdr->sh_name = elf32_to_cpu(ehdr, buf_shdr->sh_name);
- shdr->sh_type = elf32_to_cpu(ehdr, buf_shdr->sh_type);
- shdr->sh_addr = elf_addr_to_cpu(ehdr, buf_shdr->sh_addr);
- shdr->sh_offset = elf_addr_to_cpu(ehdr, buf_shdr->sh_offset);
- shdr->sh_link = elf32_to_cpu(ehdr, buf_shdr->sh_link);
- shdr->sh_info = elf32_to_cpu(ehdr, buf_shdr->sh_info);
-
- /*
- * The following fields have a type equivalent to Elf_Addr
- * both in 32 bit and 64 bit ELF.
- */
- shdr->sh_flags = elf_addr_to_cpu(ehdr, buf_shdr->sh_flags);
- shdr->sh_size = elf_addr_to_cpu(ehdr, buf_shdr->sh_size);
- shdr->sh_addralign = elf_addr_to_cpu(ehdr, buf_shdr->sh_addralign);
- shdr->sh_entsize = elf_addr_to_cpu(ehdr, buf_shdr->sh_entsize);
-
- return elf_is_shdr_sane(shdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_read_shdrs - read the section headers from the buffer
- *
- * This function assumes that the section header table was checked for sanity.
- * Use elf_is_ehdr_sane() if it wasn't.
- */
-static int elf_read_shdrs(const char *buf, size_t len,
- struct elf_info *elf_info)
-{
- size_t shdr_size, i;
-
- /*
- * e_shnum is at most 65536 so calculating
- * the size of the section header cannot overflow.
- */
- shdr_size = sizeof(struct elf_shdr) * elf_info->ehdr->e_shnum;
-
- elf_info->sechdrs = kzalloc(shdr_size, GFP_KERNEL);
- if (!elf_info->sechdrs)
- return -ENOMEM;
-
- for (i = 0; i < elf_info->ehdr->e_shnum; i++) {
- int ret;
-
- ret = elf_read_shdr(buf, len, elf_info, i);
- if (ret) {
- kfree(elf_info->sechdrs);
- elf_info->sechdrs = NULL;
- return ret;
- }
- }
-
- return 0;
-}
-
-/**
- * elf_read_from_buffer - read ELF file and sets up ELF header and ELF info
- * @buf: Buffer to read ELF file from.
- * @len: Size of @buf.
- * @ehdr: Pointer to existing struct which will be populated.
- * @elf_info: Pointer to existing struct which will be populated.
- *
- * This function allows reading ELF files with different byte order than
- * the kernel, byte-swapping the fields as needed.
- *
- * Return:
- * On success returns 0, and the caller should call elf_free_info(elf_info) to
- * free the memory allocated for the section and program headers.
- */
-int elf_read_from_buffer(const char *buf, size_t len, struct elfhdr *ehdr,
- struct elf_info *elf_info)
-{
- int ret;
-
- ret = elf_read_ehdr(buf, len, ehdr);
- if (ret)
- return ret;
-
- elf_info->buffer = buf;
- elf_info->ehdr = ehdr;
- if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
- ret = elf_read_phdrs(buf, len, elf_info);
- if (ret)
- return ret;
- }
- if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
- ret = elf_read_shdrs(buf, len, elf_info);
- if (ret) {
- kfree(elf_info->proghdrs);
- return ret;
- }
- }
-
- return 0;
-}
-
-/**
- * elf_free_info - free memory allocated by elf_read_from_buffer
- */
-void elf_free_info(struct elf_info *elf_info)
-{
- kfree(elf_info->proghdrs);
- kfree(elf_info->sechdrs);
- memset(elf_info, 0, sizeof(*elf_info));
-}
-/**
- * build_elf_exec_info - read ELF executable and check that we can use it
- */
-static int build_elf_exec_info(const char *buf, size_t len, struct elfhdr *ehdr,
- struct elf_info *elf_info)
-{
- int i;
- int ret;
-
- ret = elf_read_from_buffer(buf, len, ehdr, elf_info);
- if (ret)
- return ret;
-
- /* Big endian vmlinux has type ET_DYN. */
- if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) {
- pr_err("Not an ELF executable.\n");
- goto error;
- } else if (!elf_info->proghdrs) {
- pr_err("No ELF program header.\n");
- goto error;
- }
-
- for (i = 0; i < ehdr->e_phnum; i++) {
- /*
- * Kexec does not support loading interpreters.
- * In addition this check keeps us from attempting
- * to kexec ordinay executables.
- */
- if (elf_info->proghdrs[i].p_type == PT_INTERP) {
- pr_err("Requires an ELF interpreter.\n");
- goto error;
- }
- }
-
- return 0;
-error:
- elf_free_info(elf_info);
- return -ENOEXEC;
-}
-
-static int elf64_probe(const char *buf, unsigned long len)
-{
- struct elfhdr ehdr;
- struct elf_info elf_info;
- int ret;
-
- ret = build_elf_exec_info(buf, len, &ehdr, &elf_info);
- if (ret)
- return ret;
-
- elf_free_info(&elf_info);
-
- return elf_check_arch(&ehdr) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_exec_load - load ELF executable image
- * @lowest_load_addr: On return, will be the address where the first PT_LOAD
- * section will be loaded in memory.
- *
- * Return:
- * 0 on success, negative value on failure.
- */
-static int elf_exec_load(struct kimage *image, struct elfhdr *ehdr,
- struct elf_info *elf_info,
- unsigned long *lowest_load_addr)
-{
- unsigned long base = 0, lowest_addr = UINT_MAX;
- int ret;
- size_t i;
- struct kexec_buf kbuf = { .image = image, .buf_max = ppc64_rma_size,
- .top_down = false };
-
- /* Read in the PT_LOAD segments. */
- for (i = 0; i < ehdr->e_phnum; i++) {
- unsigned long load_addr;
- size_t size;
- const struct elf_phdr *phdr;
-
- phdr = &elf_info->proghdrs[i];
- if (phdr->p_type != PT_LOAD)
- continue;
-
- size = phdr->p_filesz;
- if (size > phdr->p_memsz)
- size = phdr->p_memsz;
-
- kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
- kbuf.bufsz = size;
- kbuf.memsz = phdr->p_memsz;
- kbuf.buf_align = phdr->p_align;
- kbuf.buf_min = phdr->p_paddr + base;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- ret = kexec_add_buffer(&kbuf);
- if (ret)
- goto out;
- load_addr = kbuf.mem;
-
- if (load_addr < lowest_addr)
- lowest_addr = load_addr;
- }
-
- /* Update entry point to reflect new load address. */
- ehdr->e_entry += base;
-
- *lowest_load_addr = lowest_addr;
- ret = 0;
- out:
- return ret;
-}
-
-static void *elf64_load(struct kimage *image, char *kernel_buf,
- unsigned long kernel_len, char *initrd,
- unsigned long initrd_len, char *cmdline,
- unsigned long cmdline_len)
-{
- int ret;
- unsigned int fdt_size;
- unsigned long kernel_load_addr;
- unsigned long initrd_load_addr = 0, fdt_load_addr;
- void *fdt;
- const void *slave_code;
- struct elfhdr ehdr;
- struct elf_info elf_info;
- struct kexec_buf kbuf = { .image = image, .buf_min = 0,
- .buf_max = ppc64_rma_size };
- struct kexec_buf pbuf = { .image = image, .buf_min = 0,
- .buf_max = ppc64_rma_size, .top_down = true,
- .mem = KEXEC_BUF_MEM_UNKNOWN };
-
- ret = build_elf_exec_info(kernel_buf, kernel_len, &ehdr, &elf_info);
- if (ret)
- goto out;
-
- ret = elf_exec_load(image, &ehdr, &elf_info, &kernel_load_addr);
- if (ret)
- goto out;
-
- pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr);
-
- ret = kexec_load_purgatory(image, &pbuf);
- if (ret) {
- pr_err("Loading purgatory failed.\n");
- goto out;
- }
-
- pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem);
-
- if (initrd != NULL) {
- kbuf.buffer = initrd;
- kbuf.bufsz = kbuf.memsz = initrd_len;
- kbuf.buf_align = PAGE_SIZE;
- kbuf.top_down = false;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- ret = kexec_add_buffer(&kbuf);
- if (ret)
- goto out;
- initrd_load_addr = kbuf.mem;
-
- pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr);
- }
-
- fdt_size = fdt_totalsize(initial_boot_params) * 2;
- fdt = kmalloc(fdt_size, GFP_KERNEL);
- if (!fdt) {
- pr_err("Not enough memory for the device tree.\n");
- ret = -ENOMEM;
- goto out;
- }
- ret = fdt_open_into(initial_boot_params, fdt, fdt_size);
- if (ret < 0) {
- pr_err("Error setting up the new device tree.\n");
- ret = -EINVAL;
- goto out;
- }
-
- ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
- if (ret)
- goto out;
-
- fdt_pack(fdt);
-
- kbuf.buffer = fdt;
- kbuf.bufsz = kbuf.memsz = fdt_size;
- kbuf.buf_align = PAGE_SIZE;
- kbuf.top_down = true;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- ret = kexec_add_buffer(&kbuf);
- if (ret)
- goto out;
- fdt_load_addr = kbuf.mem;
-
- pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr);
-
- slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset;
- ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
- fdt_load_addr);
- if (ret)
- pr_err("Error setting up the purgatory.\n");
-
-out:
- elf_free_info(&elf_info);
-
- /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
- return ret ? ERR_PTR(ret) : fdt;
-}
-
-const struct kexec_file_ops kexec_elf64_ops = {
- .probe = elf64_probe,
- .load = elf64_load,
-};
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index b7b3a5e4e224..617eba82531c 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -64,16 +64,17 @@
#define KVM_INST_MTSRIN 0x7c0001e4
static bool kvm_patching_worked = true;
-char kvm_tmp[1024 * 1024];
+extern char kvm_tmp[];
+extern char kvm_tmp_end[];
static int kvm_tmp_index;
-static inline void kvm_patch_ins(u32 *inst, u32 new_inst)
+static void __init kvm_patch_ins(u32 *inst, u32 new_inst)
{
*inst = new_inst;
flush_icache_range((ulong)inst, (ulong)inst + 4);
}
-static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
@@ -82,7 +83,7 @@ static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
#endif
}
-static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
@@ -91,12 +92,12 @@ static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
#endif
}
-static void kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
{
kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000ffff));
}
-static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_STD | rt | (addr & 0x0000fffc));
@@ -105,17 +106,17 @@ static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
#endif
}
-static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
{
kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc));
}
-static void kvm_patch_ins_nop(u32 *inst)
+static void __init kvm_patch_ins_nop(u32 *inst)
{
kvm_patch_ins(inst, KVM_INST_NOP);
}
-static void kvm_patch_ins_b(u32 *inst, int addr)
+static void __init kvm_patch_ins_b(u32 *inst, int addr)
{
#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC_BOOK3S)
/* On relocatable kernels interrupts handlers and our code
@@ -128,11 +129,11 @@ static void kvm_patch_ins_b(u32 *inst, int addr)
kvm_patch_ins(inst, KVM_INST_B | (addr & KVM_INST_B_MASK));
}
-static u32 *kvm_alloc(int len)
+static u32 * __init kvm_alloc(int len)
{
u32 *p;
- if ((kvm_tmp_index + len) > ARRAY_SIZE(kvm_tmp)) {
+ if ((kvm_tmp_index + len) > (kvm_tmp_end - kvm_tmp)) {
printk(KERN_ERR "KVM: No more space (%d + %d)\n",
kvm_tmp_index, len);
kvm_patching_worked = false;
@@ -151,7 +152,7 @@ extern u32 kvm_emulate_mtmsrd_orig_ins_offs;
extern u32 kvm_emulate_mtmsrd_len;
extern u32 kvm_emulate_mtmsrd[];
-static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
+static void __init kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
{
u32 *p;
int distance_start;
@@ -204,7 +205,7 @@ extern u32 kvm_emulate_mtmsr_orig_ins_offs;
extern u32 kvm_emulate_mtmsr_len;
extern u32 kvm_emulate_mtmsr[];
-static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
+static void __init kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
{
u32 *p;
int distance_start;
@@ -265,7 +266,7 @@ extern u32 kvm_emulate_wrtee_orig_ins_offs;
extern u32 kvm_emulate_wrtee_len;
extern u32 kvm_emulate_wrtee[];
-static void kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
+static void __init kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
{
u32 *p;
int distance_start;
@@ -322,7 +323,7 @@ extern u32 kvm_emulate_wrteei_0_branch_offs;
extern u32 kvm_emulate_wrteei_0_len;
extern u32 kvm_emulate_wrteei_0[];
-static void kvm_patch_ins_wrteei_0(u32 *inst)
+static void __init kvm_patch_ins_wrteei_0(u32 *inst)
{
u32 *p;
int distance_start;
@@ -363,7 +364,7 @@ extern u32 kvm_emulate_mtsrin_orig_ins_offs;
extern u32 kvm_emulate_mtsrin_len;
extern u32 kvm_emulate_mtsrin[];
-static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
+static void __init kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
{
u32 *p;
int distance_start;
@@ -399,7 +400,7 @@ static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
#endif
-static void kvm_map_magic_page(void *data)
+static void __init kvm_map_magic_page(void *data)
{
u32 *features = data;
@@ -414,7 +415,7 @@ static void kvm_map_magic_page(void *data)
*features = out[0];
}
-static void kvm_check_ins(u32 *inst, u32 features)
+static void __init kvm_check_ins(u32 *inst, u32 features)
{
u32 _inst = *inst;
u32 inst_no_rt = _inst & ~KVM_MASK_RT;
@@ -658,7 +659,7 @@ static void kvm_check_ins(u32 *inst, u32 features)
extern u32 kvm_template_start[];
extern u32 kvm_template_end[];
-static void kvm_use_magic_page(void)
+static void __init kvm_use_magic_page(void)
{
u32 *p;
u32 *start, *end;
@@ -699,25 +700,13 @@ static void kvm_use_magic_page(void)
kvm_patching_worked ? "worked" : "failed");
}
-static __init void kvm_free_tmp(void)
-{
- /*
- * Inform kmemleak about the hole in the .bss section since the
- * corresponding pages will be unmapped with DEBUG_PAGEALLOC=y.
- */
- kmemleak_free_part(&kvm_tmp[kvm_tmp_index],
- ARRAY_SIZE(kvm_tmp) - kvm_tmp_index);
- free_reserved_area(&kvm_tmp[kvm_tmp_index],
- &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL);
-}
-
static int __init kvm_guest_init(void)
{
if (!kvm_para_available())
- goto free_tmp;
+ return 0;
if (!epapr_paravirt_enabled)
- goto free_tmp;
+ return 0;
if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE))
kvm_use_magic_page();
@@ -727,9 +716,6 @@ static int __init kvm_guest_init(void)
powersave_nap = 1;
#endif
-free_tmp:
- kvm_free_tmp();
-
return 0;
}
diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S
index eb2568f583ae..7af6f8b50c5d 100644
--- a/arch/powerpc/kernel/kvm_emul.S
+++ b/arch/powerpc/kernel/kvm_emul.S
@@ -192,6 +192,8 @@ kvm_emulate_mtmsr_orig_ins_offs:
kvm_emulate_mtmsr_len:
.long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4
+#ifdef CONFIG_BOOKE
+
/* also used for wrteei 1 */
.global kvm_emulate_wrtee
kvm_emulate_wrtee:
@@ -285,6 +287,10 @@ kvm_emulate_wrteei_0_branch_offs:
kvm_emulate_wrteei_0_len:
.long (kvm_emulate_wrteei_0_end - kvm_emulate_wrteei_0) / 4
+#endif /* CONFIG_BOOKE */
+
+#ifdef CONFIG_PPC_BOOK3S_32
+
.global kvm_emulate_mtsrin
kvm_emulate_mtsrin:
@@ -334,5 +340,15 @@ kvm_emulate_mtsrin_orig_ins_offs:
kvm_emulate_mtsrin_len:
.long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4
+#endif /* CONFIG_PPC_BOOK3S_32 */
+
+ .balign 4
+ .global kvm_tmp
+kvm_tmp:
+ .space (64 * 1024)
+
+.global kvm_tmp_end
+kvm_tmp_end:
+
.global kvm_template_end
kvm_template_end:
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 7cea5978f21f..f061e06e9f51 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -479,8 +479,10 @@ static void __init fixup_port_irq(int index,
port->irq = virq;
#ifdef CONFIG_SERIAL_8250_FSL
- if (of_device_is_compatible(np, "fsl,ns16550"))
+ if (of_device_is_compatible(np, "fsl,ns16550")) {
port->handle_irq = fsl8250_handle_irq;
+ port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE);
+ }
#endif
}
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index b18df633eae9..34c1001e9e8b 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -33,13 +33,18 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
mce_ue_event_queue);
static void machine_check_process_queued_event(struct irq_work *work);
-void machine_check_ue_event(struct machine_check_event *evt);
+static void machine_check_ue_irq_work(struct irq_work *work);
+static void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);
static struct irq_work mce_event_process_work = {
.func = machine_check_process_queued_event,
};
+static struct irq_work mce_ue_event_irq_work = {
+ .func = machine_check_ue_irq_work,
+};
+
DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
static void mce_set_error_info(struct machine_check_event *mce,
@@ -144,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
if (phys_addr != ULONG_MAX) {
mce->u.ue_error.physical_address_provided = true;
mce->u.ue_error.physical_address = phys_addr;
+ mce->u.ue_error.ignore_event = mce_err->ignore_event;
machine_check_ue_event(mce);
}
}
@@ -199,11 +205,15 @@ void release_mce_event(void)
get_mce_event(NULL, true);
}
+static void machine_check_ue_irq_work(struct irq_work *work)
+{
+ schedule_work(&mce_ue_event_work);
+}
/*
* Queue up the MCE event which then can be handled later.
*/
-void machine_check_ue_event(struct machine_check_event *evt)
+static void machine_check_ue_event(struct machine_check_event *evt)
{
int index;
@@ -216,7 +226,7 @@ void machine_check_ue_event(struct machine_check_event *evt)
memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
/* Queue work to process this event later. */
- schedule_work(&mce_ue_event_work);
+ irq_work_queue(&mce_ue_event_irq_work);
}
/*
@@ -257,8 +267,17 @@ static void machine_process_ue_event(struct work_struct *work)
/*
* This should probably queued elsewhere, but
* oh! well
+ *
+ * Don't report this machine check because the caller has a
+ * asked us to ignore the event, it has a fixup handler which
+ * will do the appropriate error handling and reporting.
*/
if (evt->error_type == MCE_ERROR_TYPE_UE) {
+ if (evt->u.ue_error.ignore_event) {
+ __this_cpu_dec(mce_ue_count);
+ continue;
+ }
+
if (evt->u.ue_error.physical_address_provided) {
unsigned long pfn;
@@ -292,6 +311,12 @@ static void machine_check_process_queued_event(struct irq_work *work)
while (__this_cpu_read(mce_queue_count) > 0) {
index = __this_cpu_read(mce_queue_count) - 1;
evt = this_cpu_ptr(&mce_event_queue[index]);
+
+ if (evt->error_type == MCE_ERROR_TYPE_UE &&
+ evt->u.ue_error.ignore_event) {
+ __this_cpu_dec(mce_queue_count);
+ continue;
+ }
machine_check_print_event_info(evt, false, false);
__this_cpu_dec(mce_queue_count);
}
@@ -300,7 +325,7 @@ static void machine_check_process_queued_event(struct irq_work *work)
void machine_check_print_event_info(struct machine_check_event *evt,
bool user_mode, bool in_guest)
{
- const char *level, *sevstr, *subtype, *err_type;
+ const char *level, *sevstr, *subtype, *err_type, *initiator;
uint64_t ea = 0, pa = 0;
int n = 0;
char dar_str[50];
@@ -385,6 +410,28 @@ void machine_check_print_event_info(struct machine_check_event *evt,
break;
}
+ switch(evt->initiator) {
+ case MCE_INITIATOR_CPU:
+ initiator = "CPU";
+ break;
+ case MCE_INITIATOR_PCI:
+ initiator = "PCI";
+ break;
+ case MCE_INITIATOR_ISA:
+ initiator = "ISA";
+ break;
+ case MCE_INITIATOR_MEMORY:
+ initiator = "Memory";
+ break;
+ case MCE_INITIATOR_POWERMGM:
+ initiator = "Power Management";
+ break;
+ case MCE_INITIATOR_UNKNOWN:
+ default:
+ initiator = "Unknown";
+ break;
+ }
+
switch (evt->error_type) {
case MCE_ERROR_TYPE_UE:
err_type = "UE";
@@ -451,6 +498,14 @@ void machine_check_print_event_info(struct machine_check_event *evt,
if (evt->u.link_error.effective_address_provided)
ea = evt->u.link_error.effective_address;
break;
+ case MCE_ERROR_TYPE_DCACHE:
+ err_type = "D-Cache";
+ subtype = "Unknown";
+ break;
+ case MCE_ERROR_TYPE_ICACHE:
+ err_type = "I-Cache";
+ subtype = "Unknown";
+ break;
default:
case MCE_ERROR_TYPE_UNKNOWN:
err_type = "Unknown";
@@ -483,9 +538,17 @@ void machine_check_print_event_info(struct machine_check_event *evt,
level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
}
+ printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);
+
subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
mc_error_class[evt->error_class] : "Unknown";
printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Display faulty slb contents for SLB errors. */
+ if (evt->error_type == MCE_ERROR_TYPE_SLB)
+ slb_dump_contents(local_paca->mce_faulty_slbs);
+#endif
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index a814d2dfb5b0..1cbf7f1a4e3d 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -11,6 +11,7 @@
#include <linux/types.h>
#include <linux/ptrace.h>
+#include <linux/extable.h>
#include <asm/mmu.h>
#include <asm/mce.h>
#include <asm/machdep.h>
@@ -18,6 +19,7 @@
#include <asm/pte-walk.h>
#include <asm/sstep.h>
#include <asm/exception-64s.h>
+#include <asm/extable.h>
/*
* Convert an address related to an mm to a PFN. NOTE: we are in real
@@ -26,7 +28,8 @@
unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
{
pte_t *ptep;
- unsigned long flags;
+ unsigned int shift;
+ unsigned long pfn, flags;
struct mm_struct *mm;
if (user_mode(regs))
@@ -35,14 +38,23 @@ unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
mm = &init_mm;
local_irq_save(flags);
- if (mm == current->mm)
- ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
- else
- ptep = find_init_mm_pte(addr, NULL);
+ ptep = __find_linux_pte(mm->pgd, addr, NULL, &shift);
+
+ if (!ptep || pte_special(*ptep)) {
+ pfn = ULONG_MAX;
+ goto out;
+ }
+
+ if (shift <= PAGE_SHIFT)
+ pfn = pte_pfn(*ptep);
+ else {
+ unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
+ pfn = pte_pfn(__pte(pte_val(*ptep) | (addr & rpnmask)));
+ }
+
+out:
local_irq_restore(flags);
- if (!ptep || pte_special(*ptep))
- return ULONG_MAX;
- return pte_pfn(*ptep);
+ return pfn;
}
/* flush SLBs and reload */
@@ -344,7 +356,7 @@ static const struct mce_derror_table mce_p9_derror_table[] = {
MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0, false, 0, 0, 0, 0, 0 } };
-static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
+static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
uint64_t *phys_addr)
{
/*
@@ -397,6 +409,8 @@ static int mce_handle_ierror(struct pt_regs *regs,
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
+ if (local_paca->in_mce == 1)
+ slb_save_contents(local_paca->mce_faulty_slbs);
handled = mce_flush(MCE_FLUSH_SLB);
break;
case MCE_ERROR_TYPE_ERAT:
@@ -482,6 +496,8 @@ static int mce_handle_derror(struct pt_regs *regs,
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
+ if (local_paca->in_mce == 1)
+ slb_save_contents(local_paca->mce_faulty_slbs);
if (mce_flush(MCE_FLUSH_SLB))
handled = 1;
break;
@@ -541,7 +557,8 @@ static int mce_handle_derror(struct pt_regs *regs,
* kernel/exception-64s.h
*/
if (get_paca()->in_mce < MAX_MCE_DEPTH)
- mce_find_instr_ea_and_pfn(regs, addr, phys_addr);
+ mce_find_instr_ea_and_phys(regs, addr,
+ phys_addr);
}
found = 1;
}
@@ -558,9 +575,18 @@ static int mce_handle_derror(struct pt_regs *regs,
return 0;
}
-static long mce_handle_ue_error(struct pt_regs *regs)
+static long mce_handle_ue_error(struct pt_regs *regs,
+ struct mce_error_info *mce_err)
{
long handled = 0;
+ const struct exception_table_entry *entry;
+
+ entry = search_kernel_exception_table(regs->nip);
+ if (entry) {
+ mce_err->ignore_event = true;
+ regs->nip = extable_fixup(entry);
+ return 1;
+ }
/*
* On specific SCOM read via MMIO we may get a machine check
@@ -593,7 +619,7 @@ static long mce_handle_error(struct pt_regs *regs,
&phys_addr);
if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
- handled = mce_handle_ue_error(regs);
+ handled = mce_handle_ue_error(regs, &mce_err);
save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index fe4bd321730e..d80212be8698 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -6,11 +6,6 @@
* Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
* and Paul Mackerras.
*
- * kexec bits:
- * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
- * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
- * PPC44x port. Copyright (C) 2011, IBM Corporation
- * Author: Suzuki Poulose <suzuki@in.ibm.com>
*/
#include <linux/sys.h>
@@ -25,7 +20,6 @@
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/processor.h>
-#include <asm/kexec.h>
#include <asm/bug.h>
#include <asm/ptrace.h>
#include <asm/export.h>
@@ -292,22 +286,20 @@ _GLOBAL(flush_instruction_cache)
iccci 0,r3
#endif
#elif defined(CONFIG_FSL_BOOKE)
-BEGIN_FTR_SECTION
+#ifdef CONFIG_E200
mfspr r3,SPRN_L1CSR0
ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC
/* msync; isync recommended here */
mtspr SPRN_L1CSR0,r3
isync
blr
-END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
+#endif
mfspr r3,SPRN_L1CSR1
ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
mtspr SPRN_L1CSR1,r3
+#elif defined(CONFIG_PPC_BOOK3S_601)
+ blr /* for 601, do nothing */
#else
- mfspr r3,SPRN_PVR
- rlwinm r3,r3,16,16,31
- cmpwi 0,r3,1
- beqlr /* for 601, do nothing */
/* 603/604 processor - use invalidate-all bit in HID0 */
mfspr r3,SPRN_HID0
ori r3,r3,HID0_ICFI
@@ -319,123 +311,6 @@ EXPORT_SYMBOL(flush_instruction_cache)
#endif /* CONFIG_PPC_8xx */
/*
- * Write any modified data cache blocks out to memory
- * and invalidate the corresponding instruction cache blocks.
- * This is a no-op on the 601.
- *
- * flush_icache_range(unsigned long start, unsigned long stop)
- */
-_GLOBAL(flush_icache_range)
-BEGIN_FTR_SECTION
- PURGE_PREFETCHED_INS
- blr /* for 601, do nothing */
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
- rlwinm r3,r3,0,0,31 - L1_CACHE_SHIFT
- subf r4,r3,r4
- addi r4,r4,L1_CACHE_BYTES - 1
- srwi. r4,r4,L1_CACHE_SHIFT
- beqlr
- mtctr r4
- mr r6,r3
-1: dcbst 0,r3
- addi r3,r3,L1_CACHE_BYTES
- bdnz 1b
- sync /* wait for dcbst's to get to ram */
-#ifndef CONFIG_44x
- mtctr r4
-2: icbi 0,r6
- addi r6,r6,L1_CACHE_BYTES
- bdnz 2b
-#else
- /* Flash invalidate on 44x because we are passed kmapped addresses and
- this doesn't work for userspace pages due to the virtually tagged
- icache. Sigh. */
- iccci 0, r0
-#endif
- sync /* additional sync needed on g4 */
- isync
- blr
-_ASM_NOKPROBE_SYMBOL(flush_icache_range)
-EXPORT_SYMBOL(flush_icache_range)
-
-/*
- * Flush a particular page from the data cache to RAM.
- * Note: this is necessary because the instruction cache does *not*
- * snoop from the data cache.
- * This is a no-op on the 601 which has a unified cache.
- *
- * void __flush_dcache_icache(void *page)
- */
-_GLOBAL(__flush_dcache_icache)
-BEGIN_FTR_SECTION
- PURGE_PREFETCHED_INS
- blr
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
- rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */
- li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */
- mtctr r4
- mr r6,r3
-0: dcbst 0,r3 /* Write line to ram */
- addi r3,r3,L1_CACHE_BYTES
- bdnz 0b
- sync
-#ifdef CONFIG_44x
- /* We don't flush the icache on 44x. Those have a virtual icache
- * and we don't have access to the virtual address here (it's
- * not the page vaddr but where it's mapped in user space). The
- * flushing of the icache on these is handled elsewhere, when
- * a change in the address space occurs, before returning to
- * user space
- */
-BEGIN_MMU_FTR_SECTION
- blr
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
-#endif /* CONFIG_44x */
- mtctr r4
-1: icbi 0,r6
- addi r6,r6,L1_CACHE_BYTES
- bdnz 1b
- sync
- isync
- blr
-
-#ifndef CONFIG_BOOKE
-/*
- * Flush a particular page from the data cache to RAM, identified
- * by its physical address. We turn off the MMU so we can just use
- * the physical address (this may be a highmem page without a kernel
- * mapping).
- *
- * void __flush_dcache_icache_phys(unsigned long physaddr)
- */
-_GLOBAL(__flush_dcache_icache_phys)
-BEGIN_FTR_SECTION
- PURGE_PREFETCHED_INS
- blr /* for 601, do nothing */
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
- mfmsr r10
- rlwinm r0,r10,0,28,26 /* clear DR */
- mtmsr r0
- isync
- rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */
- li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */
- mtctr r4
- mr r6,r3
-0: dcbst 0,r3 /* Write line to ram */
- addi r3,r3,L1_CACHE_BYTES
- bdnz 0b
- sync
- mtctr r4
-1: icbi 0,r6
- addi r6,r6,L1_CACHE_BYTES
- bdnz 1b
- sync
- mtmsr r10 /* restore DR */
- isync
- blr
-#endif /* CONFIG_BOOKE */
-
-/*
* Copy a whole page. We use the dcbz instruction on the destination
* to reduce memory traffic (it eliminates the unnecessary reads of
* the destination into cache). This requires that the destination
@@ -452,7 +327,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
stwu r9,16(r3)
_GLOBAL(copy_page)
+ rlwinm r5, r3, 0, L1_CACHE_BYTES - 1
addi r3,r3,-4
+
+0: twnei r5, 0 /* WARN if r3 is not cache aligned */
+ EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+
addi r4,r4,-4
li r5,4
@@ -608,488 +488,3 @@ _GLOBAL(start_secondary_resume)
*/
_GLOBAL(__main)
blr
-
-#ifdef CONFIG_KEXEC_CORE
- /*
- * Must be relocatable PIC code callable as a C function.
- */
- .globl relocate_new_kernel
-relocate_new_kernel:
- /* r3 = page_list */
- /* r4 = reboot_code_buffer */
- /* r5 = start_address */
-
-#ifdef CONFIG_FSL_BOOKE
-
- mr r29, r3
- mr r30, r4
- mr r31, r5
-
-#define ENTRY_MAPPING_KEXEC_SETUP
-#include "fsl_booke_entry_mapping.S"
-#undef ENTRY_MAPPING_KEXEC_SETUP
-
- mr r3, r29
- mr r4, r30
- mr r5, r31
-
- li r0, 0
-#elif defined(CONFIG_44x)
-
- /* Save our parameters */
- mr r29, r3
- mr r30, r4
- mr r31, r5
-
-#ifdef CONFIG_PPC_47x
- /* Check for 47x cores */
- mfspr r3,SPRN_PVR
- srwi r3,r3,16
- cmplwi cr0,r3,PVR_476FPE@h
- beq setup_map_47x
- cmplwi cr0,r3,PVR_476@h
- beq setup_map_47x
- cmplwi cr0,r3,PVR_476_ISS@h
- beq setup_map_47x
-#endif /* CONFIG_PPC_47x */
-
-/*
- * Code for setting up 1:1 mapping for PPC440x for KEXEC
- *
- * We cannot switch off the MMU on PPC44x.
- * So we:
- * 1) Invalidate all the mappings except the one we are running from.
- * 2) Create a tmp mapping for our code in the other address space(TS) and
- * jump to it. Invalidate the entry we started in.
- * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS.
- * 4) Jump to the 1:1 mapping in original TS.
- * 5) Invalidate the tmp mapping.
- *
- * - Based on the kexec support code for FSL BookE
- *
- */
-
- /*
- * Load the PID with kernel PID (0).
- * Also load our MSR_IS and TID to MMUCR for TLB search.
- */
- li r3, 0
- mtspr SPRN_PID, r3
- mfmsr r4
- andi. r4,r4,MSR_IS@l
- beq wmmucr
- oris r3,r3,PPC44x_MMUCR_STS@h
-wmmucr:
- mtspr SPRN_MMUCR,r3
- sync
-
- /*
- * Invalidate all the TLB entries except the current entry
- * where we are running from
- */
- bl 0f /* Find our address */
-0: mflr r5 /* Make it accessible */
- tlbsx r23,0,r5 /* Find entry we are in */
- li r4,0 /* Start at TLB entry 0 */
- li r3,0 /* Set PAGEID inval value */
-1: cmpw r23,r4 /* Is this our entry? */
- beq skip /* If so, skip the inval */
- tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */
-skip:
- addi r4,r4,1 /* Increment */
- cmpwi r4,64 /* Are we done? */
- bne 1b /* If not, repeat */
- isync
-
- /* Create a temp mapping and jump to it */
- andi. r6, r23, 1 /* Find the index to use */
- addi r24, r6, 1 /* r24 will contain 1 or 2 */
-
- mfmsr r9 /* get the MSR */
- rlwinm r5, r9, 27, 31, 31 /* Extract the MSR[IS] */
- xori r7, r5, 1 /* Use the other address space */
-
- /* Read the current mapping entries */
- tlbre r3, r23, PPC44x_TLB_PAGEID
- tlbre r4, r23, PPC44x_TLB_XLAT
- tlbre r5, r23, PPC44x_TLB_ATTRIB
-
- /* Save our current XLAT entry */
- mr r25, r4
-
- /* Extract the TLB PageSize */
- li r10, 1 /* r10 will hold PageSize */
- rlwinm r11, r3, 0, 24, 27 /* bits 24-27 */
-
- /* XXX: As of now we use 256M, 4K pages */
- cmpwi r11, PPC44x_TLB_256M
- bne tlb_4k
- rotlwi r10, r10, 28 /* r10 = 256M */
- b write_out
-tlb_4k:
- cmpwi r11, PPC44x_TLB_4K
- bne default
- rotlwi r10, r10, 12 /* r10 = 4K */
- b write_out
-default:
- rotlwi r10, r10, 10 /* r10 = 1K */
-
-write_out:
- /*
- * Write out the tmp 1:1 mapping for this code in other address space
- * Fixup EPN = RPN , TS=other address space
- */
- insrwi r3, r7, 1, 23 /* Bit 23 is TS for PAGEID field */
-
- /* Write out the tmp mapping entries */
- tlbwe r3, r24, PPC44x_TLB_PAGEID
- tlbwe r4, r24, PPC44x_TLB_XLAT
- tlbwe r5, r24, PPC44x_TLB_ATTRIB
-
- subi r11, r10, 1 /* PageOffset Mask = PageSize - 1 */
- not r10, r11 /* Mask for PageNum */
-
- /* Switch to other address space in MSR */
- insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */
-
- bl 1f
-1: mflr r8
- addi r8, r8, (2f-1b) /* Find the target offset */
-
- /* Jump to the tmp mapping */
- mtspr SPRN_SRR0, r8
- mtspr SPRN_SRR1, r9
- rfi
-
-2:
- /* Invalidate the entry we were executing from */
- li r3, 0
- tlbwe r3, r23, PPC44x_TLB_PAGEID
-
- /* attribute fields. rwx for SUPERVISOR mode */
- li r5, 0
- ori r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
-
- /* Create 1:1 mapping in 256M pages */
- xori r7, r7, 1 /* Revert back to Original TS */
-
- li r8, 0 /* PageNumber */
- li r6, 3 /* TLB Index, start at 3 */
-
-next_tlb:
- rotlwi r3, r8, 28 /* Create EPN (bits 0-3) */
- mr r4, r3 /* RPN = EPN */
- ori r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */
- insrwi r3, r7, 1, 23 /* Set TS from r7 */
-
- tlbwe r3, r6, PPC44x_TLB_PAGEID /* PageID field : EPN, V, SIZE */
- tlbwe r4, r6, PPC44x_TLB_XLAT /* Address translation : RPN */
- tlbwe r5, r6, PPC44x_TLB_ATTRIB /* Attributes */
-
- addi r8, r8, 1 /* Increment PN */
- addi r6, r6, 1 /* Increment TLB Index */
- cmpwi r8, 8 /* Are we done ? */
- bne next_tlb
- isync
-
- /* Jump to the new mapping 1:1 */
- li r9,0
- insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */
-
- bl 1f
-1: mflr r8
- and r8, r8, r11 /* Get our offset within page */
- addi r8, r8, (2f-1b)
-
- and r5, r25, r10 /* Get our target PageNum */
- or r8, r8, r5 /* Target jump address */
-
- mtspr SPRN_SRR0, r8
- mtspr SPRN_SRR1, r9
- rfi
-2:
- /* Invalidate the tmp entry we used */
- li r3, 0
- tlbwe r3, r24, PPC44x_TLB_PAGEID
- sync
- b ppc44x_map_done
-
-#ifdef CONFIG_PPC_47x
-
- /* 1:1 mapping for 47x */
-
-setup_map_47x:
-
- /*
- * Load the kernel pid (0) to PID and also to MMUCR[TID].
- * Also set the MSR IS->MMUCR STS
- */
- li r3, 0
- mtspr SPRN_PID, r3 /* Set PID */
- mfmsr r4 /* Get MSR */
- andi. r4, r4, MSR_IS@l /* TS=1? */
- beq 1f /* If not, leave STS=0 */
- oris r3, r3, PPC47x_MMUCR_STS@h /* Set STS=1 */
-1: mtspr SPRN_MMUCR, r3 /* Put MMUCR */
- sync
-
- /* Find the entry we are running from */
- bl 2f
-2: mflr r23
- tlbsx r23, 0, r23
- tlbre r24, r23, 0 /* TLB Word 0 */
- tlbre r25, r23, 1 /* TLB Word 1 */
- tlbre r26, r23, 2 /* TLB Word 2 */
-
-
- /*
- * Invalidates all the tlb entries by writing to 256 RPNs(r4)
- * of 4k page size in all 4 ways (0-3 in r3).
- * This would invalidate the entire UTLB including the one we are
- * running from. However the shadow TLB entries would help us
- * to continue the execution, until we flush them (rfi/isync).
- */
- addis r3, 0, 0x8000 /* specify the way */
- addi r4, 0, 0 /* TLB Word0 = (EPN=0, VALID = 0) */
- addi r5, 0, 0
- b clear_utlb_entry
-
- /* Align the loop to speed things up. from head_44x.S */
- .align 6
-
-clear_utlb_entry:
-
- tlbwe r4, r3, 0
- tlbwe r5, r3, 1
- tlbwe r5, r3, 2
- addis r3, r3, 0x2000 /* Increment the way */
- cmpwi r3, 0
- bne clear_utlb_entry
- addis r3, 0, 0x8000
- addis r4, r4, 0x100 /* Increment the EPN */
- cmpwi r4, 0
- bne clear_utlb_entry
-
- /* Create the entries in the other address space */
- mfmsr r5
- rlwinm r7, r5, 27, 31, 31 /* Get the TS (Bit 26) from MSR */
- xori r7, r7, 1 /* r7 = !TS */
-
- insrwi r24, r7, 1, 21 /* Change the TS in the saved TLB word 0 */
-
- /*
- * write out the TLB entries for the tmp mapping
- * Use way '0' so that we could easily invalidate it later.
- */
- lis r3, 0x8000 /* Way '0' */
-
- tlbwe r24, r3, 0
- tlbwe r25, r3, 1
- tlbwe r26, r3, 2
-
- /* Update the msr to the new TS */
- insrwi r5, r7, 1, 26
-
- bl 1f
-1: mflr r6
- addi r6, r6, (2f-1b)
-
- mtspr SPRN_SRR0, r6
- mtspr SPRN_SRR1, r5
- rfi
-
- /*
- * Now we are in the tmp address space.
- * Create a 1:1 mapping for 0-2GiB in the original TS.
- */
-2:
- li r3, 0
- li r4, 0 /* TLB Word 0 */
- li r5, 0 /* TLB Word 1 */
- li r6, 0
- ori r6, r6, PPC47x_TLB2_S_RWX /* TLB word 2 */
-
- li r8, 0 /* PageIndex */
-
- xori r7, r7, 1 /* revert back to original TS */
-
-write_utlb:
- rotlwi r5, r8, 28 /* RPN = PageIndex * 256M */
- /* ERPN = 0 as we don't use memory above 2G */
-
- mr r4, r5 /* EPN = RPN */
- ori r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M)
- insrwi r4, r7, 1, 21 /* Insert the TS to Word 0 */
-
- tlbwe r4, r3, 0 /* Write out the entries */
- tlbwe r5, r3, 1
- tlbwe r6, r3, 2
- addi r8, r8, 1
- cmpwi r8, 8 /* Have we completed ? */
- bne write_utlb
-
- /* make sure we complete the TLB write up */
- isync
-
- /*
- * Prepare to jump to the 1:1 mapping.
- * 1) Extract page size of the tmp mapping
- * DSIZ = TLB_Word0[22:27]
- * 2) Calculate the physical address of the address
- * to jump to.
- */
- rlwinm r10, r24, 0, 22, 27
-
- cmpwi r10, PPC47x_TLB0_4K
- bne 0f
- li r10, 0x1000 /* r10 = 4k */
- bl 1f
-
-0:
- /* Defaults to 256M */
- lis r10, 0x1000
-
- bl 1f
-1: mflr r4
- addi r4, r4, (2f-1b) /* virtual address of 2f */
-
- subi r11, r10, 1 /* offsetmask = Pagesize - 1 */
- not r10, r11 /* Pagemask = ~(offsetmask) */
-
- and r5, r25, r10 /* Physical page */
- and r6, r4, r11 /* offset within the current page */
-
- or r5, r5, r6 /* Physical address for 2f */
-
- /* Switch the TS in MSR to the original one */
- mfmsr r8
- insrwi r8, r7, 1, 26
-
- mtspr SPRN_SRR1, r8
- mtspr SPRN_SRR0, r5
- rfi
-
-2:
- /* Invalidate the tmp mapping */
- lis r3, 0x8000 /* Way '0' */
-
- clrrwi r24, r24, 12 /* Clear the valid bit */
- tlbwe r24, r3, 0
- tlbwe r25, r3, 1
- tlbwe r26, r3, 2
-
- /* Make sure we complete the TLB write and flush the shadow TLB */
- isync
-
-#endif
-
-ppc44x_map_done:
-
-
- /* Restore the parameters */
- mr r3, r29
- mr r4, r30
- mr r5, r31
-
- li r0, 0
-#else
- li r0, 0
-
- /*
- * Set Machine Status Register to a known status,
- * switch the MMU off and jump to 1: in a single step.
- */
-
- mr r8, r0
- ori r8, r8, MSR_RI|MSR_ME
- mtspr SPRN_SRR1, r8
- addi r8, r4, 1f - relocate_new_kernel
- mtspr SPRN_SRR0, r8
- sync
- rfi
-
-1:
-#endif
- /* from this point address translation is turned off */
- /* and interrupts are disabled */
-
- /* set a new stack at the bottom of our page... */
- /* (not really needed now) */
- addi r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */
- stw r0, 0(r1)
-
- /* Do the copies */
- li r6, 0 /* checksum */
- mr r0, r3
- b 1f
-
-0: /* top, read another word for the indirection page */
- lwzu r0, 4(r3)
-
-1:
- /* is it a destination page? (r8) */
- rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */
- beq 2f
-
- rlwinm r8, r0, 0, 0, 19 /* clear kexec flags, page align */
- b 0b
-
-2: /* is it an indirection page? (r3) */
- rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */
- beq 2f
-
- rlwinm r3, r0, 0, 0, 19 /* clear kexec flags, page align */
- subi r3, r3, 4
- b 0b
-
-2: /* are we done? */
- rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */
- beq 2f
- b 3f
-
-2: /* is it a source page? (r9) */
- rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */
- beq 0b
-
- rlwinm r9, r0, 0, 0, 19 /* clear kexec flags, page align */
-
- li r7, PAGE_SIZE / 4
- mtctr r7
- subi r9, r9, 4
- subi r8, r8, 4
-9:
- lwzu r0, 4(r9) /* do the copy */
- xor r6, r6, r0
- stwu r0, 4(r8)
- dcbst 0, r8
- sync
- icbi 0, r8
- bdnz 9b
-
- addi r9, r9, 4
- addi r8, r8, 4
- b 0b
-
-3:
-
- /* To be certain of avoiding problems with self-modifying code
- * execute a serializing instruction here.
- */
- isync
- sync
-
- mfspr r3, SPRN_PIR /* current core we are running on */
- mr r4, r5 /* load physical address of chunk called */
-
- /* jump to the entry point, usually the setup routine */
- mtlr r5
- blrl
-
-1: b 1b
-
-relocate_new_kernel_end:
-
- .globl relocate_new_kernel_size
-relocate_new_kernel_size:
- .long relocate_new_kernel_end - relocate_new_kernel
-#endif
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index b55a7b4cb543..1864605eca29 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -49,108 +49,6 @@ _GLOBAL(call_do_irq)
mtlr r0
blr
- .section ".toc","aw"
-PPC64_CACHES:
- .tc ppc64_caches[TC],ppc64_caches
- .section ".text"
-
-/*
- * Write any modified data cache blocks out to memory
- * and invalidate the corresponding instruction cache blocks.
- *
- * flush_icache_range(unsigned long start, unsigned long stop)
- *
- * flush all bytes from start through stop-1 inclusive
- */
-
-_GLOBAL_TOC(flush_icache_range)
-BEGIN_FTR_SECTION
- PURGE_PREFETCHED_INS
- blr
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
-/*
- * Flush the data cache to memory
- *
- * Different systems have different cache line sizes
- * and in some cases i-cache and d-cache line sizes differ from
- * each other.
- */
- ld r10,PPC64_CACHES@toc(r2)
- lwz r7,DCACHEL1BLOCKSIZE(r10)/* Get cache block size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of cache block size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- mtctr r8
-1: dcbst 0,r6
- add r6,r6,r7
- bdnz 1b
- sync
-
-/* Now invalidate the instruction cache */
-
- lwz r7,ICACHEL1BLOCKSIZE(r10) /* Get Icache block size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5
- lwz r9,ICACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of Icache block size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- mtctr r8
-2: icbi 0,r6
- add r6,r6,r7
- bdnz 2b
- isync
- blr
-_ASM_NOKPROBE_SYMBOL(flush_icache_range)
-EXPORT_SYMBOL(flush_icache_range)
-
-/*
- * Flush a particular page from the data cache to RAM.
- * Note: this is necessary because the instruction cache does *not*
- * snoop from the data cache.
- *
- * void __flush_dcache_icache(void *page)
- */
-_GLOBAL(__flush_dcache_icache)
-/*
- * Flush the data cache to memory
- *
- * Different systems have different cache line sizes
- */
-
-BEGIN_FTR_SECTION
- PURGE_PREFETCHED_INS
- blr
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
-
-/* Flush the dcache */
- ld r7,PPC64_CACHES@toc(r2)
- clrrdi r3,r3,PAGE_SHIFT /* Page align */
- lwz r4,DCACHEL1BLOCKSPERPAGE(r7) /* Get # dcache blocks per page */
- lwz r5,DCACHEL1BLOCKSIZE(r7) /* Get dcache block size */
- mr r6,r3
- mtctr r4
-0: dcbst 0,r6
- add r6,r6,r5
- bdnz 0b
- sync
-
-/* Now invalidate the icache */
-
- lwz r4,ICACHEL1BLOCKSPERPAGE(r7) /* Get # icache blocks per page */
- lwz r5,ICACHEL1BLOCKSIZE(r7) /* Get icache block size */
- mtctr r4
-1: icbi 0,r3
- add r3,r3,r5
- bdnz 1b
- isync
- blr
-
_GLOBAL(__bswapdi2)
EXPORT_SYMBOL(__bswapdi2)
srdi r8,r3,32
@@ -432,18 +330,13 @@ kexec_create_tlb:
rlwimi r9,r10,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r9) */
/* Set up a temp identity mapping v:0 to p:0 and return to it. */
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-#define M_IF_NEEDED MAS2_M
-#else
-#define M_IF_NEEDED 0
-#endif
mtspr SPRN_MAS0,r9
lis r9,(MAS1_VALID|MAS1_IPROT)@h
ori r9,r9,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l
mtspr SPRN_MAS1,r9
- LOAD_REG_IMMEDIATE(r9, 0x0 | M_IF_NEEDED)
+ LOAD_REG_IMMEDIATE(r9, 0x0 | MAS2_M_IF_NEEDED)
mtspr SPRN_MAS2,r9
LOAD_REG_IMMEDIATE(r9, 0x0 | MAS3_SR | MAS3_SW | MAS3_SX)
diff --git a/arch/powerpc/kernel/note.S b/arch/powerpc/kernel/note.S
new file mode 100644
index 000000000000..bcdad15395dd
--- /dev/null
+++ b/arch/powerpc/kernel/note.S
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PowerPC ELF notes.
+ *
+ * Copyright 2019, IBM Corporation
+ */
+
+#include <linux/elfnote.h>
+#include <asm/elfnote.h>
+
+/*
+ * Ultravisor-capable bit (PowerNV only).
+ *
+ * Bit 0 indicates that the powerpc kernel binary knows how to run in an
+ * ultravisor-enabled system.
+ *
+ * In an ultravisor-enabled system, some machine resources are now controlled
+ * by the ultravisor. If the kernel is not ultravisor-capable, but it ends up
+ * being run on a machine with ultravisor, the kernel will probably crash
+ * trying to access ultravisor resources. For instance, it may crash in early
+ * boot trying to set the partition table entry 0.
+ *
+ * In an ultravisor-enabled system, a bootloader could warn the user or prevent
+ * the kernel from being run if the PowerPC ultravisor capability doesn't exist
+ * or the Ultravisor-capable bit is not set.
+ */
+#ifdef CONFIG_PPC_POWERNV
+#define PPCCAP_ULTRAVISOR_BIT (1 << 0)
+#else
+#define PPCCAP_ULTRAVISOR_BIT 0
+#endif
+
+/*
+ * Add the PowerPC Capabilities in the binary ELF note. It is a bitmap that
+ * can be used to advertise kernel capabilities to userland.
+ */
+#define PPC_CAPABILITIES_BITMAP (PPCCAP_ULTRAVISOR_BIT)
+
+ELFNOTE(PowerPC, PPC_ELFNOTE_CAPABILITIES,
+ .long PPC_CAPABILITIES_BITMAP)
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index e3ad8aa4730d..949eceb254d8 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -14,6 +14,8 @@
#include <asm/sections.h>
#include <asm/pgtable.h>
#include <asm/kexec.h>
+#include <asm/svm.h>
+#include <asm/ultravisor.h>
#include "setup.h"
@@ -52,6 +54,43 @@ static void *__init alloc_paca_data(unsigned long size, unsigned long align,
#ifdef CONFIG_PPC_PSERIES
+#define LPPACA_SIZE 0x400
+
+static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align,
+ unsigned long limit, int cpu)
+{
+ size_t shared_lppaca_total_size = PAGE_ALIGN(nr_cpu_ids * LPPACA_SIZE);
+ static unsigned long shared_lppaca_size;
+ static void *shared_lppaca;
+ void *ptr;
+
+ if (!shared_lppaca) {
+ memblock_set_bottom_up(true);
+
+ shared_lppaca =
+ memblock_alloc_try_nid(shared_lppaca_total_size,
+ PAGE_SIZE, MEMBLOCK_LOW_LIMIT,
+ limit, NUMA_NO_NODE);
+ if (!shared_lppaca)
+ panic("cannot allocate shared data");
+
+ memblock_set_bottom_up(false);
+ uv_share_page(PHYS_PFN(__pa(shared_lppaca)),
+ shared_lppaca_total_size >> PAGE_SHIFT);
+ }
+
+ ptr = shared_lppaca + shared_lppaca_size;
+ shared_lppaca_size += size;
+
+ /*
+ * This is very early in boot, so no harm done if the kernel crashes at
+ * this point.
+ */
+ BUG_ON(shared_lppaca_size >= shared_lppaca_total_size);
+
+ return ptr;
+}
+
/*
* See asm/lppaca.h for more detail.
*
@@ -65,7 +104,7 @@ static inline void init_lppaca(struct lppaca *lppaca)
*lppaca = (struct lppaca) {
.desc = cpu_to_be32(0xd397d781), /* "LpPa" */
- .size = cpu_to_be16(0x400),
+ .size = cpu_to_be16(LPPACA_SIZE),
.fpregs_in_use = 1,
.slb_count = cpu_to_be16(64),
.vmxregs_in_use = 0,
@@ -75,19 +114,22 @@ static inline void init_lppaca(struct lppaca *lppaca)
static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
{
struct lppaca *lp;
- size_t size = 0x400;
- BUILD_BUG_ON(size < sizeof(struct lppaca));
+ BUILD_BUG_ON(sizeof(struct lppaca) > LPPACA_SIZE);
if (early_cpu_has_feature(CPU_FTR_HVMODE))
return NULL;
- lp = alloc_paca_data(size, 0x400, limit, cpu);
+ if (is_secure_guest())
+ lp = alloc_shared_lppaca(LPPACA_SIZE, 0x400, limit, cpu);
+ else
+ lp = alloc_paca_data(LPPACA_SIZE, 0x400, limit, cpu);
+
init_lppaca(lp);
return lp;
}
-#endif /* CONFIG_PPC_BOOK3S */
+#endif /* CONFIG_PPC_PSERIES */
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index f627e15bb43c..c6c03416a151 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -261,12 +261,6 @@ int pcibios_sriov_disable(struct pci_dev *pdev)
#endif /* CONFIG_PCI_IOV */
-void pcibios_bus_add_device(struct pci_dev *pdev)
-{
- if (ppc_md.pcibios_bus_add_device)
- ppc_md.pcibios_bus_add_device(pdev);
-}
-
static resource_size_t pcibios_io_size(const struct pci_controller *hose)
{
#ifdef CONFIG_PPC64
@@ -964,7 +958,7 @@ void pcibios_setup_bus_self(struct pci_bus *bus)
phb->controller_ops.dma_bus_setup(bus);
}
-static void pcibios_setup_device(struct pci_dev *dev)
+void pcibios_bus_add_device(struct pci_dev *dev)
{
struct pci_controller *phb;
/* Fixup NUMA node as it may not be setup yet by the generic
@@ -985,17 +979,13 @@ static void pcibios_setup_device(struct pci_dev *dev)
pci_read_irq_line(dev);
if (ppc_md.pci_irq_fixup)
ppc_md.pci_irq_fixup(dev);
+
+ if (ppc_md.pcibios_bus_add_device)
+ ppc_md.pcibios_bus_add_device(dev);
}
int pcibios_add_device(struct pci_dev *dev)
{
- /*
- * We can only call pcibios_setup_device() after bus setup is complete,
- * since some of the platform specific DMA setup code depends on it.
- */
- if (dev->bus->is_added)
- pcibios_setup_device(dev);
-
#ifdef CONFIG_PCI_IOV
if (ppc_md.pcibios_fixup_sriov)
ppc_md.pcibios_fixup_sriov(dev);
@@ -1004,24 +994,6 @@ int pcibios_add_device(struct pci_dev *dev)
return 0;
}
-void pcibios_setup_bus_devices(struct pci_bus *bus)
-{
- struct pci_dev *dev;
-
- pr_debug("PCI: Fixup bus devices %d (%s)\n",
- bus->number, bus->self ? pci_name(bus->self) : "PHB");
-
- list_for_each_entry(dev, &bus->devices, bus_list) {
- /* Cardbus can call us to add new devices to a bus, so ignore
- * those who are already fully discovered
- */
- if (pci_dev_is_added(dev))
- continue;
-
- pcibios_setup_device(dev);
- }
-}
-
void pcibios_set_master(struct pci_dev *dev)
{
/* No special bus mastering setup handling */
@@ -1037,19 +1009,9 @@ void pcibios_fixup_bus(struct pci_bus *bus)
/* Now fixup the bus bus */
pcibios_setup_bus_self(bus);
-
- /* Now fixup devices on that bus */
- pcibios_setup_bus_devices(bus);
}
EXPORT_SYMBOL(pcibios_fixup_bus);
-void pci_fixup_cardbus(struct pci_bus *bus)
-{
- /* Now fixup devices on that bus */
- pcibios_setup_bus_devices(bus);
-}
-
-
static int skip_isa_ioresource_align(struct pci_dev *dev)
{
if (pci_has_flag(PCI_CAN_SKIP_ISA_ALIGN) &&
@@ -1379,10 +1341,6 @@ void __init pcibios_resource_survey(void)
pr_debug("PCI: Assigning unassigned resources...\n");
pci_assign_unassigned_resources();
}
-
- /* Call machine dependent fixup */
- if (ppc_md.pcibios_fixup)
- ppc_md.pcibios_fixup();
}
/* This is used by the PCI hotplug driver to allocate resource
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index 0b0cf8168b47..d6a67f814983 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -55,11 +55,18 @@ EXPORT_SYMBOL_GPL(pci_find_bus_by_node);
void pcibios_release_device(struct pci_dev *dev)
{
struct pci_controller *phb = pci_bus_to_host(dev->bus);
+ struct pci_dn *pdn = pci_get_pdn(dev);
eeh_remove_device(dev);
if (phb->controller_ops.release_device)
phb->controller_ops.release_device(dev);
+
+ /* free()ing the pci_dn has been deferred to us, do it now */
+ if (pdn && (pdn->flags & PCI_DN_FLAG_DEAD)) {
+ pci_dbg(dev, "freeing dead pdn\n");
+ kfree(pdn);
+ }
}
/**
@@ -127,7 +134,6 @@ void pci_hp_add_devices(struct pci_bus *bus)
*/
slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
- pcibios_setup_bus_devices(bus);
max = bus->busn_res.start;
/*
* Scan bridges that are already configured. We don't touch
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 50942a1d1a5f..b49e1060a3bf 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -263,6 +263,10 @@ static int __init pcibios_init(void)
/* Call common code to handle resource allocation */
pcibios_resource_survey();
+ /* Call machine dependent fixup */
+ if (ppc_md.pcibios_fixup)
+ ppc_md.pcibios_fixup();
+
/* Call machine dependent post-init code */
if (ppc_md.pcibios_after_init)
ppc_md.pcibios_after_init();
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index b7030b1189d0..f83d1f69b1dd 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -54,14 +54,20 @@ static int __init pcibios_init(void)
pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
/* Scan all of the recorded PCI controllers. */
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
pcibios_scan_phb(hose);
- pci_bus_add_devices(hose->bus);
- }
/* Call common code to handle resource allocation */
pcibios_resource_survey();
+ /* Add devices. */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ pci_bus_add_devices(hose->bus);
+
+ /* Call machine dependent fixup */
+ if (ppc_md.pcibios_fixup)
+ ppc_md.pcibios_fixup();
+
printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");
return 0;
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index c4c8c237a106..4e654df55969 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -125,7 +125,7 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
}
#ifdef CONFIG_PCI_IOV
-static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
+static struct pci_dn *add_one_sriov_vf_pdn(struct pci_dn *parent,
int vf_index,
int busno, int devfn)
{
@@ -151,17 +151,15 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
return pdn;
}
-#endif
-struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
+struct pci_dn *add_sriov_vf_pdns(struct pci_dev *pdev)
{
-#ifdef CONFIG_PCI_IOV
struct pci_dn *parent, *pdn;
int i;
/* Only support IOV for now */
- if (!pdev->is_physfn)
- return pci_get_pdn(pdev);
+ if (WARN_ON(!pdev->is_physfn))
+ return NULL;
/* Check if VFs have been populated */
pdn = pci_get_pdn(pdev);
@@ -176,7 +174,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
struct eeh_dev *edev __maybe_unused;
- pdn = add_one_dev_pci_data(parent, i,
+ pdn = add_one_sriov_vf_pdn(parent, i,
pci_iov_virtfn_bus(pdev, i),
pci_iov_virtfn_devfn(pdev, i));
if (!pdn) {
@@ -192,31 +190,17 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
edev->physfn = pdev;
#endif /* CONFIG_EEH */
}
-#endif /* CONFIG_PCI_IOV */
-
return pci_get_pdn(pdev);
}
-void remove_dev_pci_data(struct pci_dev *pdev)
+void remove_sriov_vf_pdns(struct pci_dev *pdev)
{
-#ifdef CONFIG_PCI_IOV
struct pci_dn *parent;
struct pci_dn *pdn, *tmp;
int i;
- /*
- * VF and VF PE are created/released dynamically, so we need to
- * bind/unbind them. Otherwise the VF and VF PE would be mismatched
- * when re-enabling SR-IOV.
- */
- if (pdev->is_virtfn) {
- pdn = pci_get_pdn(pdev);
- pdn->pe_number = IODA_INVALID_PE;
- return;
- }
-
/* Only support IOV PF for now */
- if (!pdev->is_physfn)
+ if (WARN_ON(!pdev->is_physfn))
return;
/* Check if VFs have been populated */
@@ -244,9 +228,22 @@ void remove_dev_pci_data(struct pci_dev *pdev)
continue;
#ifdef CONFIG_EEH
- /* Release EEH device for the VF */
+ /*
+ * Release EEH state for this VF. The PCI core
+ * has already torn down the pci_dev for this VF, but
+ * we're responsible to removing the eeh_dev since it
+ * has the same lifetime as the pci_dn that spawned it.
+ */
edev = pdn_to_eeh_dev(pdn);
if (edev) {
+ /*
+ * We allocate pci_dn's for the totalvfs count,
+ * but only only the vfs that were activated
+ * have a configured PE.
+ */
+ if (edev->pe)
+ eeh_rmv_from_parent_pe(edev);
+
pdn->edev = NULL;
kfree(edev);
}
@@ -258,8 +255,8 @@ void remove_dev_pci_data(struct pci_dev *pdev)
kfree(pdn);
}
}
-#endif /* CONFIG_PCI_IOV */
}
+#endif /* CONFIG_PCI_IOV */
struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
struct device_node *dn)
@@ -323,6 +320,7 @@ void pci_remove_device_node_info(struct device_node *dn)
{
struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL;
struct device_node *parent;
+ struct pci_dev *pdev;
#ifdef CONFIG_EEH
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
@@ -336,12 +334,28 @@ void pci_remove_device_node_info(struct device_node *dn)
WARN_ON(!list_empty(&pdn->child_list));
list_del(&pdn->list);
+ /* Drop the parent pci_dn's ref to our backing dt node */
parent = of_get_parent(dn);
if (parent)
of_node_put(parent);
- dn->data = NULL;
- kfree(pdn);
+ /*
+ * At this point we *might* still have a pci_dev that was
+ * instantiated from this pci_dn. So defer free()ing it until
+ * the pci_dev's release function is called.
+ */
+ pdev = pci_get_domain_bus_and_slot(pdn->phb->global_number,
+ pdn->busno, pdn->devfn);
+ if (pdev) {
+ /* NB: pdev has a ref to dn */
+ pci_dbg(pdev, "marked pdn (from %pOF) as dead\n", dn);
+ pdn->flags |= PCI_DN_FLAG_DEAD;
+ } else {
+ dn->data = NULL;
+ kfree(pdn);
+ }
+
+ pci_dev_put(pdev);
}
EXPORT_SYMBOL_GPL(pci_remove_device_node_info);
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 409c6c1beabf..c3024f104765 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -34,31 +34,75 @@ static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
* pci_parse_of_flags - Parse the flags cell of a device tree PCI address
* @addr0: value of 1st cell of a device tree PCI address.
* @bridge: Set this flag if the address is from a bridge 'ranges' property
+ *
+ * PCI Bus Binding to IEEE Std 1275-1994
+ *
+ * Bit# 33222222 22221111 11111100 00000000
+ * 10987654 32109876 54321098 76543210
+ * phys.hi cell: npt000ss bbbbbbbb dddddfff rrrrrrrr
+ * phys.mid cell: hhhhhhhh hhhhhhhh hhhhhhhh hhhhhhhh
+ * phys.lo cell: llllllll llllllll llllllll llllllll
+ *
+ * where:
+ * n is 0 if the address is relocatable, 1 otherwise
+ * p is 1 if the addressable region is "prefetchable", 0 otherwise
+ * t is 1 if the address is aliased (for non-relocatable I/O),
+ * below 1 MB (for Memory),or below 64 KB (for relocatable I/O).
+ * ss is the space code, denoting the address space:
+ * 00 denotes Configuration Space
+ * 01 denotes I/O Space
+ * 10 denotes 32-bit-address Memory Space
+ * 11 denotes 64-bit-address Memory Space
+ * bbbbbbbb is the 8-bit Bus Number
+ * ddddd is the 5-bit Device Number
+ * fff is the 3-bit Function Number
+ * rrrrrrrr is the 8-bit Register Number
*/
+#define OF_PCI_ADDR0_SPACE(ss) (((ss)&3)<<24)
+#define OF_PCI_ADDR0_SPACE_CFG OF_PCI_ADDR0_SPACE(0)
+#define OF_PCI_ADDR0_SPACE_IO OF_PCI_ADDR0_SPACE(1)
+#define OF_PCI_ADDR0_SPACE_MMIO32 OF_PCI_ADDR0_SPACE(2)
+#define OF_PCI_ADDR0_SPACE_MMIO64 OF_PCI_ADDR0_SPACE(3)
+#define OF_PCI_ADDR0_SPACE_MASK OF_PCI_ADDR0_SPACE(3)
+#define OF_PCI_ADDR0_RELOC (1UL<<31)
+#define OF_PCI_ADDR0_PREFETCH (1UL<<30)
+#define OF_PCI_ADDR0_ALIAS (1UL<<29)
+#define OF_PCI_ADDR0_BUS 0x00FF0000UL
+#define OF_PCI_ADDR0_DEV 0x0000F800UL
+#define OF_PCI_ADDR0_FN 0x00000700UL
+#define OF_PCI_ADDR0_BARREG 0x000000FFUL
+
unsigned int pci_parse_of_flags(u32 addr0, int bridge)
{
- unsigned int flags = 0;
+ unsigned int flags = 0, as = addr0 & OF_PCI_ADDR0_SPACE_MASK;
- if (addr0 & 0x02000000) {
+ if (as == OF_PCI_ADDR0_SPACE_MMIO32 || as == OF_PCI_ADDR0_SPACE_MMIO64) {
flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
- flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
- if (flags & PCI_BASE_ADDRESS_MEM_TYPE_64)
- flags |= IORESOURCE_MEM_64;
- flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
- if (addr0 & 0x40000000)
- flags |= IORESOURCE_PREFETCH
- | PCI_BASE_ADDRESS_MEM_PREFETCH;
+
+ if (as == OF_PCI_ADDR0_SPACE_MMIO64)
+ flags |= PCI_BASE_ADDRESS_MEM_TYPE_64 | IORESOURCE_MEM_64;
+
+ if (addr0 & OF_PCI_ADDR0_ALIAS)
+ flags |= PCI_BASE_ADDRESS_MEM_TYPE_1M;
+
+ if (addr0 & OF_PCI_ADDR0_PREFETCH)
+ flags |= IORESOURCE_PREFETCH |
+ PCI_BASE_ADDRESS_MEM_PREFETCH;
+
/* Note: We don't know whether the ROM has been left enabled
* by the firmware or not. We mark it as disabled (ie, we do
* not set the IORESOURCE_ROM_ENABLE flag) for now rather than
* do a config space read, it will be force-enabled if needed
*/
- if (!bridge && (addr0 & 0xff) == 0x30)
+ if (!bridge && (addr0 & OF_PCI_ADDR0_BARREG) == PCI_ROM_ADDRESS)
flags |= IORESOURCE_READONLY;
- } else if (addr0 & 0x01000000)
+
+ } else if (as == OF_PCI_ADDR0_SPACE_IO)
flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
+
if (flags)
flags |= IORESOURCE_SIZEALIGN;
+
return flags;
}
@@ -370,7 +414,6 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
*/
if (!rescan_existing)
pcibios_setup_bus_self(bus);
- pcibios_setup_bus_devices(bus);
/* Now scan child busses */
for_each_pci_bridge(dev, bus)
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index be3758d54e59..877817471e3c 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -39,10 +39,10 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
return 0;
}
-static const struct file_operations page_map_fops = {
- .llseek = page_map_seek,
- .read = page_map_read,
- .mmap = page_map_mmap
+static const struct proc_ops page_map_proc_ops = {
+ .proc_lseek = page_map_seek,
+ .proc_read = page_map_read,
+ .proc_mmap = page_map_mmap,
};
@@ -51,7 +51,7 @@ static int __init proc_ppc64_init(void)
struct proc_dir_entry *pde;
pde = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL,
- &page_map_fops, vdso_data);
+ &page_map_proc_ops, vdso_data);
if (!pde)
return 1;
proc_set_size(pde, PAGE_SIZE);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 8fc4de0d22b4..fad50db9dcf2 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -101,21 +101,8 @@ static void check_if_tm_restore_required(struct task_struct *tsk)
}
}
-static bool tm_active_with_fp(struct task_struct *tsk)
-{
- return MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
- (tsk->thread.ckpt_regs.msr & MSR_FP);
-}
-
-static bool tm_active_with_altivec(struct task_struct *tsk)
-{
- return MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
- (tsk->thread.ckpt_regs.msr & MSR_VEC);
-}
#else
static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
-static inline bool tm_active_with_fp(struct task_struct *tsk) { return false; }
-static inline bool tm_active_with_altivec(struct task_struct *tsk) { return false; }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
bool strict_msr_control;
@@ -252,7 +239,7 @@ EXPORT_SYMBOL(enable_kernel_fp);
static int restore_fp(struct task_struct *tsk)
{
- if (tsk->thread.load_fp || tm_active_with_fp(tsk)) {
+ if (tsk->thread.load_fp) {
load_fp_state(&current->thread.fp_state);
current->thread.load_fp++;
return 1;
@@ -334,8 +321,7 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
static int restore_altivec(struct task_struct *tsk)
{
- if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
- (tsk->thread.load_vec || tm_active_with_altivec(tsk))) {
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) {
load_vr_state(&tsk->thread.vr_state);
tsk->thread.used_vr = 1;
tsk->thread.load_vec++;
@@ -497,13 +483,14 @@ void giveup_all(struct task_struct *tsk)
if (!tsk->thread.regs)
return;
+ check_if_tm_restore_required(tsk);
+
usermsr = tsk->thread.regs->msr;
if ((usermsr & msr_all_available) == 0)
return;
msr_check_and_set(msr_all_available);
- check_if_tm_restore_required(tsk);
WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
@@ -728,6 +715,8 @@ static void set_debug_reg_defaults(struct thread_struct *thread)
{
thread->hw_brk.address = 0;
thread->hw_brk.type = 0;
+ thread->hw_brk.len = 0;
+ thread->hw_brk.hw_len = 0;
if (ppc_breakpoint_available())
set_breakpoint(&thread->hw_brk);
}
@@ -751,28 +740,6 @@ static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
mtspr(SPRN_DABRX, dabrx);
return 0;
}
-#elif defined(CONFIG_PPC_8xx)
-static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
-{
- unsigned long addr = dabr & ~HW_BRK_TYPE_DABR;
- unsigned long lctrl1 = 0x90000000; /* compare type: equal on E & F */
- unsigned long lctrl2 = 0x8e000002; /* watchpoint 1 on cmp E | F */
-
- if ((dabr & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_READ)
- lctrl1 |= 0xa0000;
- else if ((dabr & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_WRITE)
- lctrl1 |= 0xf0000;
- else if ((dabr & HW_BRK_TYPE_RDWR) == 0)
- lctrl2 = 0;
-
- mtspr(SPRN_LCTRL2, 0);
- mtspr(SPRN_CMPE, addr);
- mtspr(SPRN_CMPF, addr + 4);
- mtspr(SPRN_LCTRL1, lctrl1);
- mtspr(SPRN_LCTRL2, lctrl2);
-
- return 0;
-}
#else
static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
{
@@ -793,6 +760,39 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk)
return __set_dabr(dabr, dabrx);
}
+static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
+{
+ unsigned long lctrl1 = LCTRL1_CTE_GT | LCTRL1_CTF_LT | LCTRL1_CRWE_RW |
+ LCTRL1_CRWF_RW;
+ unsigned long lctrl2 = LCTRL2_LW0EN | LCTRL2_LW0LADC | LCTRL2_SLW0EN;
+ unsigned long start_addr = brk->address & ~HW_BREAKPOINT_ALIGN;
+ unsigned long end_addr = (brk->address + brk->len - 1) | HW_BREAKPOINT_ALIGN;
+
+ if (start_addr == 0)
+ lctrl2 |= LCTRL2_LW0LA_F;
+ else if (end_addr == ~0U)
+ lctrl2 |= LCTRL2_LW0LA_E;
+ else
+ lctrl2 |= LCTRL2_LW0LA_EandF;
+
+ mtspr(SPRN_LCTRL2, 0);
+
+ if ((brk->type & HW_BRK_TYPE_RDWR) == 0)
+ return 0;
+
+ if ((brk->type & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_READ)
+ lctrl1 |= LCTRL1_CRWE_RO | LCTRL1_CRWF_RO;
+ if ((brk->type & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_WRITE)
+ lctrl1 |= LCTRL1_CRWE_WO | LCTRL1_CRWF_WO;
+
+ mtspr(SPRN_CMPE, start_addr - 1);
+ mtspr(SPRN_CMPF, end_addr + 1);
+ mtspr(SPRN_LCTRL1, lctrl1);
+ mtspr(SPRN_LCTRL2, lctrl2);
+
+ return 0;
+}
+
void __set_breakpoint(struct arch_hw_breakpoint *brk)
{
memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));
@@ -800,6 +800,8 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
if (dawr_enabled())
// Power8 or later
set_dawr(brk);
+ else if (IS_ENABLED(CONFIG_PPC_8xx))
+ set_breakpoint_8xx(brk);
else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
// Power7 or earlier
set_dabr(brk);
@@ -829,6 +831,7 @@ static inline bool hw_brk_match(struct arch_hw_breakpoint *a,
return false;
if (a->len != b->len)
return false;
+ /* no need to check hw_len. it's calculated from address and len */
return true;
}
@@ -1274,16 +1277,6 @@ void show_user_instructions(struct pt_regs *regs)
pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
- /*
- * Make sure the NIP points at userspace, not kernel text/data or
- * elsewhere.
- */
- if (!__access_ok(pc, NR_INSN_TO_PRINT * sizeof(int), USER_DS)) {
- pr_info("%s[%d]: Bad NIP, not dumping instructions.\n",
- current->comm, current->pid);
- return;
- }
-
seq_buf_init(&s, buf, sizeof(buf));
while (n) {
@@ -1294,7 +1287,7 @@ void show_user_instructions(struct pt_regs *regs)
for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) {
int instr;
- if (probe_kernel_address((const void *)pc, instr)) {
+ if (probe_user_read(&instr, (void __user *)pc, sizeof(instr))) {
seq_buf_printf(&s, "XXXXXXXX ");
continue;
}
@@ -1600,8 +1593,9 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
/*
* Copy architecture-specific thread state
*/
-int copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long kthread_arg, struct task_struct *p)
+int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
+ unsigned long kthread_arg, struct task_struct *p,
+ unsigned long tls)
{
struct pt_regs *childregs, *kregs;
extern void ret_from_fork(void);
@@ -1642,10 +1636,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_PPC64
if (!is_32bit_task())
- childregs->gpr[13] = childregs->gpr[6];
+ childregs->gpr[13] = tls;
else
#endif
- childregs->gpr[2] = childregs->gpr[6];
+ childregs->gpr[2] = tls;
}
f = ret_from_fork;
@@ -2046,10 +2040,8 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
int count = 0;
int firstframe = 1;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- struct ftrace_ret_stack *ret_stack;
- extern void return_to_handler(void);
- unsigned long rth = (unsigned long)return_to_handler;
- int curr_frame = 0;
+ unsigned long ret_addr;
+ int ftrace_idx = 0;
#endif
if (tsk == NULL)
@@ -2078,15 +2070,10 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
if (!firstframe || ip != lr) {
printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- if ((ip == rth) && curr_frame >= 0) {
- ret_stack = ftrace_graph_get_ret_stack(current,
- curr_frame++);
- if (ret_stack)
- pr_cont(" (%pS)",
- (void *)ret_stack->ret);
- else
- curr_frame = -1;
- }
+ ret_addr = ftrace_graph_ret_addr(current,
+ &ftrace_idx, ip, stack);
+ if (ret_addr != ip)
+ pr_cont(" (%pS)", (void *)ret_addr);
#endif
if (firstframe)
pr_cont(" (unreliable)");
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 7159e791a70d..6620f37abe73 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -55,6 +55,7 @@
#include <asm/firmware.h>
#include <asm/dt_cpu_ftrs.h>
#include <asm/drmem.h>
+#include <asm/ultravisor.h>
#include <mm/mmu_decl.h>
@@ -702,9 +703,12 @@ void __init early_init_devtree(void *params)
#ifdef CONFIG_PPC_POWERNV
/* Some machines might need OPAL info for debugging, grab it now. */
of_scan_flat_dt(early_init_dt_scan_opal, NULL);
+
+ /* Scan tree for ultravisor feature */
+ of_scan_flat_dt(early_init_dt_scan_ultravisor, NULL);
#endif
-#ifdef CONFIG_FA_DUMP
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
/* scan tree to see if dump is active during last boot */
of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
#endif
@@ -731,7 +735,7 @@ void __init early_init_devtree(void *params)
if (PHYSICAL_START > MEMORY_START)
memblock_reserve(MEMORY_START, 0x8000);
reserve_kdump_trampoline();
-#ifdef CONFIG_FA_DUMP
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
/*
* If we fail to reserve memory for firmware-assisted dump then
* fallback to kexec based kdump.
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 514707ef6779..577345382b23 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -40,6 +40,7 @@
#include <asm/sections.h>
#include <asm/machdep.h>
#include <asm/asm-prototypes.h>
+#include <asm/ultravisor-api.h>
#include <linux/linux_logo.h>
@@ -94,7 +95,7 @@ static int of_workarounds __prombss;
#define PROM_BUG() do { \
prom_printf("kernel BUG at %s line 0x%x!\n", \
__FILE__, __LINE__); \
- __asm__ __volatile__(".long " BUG_ILLEGAL_INSTR); \
+ __builtin_trap(); \
} while (0)
#ifdef DEBUG_PROM
@@ -171,6 +172,10 @@ static bool __prombss prom_radix_disable;
static bool __prombss prom_xive_disable;
#endif
+#ifdef CONFIG_PPC_SVM
+static bool __prombss prom_svm_enable;
+#endif
+
struct platform_support {
bool hash_mmu;
bool radix_mmu;
@@ -298,16 +303,24 @@ static char __init *prom_strstr(const char *s1, const char *s2)
return NULL;
}
-static size_t __init prom_strlcpy(char *dest, const char *src, size_t size)
+static size_t __init prom_strlcat(char *dest, const char *src, size_t count)
{
- size_t ret = prom_strlen(src);
+ size_t dsize = prom_strlen(dest);
+ size_t len = prom_strlen(src);
+ size_t res = dsize + len;
+
+ /* This would be a bug */
+ if (dsize >= count)
+ return count;
+
+ dest += dsize;
+ count -= dsize;
+ if (len >= count)
+ len = count-1;
+ memcpy(dest, src, len);
+ dest[len] = 0;
+ return res;
- if (size) {
- size_t len = (ret >= size) ? size - 1 : ret;
- memcpy(dest, src, len);
- dest[len] = '\0';
- }
- return ret;
}
#ifdef CONFIG_PPC_PSERIES
@@ -759,10 +772,14 @@ static void __init early_cmdline_parse(void)
prom_cmd_line[0] = 0;
p = prom_cmd_line;
- if ((long)prom.chosen > 0)
+
+ if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && (long)prom.chosen > 0)
l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1);
- if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && (l <= 0 || p[0] == '\0')) /* dbl check */
- prom_strlcpy(prom_cmd_line, CONFIG_CMDLINE, sizeof(prom_cmd_line));
+
+ if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || l <= 0 || p[0] == '\0')
+ prom_strlcat(prom_cmd_line, " " CONFIG_CMDLINE,
+ sizeof(prom_cmd_line));
+
prom_printf("command line: %s\n", prom_cmd_line);
#ifdef CONFIG_PPC64
@@ -812,6 +829,17 @@ static void __init early_cmdline_parse(void)
prom_debug("XIVE disabled from cmdline\n");
}
#endif /* CONFIG_PPC_PSERIES */
+
+#ifdef CONFIG_PPC_SVM
+ opt = prom_strstr(prom_cmd_line, "svm=");
+ if (opt) {
+ bool val;
+
+ opt += sizeof("svm=") - 1;
+ if (!prom_strtobool(opt, &val))
+ prom_svm_enable = val;
+ }
+#endif /* CONFIG_PPC_SVM */
}
#ifdef CONFIG_PPC_PSERIES
@@ -1037,7 +1065,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
.reserved2 = 0,
.reserved3 = 0,
.subprocessors = 1,
- .byte22 = OV5_FEAT(OV5_DRMEM_V2),
+ .byte22 = OV5_FEAT(OV5_DRMEM_V2) | OV5_FEAT(OV5_DRC_INFO),
.intarch = 0,
.mmu = 0,
.hash_ext = 0,
@@ -1712,6 +1740,43 @@ static void __init prom_close_stdin(void)
}
}
+#ifdef CONFIG_PPC_SVM
+static int prom_rtas_hcall(uint64_t args)
+{
+ register uint64_t arg1 asm("r3") = H_RTAS;
+ register uint64_t arg2 asm("r4") = args;
+
+ asm volatile("sc 1\n" : "=r" (arg1) :
+ "r" (arg1),
+ "r" (arg2) :);
+ return arg1;
+}
+
+static struct rtas_args __prombss os_term_args;
+
+static void __init prom_rtas_os_term(char *str)
+{
+ phandle rtas_node;
+ __be32 val;
+ u32 token;
+
+ prom_debug("%s: start...\n", __func__);
+ rtas_node = call_prom("finddevice", 1, 1, ADDR("/rtas"));
+ prom_debug("rtas_node: %x\n", rtas_node);
+ if (!PHANDLE_VALID(rtas_node))
+ return;
+
+ val = 0;
+ prom_getprop(rtas_node, "ibm,os-term", &val, sizeof(val));
+ token = be32_to_cpu(val);
+ prom_debug("ibm,os-term: %x\n", token);
+ if (token == 0)
+ prom_panic("Could not get token for ibm,os-term\n");
+ os_term_args.token = cpu_to_be32(token);
+ prom_rtas_hcall((uint64_t)&os_term_args);
+}
+#endif /* CONFIG_PPC_SVM */
+
/*
* Allocate room for and instantiate RTAS
*/
@@ -3168,6 +3233,59 @@ static void unreloc_toc(void)
#endif
#endif
+#ifdef CONFIG_PPC_SVM
+/*
+ * Perform the Enter Secure Mode ultracall.
+ */
+static int enter_secure_mode(unsigned long kbase, unsigned long fdt)
+{
+ register unsigned long r3 asm("r3") = UV_ESM;
+ register unsigned long r4 asm("r4") = kbase;
+ register unsigned long r5 asm("r5") = fdt;
+
+ asm volatile("sc 2" : "+r"(r3) : "r"(r4), "r"(r5));
+
+ return r3;
+}
+
+/*
+ * Call the Ultravisor to transfer us to secure memory if we have an ESM blob.
+ */
+static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
+{
+ int ret;
+
+ if (!prom_svm_enable)
+ return;
+
+ /* Switch to secure mode. */
+ prom_printf("Switching to secure mode.\n");
+
+ /*
+ * The ultravisor will do an integrity check of the kernel image but we
+ * relocated it so the check will fail. Restore the original image by
+ * relocating it back to the kernel virtual base address.
+ */
+ if (IS_ENABLED(CONFIG_RELOCATABLE))
+ relocate(KERNELBASE);
+
+ ret = enter_secure_mode(kbase, fdt);
+
+ /* Relocate the kernel again. */
+ if (IS_ENABLED(CONFIG_RELOCATABLE))
+ relocate(kbase);
+
+ if (ret != U_SUCCESS) {
+ prom_printf("Returned %d from switching to secure mode.\n", ret);
+ prom_rtas_os_term("Switch to secure mode failed.\n");
+ }
+}
+#else
+static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
+{
+}
+#endif /* CONFIG_PPC_SVM */
+
/*
* We enter here early on, when the Open Firmware prom is still
* handling exceptions and the MMU hash table for us.
@@ -3366,6 +3484,9 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
unreloc_toc();
#endif
+ /* Move to secure memory if we're supposed to be secure guests. */
+ setup_secure_guest(kbase, hdr);
+
__start(hdr, kbase, 0, 0, 0, 0, 0);
return 0;
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 160bef0d553d..b183ab9c5107 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -26,14 +26,15 @@ _end enter_prom $MEM_FUNCS reloc_offset __secondary_hold
__secondary_hold_acknowledge __secondary_hold_spinloop __start
logo_linux_clut224 btext_prepare_BAT
reloc_got2 kernstart_addr memstart_addr linux_banner _stext
-__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
+__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC.
+relocate"
NM="$1"
OBJ="$2"
ERROR=0
-function check_section()
+check_section()
{
file=$1
section=$2
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 8c92febf5f44..25c0424e8868 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -2425,7 +2425,8 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
return -EIO;
hw_brk.address = data & (~HW_BRK_TYPE_DABR);
hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
- hw_brk.len = 8;
+ hw_brk.len = DABR_MAX_LEN;
+ hw_brk.hw_len = DABR_MAX_LEN;
set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR);
#ifdef CONFIG_HAVE_HW_BREAKPOINT
bp = thread->ptrace_bps[0];
@@ -2439,6 +2440,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
if (bp) {
attr = bp->attr;
attr.bp_addr = hw_brk.address;
+ attr.bp_len = DABR_MAX_LEN;
arch_bp_generic_fields(hw_brk.type, &attr.bp_type);
/* Enable breakpoint */
@@ -2456,7 +2458,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
/* Create a new breakpoint request if one doesn't exist already */
hw_breakpoint_init(&attr);
attr.bp_addr = hw_brk.address;
- attr.bp_len = 8;
+ attr.bp_len = DABR_MAX_LEN;
arch_bp_generic_fields(hw_brk.type,
&attr.bp_type);
@@ -2880,18 +2882,14 @@ static long ppc_set_hwdebug(struct task_struct *child,
if ((unsigned long)bp_info->addr >= TASK_SIZE)
return -EIO;
- brk.address = bp_info->addr & ~7UL;
+ brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN;
brk.type = HW_BRK_TYPE_TRANSLATE;
- brk.len = 8;
+ brk.len = DABR_MAX_LEN;
if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
brk.type |= HW_BRK_TYPE_READ;
if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
brk.type |= HW_BRK_TYPE_WRITE;
#ifdef CONFIG_HAVE_HW_BREAKPOINT
- /*
- * Check if the request is for 'range' breakpoints. We can
- * support it if range < 8 bytes.
- */
if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
len = bp_info->addr2 - bp_info->addr;
else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
@@ -2904,7 +2902,7 @@ static long ppc_set_hwdebug(struct task_struct *child,
/* Create a new breakpoint request if one doesn't exist already */
hw_breakpoint_init(&attr);
- attr.bp_addr = (unsigned long)bp_info->addr & ~HW_BREAKPOINT_ALIGN;
+ attr.bp_addr = (unsigned long)bp_info->addr;
attr.bp_len = len;
arch_bp_generic_fields(brk.type, &attr.bp_type);
@@ -3361,6 +3359,12 @@ void do_syscall_trace_leave(struct pt_regs *regs)
user_enter();
}
+void __init pt_regs_check(void);
+
+/*
+ * Dummy function, its purpose is to break the build if struct pt_regs and
+ * struct user_pt_regs don't match.
+ */
void __init pt_regs_check(void)
{
BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=
@@ -3398,4 +3402,67 @@ void __init pt_regs_check(void)
offsetof(struct user_pt_regs, result));
BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
+
+ // Now check that the pt_regs offsets match the uapi #defines
+ #define CHECK_REG(_pt, _reg) \
+ BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \
+ sizeof(unsigned long)));
+
+ CHECK_REG(PT_R0, gpr[0]);
+ CHECK_REG(PT_R1, gpr[1]);
+ CHECK_REG(PT_R2, gpr[2]);
+ CHECK_REG(PT_R3, gpr[3]);
+ CHECK_REG(PT_R4, gpr[4]);
+ CHECK_REG(PT_R5, gpr[5]);
+ CHECK_REG(PT_R6, gpr[6]);
+ CHECK_REG(PT_R7, gpr[7]);
+ CHECK_REG(PT_R8, gpr[8]);
+ CHECK_REG(PT_R9, gpr[9]);
+ CHECK_REG(PT_R10, gpr[10]);
+ CHECK_REG(PT_R11, gpr[11]);
+ CHECK_REG(PT_R12, gpr[12]);
+ CHECK_REG(PT_R13, gpr[13]);
+ CHECK_REG(PT_R14, gpr[14]);
+ CHECK_REG(PT_R15, gpr[15]);
+ CHECK_REG(PT_R16, gpr[16]);
+ CHECK_REG(PT_R17, gpr[17]);
+ CHECK_REG(PT_R18, gpr[18]);
+ CHECK_REG(PT_R19, gpr[19]);
+ CHECK_REG(PT_R20, gpr[20]);
+ CHECK_REG(PT_R21, gpr[21]);
+ CHECK_REG(PT_R22, gpr[22]);
+ CHECK_REG(PT_R23, gpr[23]);
+ CHECK_REG(PT_R24, gpr[24]);
+ CHECK_REG(PT_R25, gpr[25]);
+ CHECK_REG(PT_R26, gpr[26]);
+ CHECK_REG(PT_R27, gpr[27]);
+ CHECK_REG(PT_R28, gpr[28]);
+ CHECK_REG(PT_R29, gpr[29]);
+ CHECK_REG(PT_R30, gpr[30]);
+ CHECK_REG(PT_R31, gpr[31]);
+ CHECK_REG(PT_NIP, nip);
+ CHECK_REG(PT_MSR, msr);
+ CHECK_REG(PT_ORIG_R3, orig_gpr3);
+ CHECK_REG(PT_CTR, ctr);
+ CHECK_REG(PT_LNK, link);
+ CHECK_REG(PT_XER, xer);
+ CHECK_REG(PT_CCR, ccr);
+#ifdef CONFIG_PPC64
+ CHECK_REG(PT_SOFTE, softe);
+#else
+ CHECK_REG(PT_MQ, mq);
+#endif
+ CHECK_REG(PT_TRAP, trap);
+ CHECK_REG(PT_DAR, dar);
+ CHECK_REG(PT_DSISR, dsisr);
+ CHECK_REG(PT_RESULT, result);
+ #undef CHECK_REG
+
+ BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long));
+
+ /*
+ * PT_DSCR isn't a real reg, but it's important that it doesn't overlap the
+ * real registers.
+ */
+ BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long));
}
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index 487dcd8da4de..2d33f342a293 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -159,12 +159,12 @@ static int poweron_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_poweron_show, NULL);
}
-static const struct file_operations ppc_rtas_poweron_operations = {
- .open = poweron_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_poweron_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_poweron_proc_ops = {
+ .proc_open = poweron_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_poweron_write,
+ .proc_release = single_release,
};
static int progress_open(struct inode *inode, struct file *file)
@@ -172,12 +172,12 @@ static int progress_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_progress_show, NULL);
}
-static const struct file_operations ppc_rtas_progress_operations = {
- .open = progress_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_progress_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_progress_proc_ops = {
+ .proc_open = progress_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_progress_write,
+ .proc_release = single_release,
};
static int clock_open(struct inode *inode, struct file *file)
@@ -185,12 +185,12 @@ static int clock_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_clock_show, NULL);
}
-static const struct file_operations ppc_rtas_clock_operations = {
- .open = clock_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_clock_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_clock_proc_ops = {
+ .proc_open = clock_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_clock_write,
+ .proc_release = single_release,
};
static int tone_freq_open(struct inode *inode, struct file *file)
@@ -198,12 +198,12 @@ static int tone_freq_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_tone_freq_show, NULL);
}
-static const struct file_operations ppc_rtas_tone_freq_operations = {
- .open = tone_freq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_tone_freq_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_tone_freq_proc_ops = {
+ .proc_open = tone_freq_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_tone_freq_write,
+ .proc_release = single_release,
};
static int tone_volume_open(struct inode *inode, struct file *file)
@@ -211,12 +211,12 @@ static int tone_volume_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_tone_volume_show, NULL);
}
-static const struct file_operations ppc_rtas_tone_volume_operations = {
- .open = tone_volume_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_tone_volume_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_tone_volume_proc_ops = {
+ .proc_open = tone_volume_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_tone_volume_write,
+ .proc_release = single_release,
};
static int ppc_rtas_find_all_sensors(void);
@@ -238,17 +238,17 @@ static int __init proc_rtas_init(void)
return -ENODEV;
proc_create("powerpc/rtas/progress", 0644, NULL,
- &ppc_rtas_progress_operations);
+ &ppc_rtas_progress_proc_ops);
proc_create("powerpc/rtas/clock", 0644, NULL,
- &ppc_rtas_clock_operations);
+ &ppc_rtas_clock_proc_ops);
proc_create("powerpc/rtas/poweron", 0644, NULL,
- &ppc_rtas_poweron_operations);
+ &ppc_rtas_poweron_proc_ops);
proc_create_single("powerpc/rtas/sensors", 0444, NULL,
ppc_rtas_sensors_show);
proc_create("powerpc/rtas/frequency", 0644, NULL,
- &ppc_rtas_tone_freq_operations);
+ &ppc_rtas_tone_freq_proc_ops);
proc_create("powerpc/rtas/volume", 0644, NULL,
- &ppc_rtas_tone_volume_operations);
+ &ppc_rtas_tone_volume_proc_ops);
proc_create_single("powerpc/rtas/rmo_buffer", 0400, NULL,
ppc_rtas_rmo_buf_show);
return 0;
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 5faf0a64c92b..c5fa251b8950 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -16,6 +16,7 @@
#include <linux/capability.h>
#include <linux/delay.h>
#include <linux/cpu.h>
+#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
@@ -871,15 +872,17 @@ static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
return 0;
for_each_cpu(cpu, cpus) {
+ struct device *dev = get_cpu_device(cpu);
+
switch (state) {
case DOWN:
- cpuret = cpu_down(cpu);
+ cpuret = device_offline(dev);
break;
case UP:
- cpuret = cpu_up(cpu);
+ cpuret = device_online(dev);
break;
}
- if (cpuret) {
+ if (cpuret < 0) {
pr_debug("%s: cpu_%s for cpu#%d returned %d.\n",
__func__,
((state == UP) ? "up" : "down"),
@@ -896,6 +899,7 @@ static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
cpumask_clear_cpu(cpu, cpus);
}
}
+ cond_resched();
}
return ret;
@@ -922,13 +926,11 @@ int rtas_online_cpus_mask(cpumask_var_t cpus)
return ret;
}
-EXPORT_SYMBOL(rtas_online_cpus_mask);
int rtas_offline_cpus_mask(cpumask_var_t cpus)
{
return rtas_cpu_state_change_mask(DOWN, cpus);
}
-EXPORT_SYMBOL(rtas_offline_cpus_mask);
int rtas_ibm_suspend_me(u64 handle)
{
@@ -968,6 +970,8 @@ int rtas_ibm_suspend_me(u64 handle)
data.token = rtas_token("ibm,suspend-me");
data.complete = &done;
+ lock_device_hotplug();
+
/* All present CPUs must be online */
cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask);
cpuret = rtas_online_cpus_mask(offline_mask);
@@ -1006,6 +1010,7 @@ out_hotplug_enable:
__func__);
out:
+ unlock_device_hotplug();
free_cpumask_var(offline_mask);
return atomic_read(&data.error);
}
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index 84f794782c62..a99179d83538 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -655,7 +655,7 @@ struct rtas_flash_file {
const char *filename;
const char *rtas_call_name;
int *status;
- const struct file_operations fops;
+ const struct proc_ops ops;
};
static const struct rtas_flash_file rtas_flash_files[] = {
@@ -663,36 +663,36 @@ static const struct rtas_flash_file rtas_flash_files[] = {
.filename = "powerpc/rtas/" FIRMWARE_FLASH_NAME,
.rtas_call_name = "ibm,update-flash-64-and-reboot",
.status = &rtas_update_flash_data.status,
- .fops.read = rtas_flash_read_msg,
- .fops.write = rtas_flash_write,
- .fops.release = rtas_flash_release,
- .fops.llseek = default_llseek,
+ .ops.proc_read = rtas_flash_read_msg,
+ .ops.proc_write = rtas_flash_write,
+ .ops.proc_release = rtas_flash_release,
+ .ops.proc_lseek = default_llseek,
},
{
.filename = "powerpc/rtas/" FIRMWARE_UPDATE_NAME,
.rtas_call_name = "ibm,update-flash-64-and-reboot",
.status = &rtas_update_flash_data.status,
- .fops.read = rtas_flash_read_num,
- .fops.write = rtas_flash_write,
- .fops.release = rtas_flash_release,
- .fops.llseek = default_llseek,
+ .ops.proc_read = rtas_flash_read_num,
+ .ops.proc_write = rtas_flash_write,
+ .ops.proc_release = rtas_flash_release,
+ .ops.proc_lseek = default_llseek,
},
{
.filename = "powerpc/rtas/" VALIDATE_FLASH_NAME,
.rtas_call_name = "ibm,validate-flash-image",
.status = &rtas_validate_flash_data.status,
- .fops.read = validate_flash_read,
- .fops.write = validate_flash_write,
- .fops.release = validate_flash_release,
- .fops.llseek = default_llseek,
+ .ops.proc_read = validate_flash_read,
+ .ops.proc_write = validate_flash_write,
+ .ops.proc_release = validate_flash_release,
+ .ops.proc_lseek = default_llseek,
},
{
.filename = "powerpc/rtas/" MANAGE_FLASH_NAME,
.rtas_call_name = "ibm,manage-flash-image",
.status = &rtas_manage_flash_data.status,
- .fops.read = manage_flash_read,
- .fops.write = manage_flash_write,
- .fops.llseek = default_llseek,
+ .ops.proc_read = manage_flash_read,
+ .ops.proc_write = manage_flash_write,
+ .ops.proc_lseek = default_llseek,
}
};
@@ -723,7 +723,7 @@ static int __init rtas_flash_init(void)
const struct rtas_flash_file *f = &rtas_flash_files[i];
int token;
- if (!proc_create(f->filename, 0600, NULL, &f->fops))
+ if (!proc_create(f->filename, 0600, NULL, &f->ops))
goto enomem;
/*
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 8d02e047f96a..89b798f8f656 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -385,12 +385,12 @@ static __poll_t rtas_log_poll(struct file *file, poll_table * wait)
return 0;
}
-static const struct file_operations proc_rtas_log_operations = {
- .read = rtas_log_read,
- .poll = rtas_log_poll,
- .open = rtas_log_open,
- .release = rtas_log_release,
- .llseek = noop_llseek,
+static const struct proc_ops rtas_log_proc_ops = {
+ .proc_read = rtas_log_read,
+ .proc_poll = rtas_log_poll,
+ .proc_open = rtas_log_open,
+ .proc_release = rtas_log_release,
+ .proc_lseek = noop_llseek,
};
static int enable_surveillance(int timeout)
@@ -572,7 +572,7 @@ static int __init rtas_init(void)
return -ENODEV;
entry = proc_create("powerpc/rtas/error_log", 0400, NULL,
- &proc_rtas_log_operations);
+ &rtas_log_proc_ops);
if (!entry)
printk(KERN_ERR "Failed to create error_log proc entry\n");
diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c
new file mode 100644
index 000000000000..4b982324d368
--- /dev/null
+++ b/arch/powerpc/kernel/secure_boot.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ */
+#include <linux/types.h>
+#include <linux/of.h>
+#include <asm/secure_boot.h>
+
+static struct device_node *get_ppc_fw_sb_node(void)
+{
+ static const struct of_device_id ids[] = {
+ { .compatible = "ibm,secureboot", },
+ { .compatible = "ibm,secureboot-v1", },
+ { .compatible = "ibm,secureboot-v2", },
+ {},
+ };
+
+ return of_find_matching_node(NULL, ids);
+}
+
+bool is_ppc_secureboot_enabled(void)
+{
+ struct device_node *node;
+ bool enabled = false;
+
+ node = get_ppc_fw_sb_node();
+ enabled = of_property_read_bool(node, "os-secureboot-enforcing");
+
+ of_node_put(node);
+
+ pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled");
+
+ return enabled;
+}
+
+bool is_ppc_trustedboot_enabled(void)
+{
+ struct device_node *node;
+ bool enabled = false;
+
+ node = get_ppc_fw_sb_node();
+ enabled = of_property_read_bool(node, "trusted-enabled");
+
+ of_node_put(node);
+
+ pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled");
+
+ return enabled;
+}
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index e1c9cf079503..bd70f5be1c27 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -16,7 +16,7 @@
#include <asm/setup.h>
-unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
+u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
enum count_cache_flush_type {
COUNT_CACHE_FLUSH_NONE = 0x1,
@@ -24,11 +24,12 @@ enum count_cache_flush_type {
COUNT_CACHE_FLUSH_HW = 0x4,
};
static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
+static bool link_stack_flush_enabled;
bool barrier_nospec_enabled;
static bool no_nospec;
static bool btb_flush_enabled;
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
static bool no_spectrev2;
#endif
@@ -94,13 +95,14 @@ static int barrier_nospec_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_barrier_nospec,
- barrier_nospec_get, barrier_nospec_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_barrier_nospec, barrier_nospec_get,
+ barrier_nospec_set, "%llu\n");
static __init int barrier_nospec_debugfs_init(void)
{
- debugfs_create_file("barrier_nospec", 0600, powerpc_debugfs_root, NULL,
- &fops_barrier_nospec);
+ debugfs_create_file_unsafe("barrier_nospec", 0600,
+ powerpc_debugfs_root, NULL,
+ &fops_barrier_nospec);
return 0;
}
device_initcall(barrier_nospec_debugfs_init);
@@ -108,13 +110,13 @@ device_initcall(barrier_nospec_debugfs_init);
static __init int security_feature_debugfs_init(void)
{
debugfs_create_x64("security_features", 0400, powerpc_debugfs_root,
- (u64 *)&powerpc_security_features);
+ &powerpc_security_features);
return 0;
}
device_initcall(security_feature_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
static int __init handle_nospectre_v2(char *p)
{
no_spectrev2 = true;
@@ -122,6 +124,9 @@ static int __init handle_nospectre_v2(char *p)
return 0;
}
early_param("nospectre_v2", handle_nospectre_v2);
+#endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
void setup_spectre_v2(void)
{
if (no_spectrev2 || cpu_mitigations_off())
@@ -138,32 +143,33 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha
thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV);
- if (rfi_flush || thread_priv) {
+ if (rfi_flush) {
struct seq_buf s;
seq_buf_init(&s, buf, PAGE_SIZE - 1);
- seq_buf_printf(&s, "Mitigation: ");
-
- if (rfi_flush)
- seq_buf_printf(&s, "RFI Flush");
-
- if (rfi_flush && thread_priv)
- seq_buf_printf(&s, ", ");
-
+ seq_buf_printf(&s, "Mitigation: RFI Flush");
if (thread_priv)
- seq_buf_printf(&s, "L1D private per thread");
+ seq_buf_printf(&s, ", L1D private per thread");
seq_buf_printf(&s, "\n");
return s.len;
}
+ if (thread_priv)
+ return sprintf(buf, "Vulnerable: L1D private per thread\n");
+
if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) &&
!security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
return sprintf(buf, "Not affected\n");
return sprintf(buf, "Vulnerable\n");
}
+
+ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_meltdown(dev, attr, buf);
+}
#endif
ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
@@ -209,11 +215,19 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
if (ccd)
seq_buf_printf(&s, "Indirect branch cache disabled");
+
+ if (link_stack_flush_enabled)
+ seq_buf_printf(&s, ", Software link stack flush");
+
} else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) {
seq_buf_printf(&s, "Mitigation: Software count cache flush");
if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW)
seq_buf_printf(&s, " (hardware accelerated)");
+
+ if (link_stack_flush_enabled)
+ seq_buf_printf(&s, ", Software link stack flush");
+
} else if (btb_flush_enabled) {
seq_buf_printf(&s, "Mitigation: Branch predictor state flush");
} else {
@@ -364,28 +378,61 @@ static int stf_barrier_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set,
+ "%llu\n");
static __init int stf_barrier_debugfs_init(void)
{
- debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root, NULL, &fops_stf_barrier);
+ debugfs_create_file_unsafe("stf_barrier", 0600, powerpc_debugfs_root,
+ NULL, &fops_stf_barrier);
return 0;
}
device_initcall(stf_barrier_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
+static void no_count_cache_flush(void)
+{
+ count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
+ pr_info("count-cache-flush: software flush disabled.\n");
+}
+
static void toggle_count_cache_flush(bool enable)
{
- if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
+ if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE) &&
+ !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK))
+ enable = false;
+
+ if (!enable) {
patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP);
- count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
- pr_info("count-cache-flush: software flush disabled.\n");
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ patch_instruction_site(&patch__call_kvm_flush_link_stack, PPC_INST_NOP);
+#endif
+ pr_info("link-stack-flush: software flush disabled.\n");
+ link_stack_flush_enabled = false;
+ no_count_cache_flush();
return;
}
+ // This enables the branch from _switch to flush_count_cache
patch_branch_site(&patch__call_flush_count_cache,
(u64)&flush_count_cache, BRANCH_SET_LINK);
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ // This enables the branch from guest_exit_cont to kvm_flush_link_stack
+ patch_branch_site(&patch__call_kvm_flush_link_stack,
+ (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
+#endif
+
+ pr_info("link-stack-flush: software flush enabled.\n");
+ link_stack_flush_enabled = true;
+
+ // If we just need to flush the link stack, patch an early return
+ if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
+ patch_instruction_site(&patch__flush_link_stack_return, PPC_INST_BLR);
+ no_count_cache_flush();
+ return;
+ }
+
if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) {
count_cache_flush_type = COUNT_CACHE_FLUSH_SW;
pr_info("count-cache-flush: full software flush sequence enabled.\n");
@@ -399,7 +446,26 @@ static void toggle_count_cache_flush(bool enable)
void setup_count_cache_flush(void)
{
- toggle_count_cache_flush(true);
+ bool enable = true;
+
+ if (no_spectrev2 || cpu_mitigations_off()) {
+ if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED) ||
+ security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED))
+ pr_warn("Spectre v2 mitigations not fully under software control, can't disable\n");
+
+ enable = false;
+ }
+
+ /*
+ * There's no firmware feature flag/hypervisor bit to tell us we need to
+ * flush the link stack on context switch. So we set it here if we see
+ * either of the Spectre v2 mitigations that aim to protect userspace.
+ */
+ if (security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED) ||
+ security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE))
+ security_ftr_set(SEC_FTR_FLUSH_LINK_STACK);
+
+ toggle_count_cache_flush(enable);
}
#ifdef CONFIG_DEBUG_FS
@@ -429,13 +495,14 @@ static int count_cache_flush_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get,
- count_cache_flush_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get,
+ count_cache_flush_set, "%llu\n");
static __init int count_cache_flush_debugfs_init(void)
{
- debugfs_create_file("count_cache_flush", 0600, powerpc_debugfs_root,
- NULL, &fops_count_cache_flush);
+ debugfs_create_file_unsafe("count_cache_flush", 0600,
+ powerpc_debugfs_root, NULL,
+ &fops_count_cache_flush);
return 0;
}
device_initcall(count_cache_flush_debugfs_init);
diff --git a/arch/powerpc/kernel/secvar-ops.c b/arch/powerpc/kernel/secvar-ops.c
new file mode 100644
index 000000000000..6a29777d6a2d
--- /dev/null
+++ b/arch/powerpc/kernel/secvar-ops.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ *
+ * This file initializes secvar operations for PowerPC Secureboot
+ */
+
+#include <linux/cache.h>
+#include <asm/secvar.h>
+
+const struct secvar_operations *secvar_ops __ro_after_init;
+
+void set_secvar_ops(const struct secvar_operations *ops)
+{
+ secvar_ops = ops;
+}
diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c
new file mode 100644
index 000000000000..a0a78aba2083
--- /dev/null
+++ b/arch/powerpc/kernel/secvar-sysfs.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 IBM Corporation <nayna@linux.ibm.com>
+ *
+ * This code exposes secure variables to user via sysfs
+ */
+
+#define pr_fmt(fmt) "secvar-sysfs: "fmt
+
+#include <linux/slab.h>
+#include <linux/compat.h>
+#include <linux/string.h>
+#include <linux/of.h>
+#include <asm/secvar.h>
+
+#define NAME_MAX_SIZE 1024
+
+static struct kobject *secvar_kobj;
+static struct kset *secvar_kset;
+
+static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ ssize_t rc = 0;
+ struct device_node *node;
+ const char *format;
+
+ node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend");
+ if (!of_device_is_available(node))
+ return -ENODEV;
+
+ rc = of_property_read_string(node, "format", &format);
+ if (rc)
+ return rc;
+
+ rc = sprintf(buf, "%s\n", format);
+
+ of_node_put(node);
+
+ return rc;
+}
+
+
+static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ uint64_t dsize;
+ int rc;
+
+ rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize);
+ if (rc) {
+ pr_err("Error retrieving %s variable size %d\n", kobj->name,
+ rc);
+ return rc;
+ }
+
+ return sprintf(buf, "%llu\n", dsize);
+}
+
+static ssize_t data_read(struct file *filep, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf, loff_t off,
+ size_t count)
+{
+ uint64_t dsize;
+ char *data;
+ int rc;
+
+ rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize);
+ if (rc) {
+ pr_err("Error getting %s variable size %d\n", kobj->name, rc);
+ return rc;
+ }
+ pr_debug("dsize is %llu\n", dsize);
+
+ data = kzalloc(dsize, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, data, &dsize);
+ if (rc) {
+ pr_err("Error getting %s variable %d\n", kobj->name, rc);
+ goto data_fail;
+ }
+
+ rc = memory_read_from_buffer(buf, count, &off, data, dsize);
+
+data_fail:
+ kfree(data);
+ return rc;
+}
+
+static ssize_t update_write(struct file *filep, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf, loff_t off,
+ size_t count)
+{
+ int rc;
+
+ pr_debug("count is %ld\n", count);
+ rc = secvar_ops->set(kobj->name, strlen(kobj->name) + 1, buf, count);
+ if (rc) {
+ pr_err("Error setting the %s variable %d\n", kobj->name, rc);
+ return rc;
+ }
+
+ return count;
+}
+
+static struct kobj_attribute format_attr = __ATTR_RO(format);
+
+static struct kobj_attribute size_attr = __ATTR_RO(size);
+
+static struct bin_attribute data_attr = __BIN_ATTR_RO(data, 0);
+
+static struct bin_attribute update_attr = __BIN_ATTR_WO(update, 0);
+
+static struct bin_attribute *secvar_bin_attrs[] = {
+ &data_attr,
+ &update_attr,
+ NULL,
+};
+
+static struct attribute *secvar_attrs[] = {
+ &size_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group secvar_attr_group = {
+ .attrs = secvar_attrs,
+ .bin_attrs = secvar_bin_attrs,
+};
+__ATTRIBUTE_GROUPS(secvar_attr);
+
+static struct kobj_type secvar_ktype = {
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = secvar_attr_groups,
+};
+
+static int update_kobj_size(void)
+{
+
+ struct device_node *node;
+ u64 varsize;
+ int rc = 0;
+
+ node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend");
+ if (!of_device_is_available(node)) {
+ rc = -ENODEV;
+ goto out;
+ }
+
+ rc = of_property_read_u64(node, "max-var-size", &varsize);
+ if (rc)
+ goto out;
+
+ data_attr.size = varsize;
+ update_attr.size = varsize;
+
+out:
+ of_node_put(node);
+
+ return rc;
+}
+
+static int secvar_sysfs_load(void)
+{
+ char *name;
+ uint64_t namesize = 0;
+ struct kobject *kobj;
+ int rc;
+
+ name = kzalloc(NAME_MAX_SIZE, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ do {
+ rc = secvar_ops->get_next(name, &namesize, NAME_MAX_SIZE);
+ if (rc) {
+ if (rc != -ENOENT)
+ pr_err("error getting secvar from firmware %d\n",
+ rc);
+ break;
+ }
+
+ kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
+ if (!kobj) {
+ rc = -ENOMEM;
+ break;
+ }
+
+ kobject_init(kobj, &secvar_ktype);
+
+ rc = kobject_add(kobj, &secvar_kset->kobj, "%s", name);
+ if (rc) {
+ pr_warn("kobject_add error %d for attribute: %s\n", rc,
+ name);
+ kobject_put(kobj);
+ kobj = NULL;
+ }
+
+ if (kobj)
+ kobject_uevent(kobj, KOBJ_ADD);
+
+ } while (!rc);
+
+ kfree(name);
+ return rc;
+}
+
+static int secvar_sysfs_init(void)
+{
+ int rc;
+
+ if (!secvar_ops) {
+ pr_warn("secvar: failed to retrieve secvar operations.\n");
+ return -ENODEV;
+ }
+
+ secvar_kobj = kobject_create_and_add("secvar", firmware_kobj);
+ if (!secvar_kobj) {
+ pr_err("secvar: Failed to create firmware kobj\n");
+ return -ENOMEM;
+ }
+
+ rc = sysfs_create_file(secvar_kobj, &format_attr.attr);
+ if (rc) {
+ kobject_put(secvar_kobj);
+ return -ENOMEM;
+ }
+
+ secvar_kset = kset_create_and_add("vars", NULL, secvar_kobj);
+ if (!secvar_kset) {
+ pr_err("secvar: sysfs kobject registration failed.\n");
+ kobject_put(secvar_kobj);
+ return -ENOMEM;
+ }
+
+ rc = update_kobj_size();
+ if (rc) {
+ pr_err("Cannot read the size of the attribute\n");
+ return rc;
+ }
+
+ secvar_sysfs_load();
+
+ return 0;
+}
+
+late_initcall(secvar_sysfs_init);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 1f8db666468d..7f8c890360fe 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -715,8 +715,28 @@ static struct notifier_block ppc_panic_block = {
.priority = INT_MIN /* may not return; must be done last */
};
+/*
+ * Dump out kernel offset information on panic.
+ */
+static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
+ void *p)
+{
+ pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n",
+ kaslr_offset(), KERNELBASE);
+
+ return 0;
+}
+
+static struct notifier_block kernel_offset_notifier = {
+ .notifier_call = dump_kernel_offset
+};
+
void __init setup_panic(void)
{
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0)
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &kernel_offset_notifier);
+
/* PPC64 always does a hard irq disable in its panic handler */
if (!IS_ENABLED(CONFIG_PPC64) && !ppc_md.panic)
return;
@@ -778,12 +798,6 @@ void ppc_printk_progress(char *s, unsigned short hex)
pr_info("%s\n", s);
}
-void arch_setup_pdev_archdata(struct platform_device *pdev)
-{
- pdev->archdata.dma_mask = DMA_BIT_MASK(32);
- pdev->dev.dma_mask = &pdev->archdata.dma_mask;
-}
-
static __init void print_system_info(void)
{
pr_info("-----------------------------------------------------\n");
@@ -806,9 +820,15 @@ static __init void print_system_info(void)
pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features);
#ifdef CONFIG_PPC64
pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
+#ifdef CONFIG_PPC_BOOK3S
+ pr_info("vmalloc start = 0x%lx\n", KERN_VIRT_START);
+ pr_info("IO start = 0x%lx\n", KERN_IO_START);
+ pr_info("vmemmap start = 0x%lx\n", (unsigned long)vmemmap);
+#endif
#endif
- print_system_hash_info();
+ if (!early_radix_enabled())
+ print_system_hash_info();
if (PHYSICAL_START > 0)
pr_info("physical_start = 0x%llx\n",
@@ -929,9 +949,6 @@ void __init setup_arch(char **cmdline_p)
early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
- if (IS_ENABLED(CONFIG_DUMMY_CONSOLE))
- conswitchp = &dummy_con;
-
if (ppc_md.setup_arch)
ppc_md.setup_arch();
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index c82577c4b15d..2dd0d9cb5a20 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -35,7 +35,7 @@ void exc_lvl_early_init(void);
static inline void exc_lvl_early_init(void) { };
#endif
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PPC64) || defined(CONFIG_VMAP_STACK)
void emergency_stack_init(void);
#else
static inline void emergency_stack_init(void) { };
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 94517e4a2723..5b49b26eb154 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -44,6 +44,7 @@
#include <asm/asm-prototypes.h>
#include <asm/kdump.h>
#include <asm/feature-fixups.h>
+#include <asm/early_ioremap.h>
#include "setup.h"
@@ -80,6 +81,8 @@ notrace void __init machine_init(u64 dt_ptr)
/* Configure static keys first, now that we're relocated. */
setup_feature_keys();
+ early_ioremap_setup();
+
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
@@ -137,7 +140,7 @@ arch_initcall(ppc_init);
static void *__init alloc_stack(void)
{
- void *ptr = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
+ void *ptr = memblock_alloc(THREAD_SIZE, THREAD_ALIGN);
if (!ptr)
panic("cannot allocate %d bytes for stack at %pS\n",
@@ -150,6 +153,9 @@ void __init irqstack_early_init(void)
{
unsigned int i;
+ if (IS_ENABLED(CONFIG_VMAP_STACK))
+ return;
+
/* interrupt stacks must be in lowmem, we get that for free on ppc32
* as the memblock is limited to lowmem by default */
for_each_possible_cpu(i) {
@@ -158,6 +164,18 @@ void __init irqstack_early_init(void)
}
}
+#ifdef CONFIG_VMAP_STACK
+void *emergency_ctx[NR_CPUS] __ro_after_init;
+
+void __init emergency_stack_init(void)
+{
+ unsigned int i;
+
+ for_each_possible_cpu(i)
+ emergency_ctx[i] = alloc_stack();
+}
+#endif
+
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
void __init exc_lvl_early_init(void)
{
@@ -206,6 +224,6 @@ __init void initialize_cache_info(void)
dcache_bsize = cur_cpu_spec->dcache_bsize;
icache_bsize = cur_cpu_spec->icache_bsize;
ucache_bsize = 0;
- if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601) || IS_ENABLED(CONFIG_E200))
ucache_bsize = icache_bsize = dcache_bsize;
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 44b4c432a273..e05e6dd67ae6 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -65,15 +65,10 @@
#include <asm/hw_irq.h>
#include <asm/feature-fixups.h>
#include <asm/kup.h>
+#include <asm/early_ioremap.h>
#include "setup.h"
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
int spinning_secondaries;
u64 ppc64_pft_size;
@@ -305,7 +300,7 @@ void __init early_setup(unsigned long dt_ptr)
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
- DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr);
+ udbg_printf(" -> %s(), dt_ptr: 0x%lx\n", __func__, dt_ptr);
/*
* Do early initialization using the flattened device
@@ -338,6 +333,8 @@ void __init early_setup(unsigned long dt_ptr)
apply_feature_fixups();
setup_feature_keys();
+ early_ioremap_setup();
+
/* Initialize the hash table or TLB handling */
early_init_mmu();
@@ -362,11 +359,11 @@ void __init early_setup(unsigned long dt_ptr)
*/
this_cpu_enable_ftrace();
- DBG(" <- early_setup()\n");
+ udbg_printf(" <- %s()\n", __func__);
#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
/*
- * This needs to be done *last* (after the above DBG() even)
+ * This needs to be done *last* (after the above udbg_printf() even)
*
* Right after we return from this function, we turn on the MMU
* which means the real-mode access trick that btext does will
@@ -436,8 +433,6 @@ void smp_release_cpus(void)
if (!use_spinloop())
return;
- DBG(" -> smp_release_cpus()\n");
-
/* All secondary cpus are spinning on a common spinloop, release them
* all now so they can start to spin on their individual paca
* spinloops. For non SMP kernels, the secondary cpus never get out
@@ -456,9 +451,7 @@ void smp_release_cpus(void)
break;
udelay(1);
}
- DBG("spinning_secondaries = %d\n", spinning_secondaries);
-
- DBG(" <- smp_release_cpus()\n");
+ pr_debug("spinning_secondaries = %d\n", spinning_secondaries);
}
#endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */
@@ -551,8 +544,6 @@ void __init initialize_cache_info(void)
struct device_node *cpu = NULL, *l2, *l3 = NULL;
u32 pvr;
- DBG(" -> initialize_cache_info()\n");
-
/*
* All shipping POWER8 machines have a firmware bug that
* puts incorrect information in the device-tree. This will
@@ -576,10 +567,10 @@ void __init initialize_cache_info(void)
*/
if (cpu) {
if (!parse_cache_info(cpu, false, &ppc64_caches.l1d))
- DBG("Argh, can't find dcache properties !\n");
+ pr_warn("Argh, can't find dcache properties !\n");
if (!parse_cache_info(cpu, true, &ppc64_caches.l1i))
- DBG("Argh, can't find icache properties !\n");
+ pr_warn("Argh, can't find icache properties !\n");
/*
* Try to find the L2 and L3 if any. Assume they are
@@ -604,8 +595,6 @@ void __init initialize_cache_info(void)
cur_cpu_spec->dcache_bsize = dcache_bsize;
cur_cpu_spec->icache_bsize = icache_bsize;
-
- DBG(" <- initialize_cache_info()\n");
}
/*
@@ -644,7 +633,7 @@ static void *__init alloc_stack(unsigned long limit, int cpu)
BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16);
- ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_SIZE,
+ ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_ALIGN,
MEMBLOCK_LOW_LIMIT, limit,
early_cpu_to_node(cpu));
if (!ptr)
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 1e2276963f6d..e2a46cfed5fd 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -182,7 +182,7 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
* FIXME: IMHO these tests do not belong in
* arch-dependent code, they are generic.
*/
- ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, NULL);
+ ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, stack);
#ifdef CONFIG_KPROBES
/*
* Mark stacktraces with kretprobed functions on them
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index 3bfb3888e897..078608ec2e92 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -79,7 +79,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len,
* sys_select() with the appropriate args. -- Cort
*/
int
-ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp)
+ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp)
{
if ( (unsigned long)n >= 4096 )
{
@@ -89,7 +89,7 @@ ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
|| __get_user(inp, ((fd_set __user * __user *)(buffer+1)))
|| __get_user(outp, ((fd_set __user * __user *)(buffer+2)))
|| __get_user(exp, ((fd_set __user * __user *)(buffer+3)))
- || __get_user(tvp, ((struct timeval __user * __user *)(buffer+4))))
+ || __get_user(tvp, ((struct __kernel_old_timeval __user * __user *)(buffer+4))))
return -EFAULT;
}
return sys_select(n, inp, outp, exp, tvp);
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 43f736ed47f2..35b61bfc1b1a 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -517,3 +517,5 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 nospu clone3 ppc_clone3
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index e2147d7c9e72..80a676da11cb 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -19,6 +19,7 @@
#include <asm/smp.h>
#include <asm/pmc.h>
#include <asm/firmware.h>
+#include <asm/svm.h>
#include "cacheinfo.h"
#include "setup.h"
@@ -715,6 +716,23 @@ static struct device_attribute pa6t_attrs[] = {
#endif /* HAS_PPC_PMC_PA6T */
#endif /* HAS_PPC_PMC_CLASSIC */
+#ifdef CONFIG_PPC_SVM
+static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", is_secure_guest());
+}
+static DEVICE_ATTR(svm, 0444, show_svm, NULL);
+
+static void create_svm_file(void)
+{
+ device_create_file(cpu_subsys.dev_root, &dev_attr_svm);
+}
+#else
+static void create_svm_file(void)
+{
+}
+#endif /* CONFIG_PPC_SVM */
+
static int register_cpu_online(unsigned int cpu)
{
struct cpu *c = &per_cpu(cpu_devices, cpu);
@@ -1058,6 +1076,8 @@ static int __init topology_init(void)
sysfs_create_dscr_default();
#endif /* CONFIG_PPC64 */
+ create_svm_file();
+
return 0;
}
subsys_initcall(topology_init);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 694522308cd5..1168e8b37e30 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -232,7 +232,7 @@ static u64 scan_dispatch_log(u64 stop_tb)
* Accumulate stolen time by scanning the dispatch trace log.
* Called on entry from user mode.
*/
-void accumulate_stolen_time(void)
+void notrace accumulate_stolen_time(void)
{
u64 sst, ust;
unsigned long save_irq_soft_mask = irq_soft_mask_return();
@@ -338,7 +338,7 @@ static unsigned long vtime_delta(struct task_struct *tsk,
return stime;
}
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
{
unsigned long stime, stime_scaled, steal_time;
struct cpu_accounting_data *acct = get_accounting(tsk);
@@ -366,7 +366,7 @@ void vtime_account_system(struct task_struct *tsk)
#endif
}
}
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
void vtime_account_idle(struct task_struct *tsk)
{
@@ -395,7 +395,7 @@ static void vtime_flush_scaled(struct task_struct *tsk,
/*
* Account the whole cputime accumulated in the paca
* Must be called with interrupts disabled.
- * Assumes that vtime_account_system/idle() has been called
+ * Assumes that vtime_account_kernel/idle() has been called
* recently (i.e. since the last entry from usermode) so that
* get_paca()->user_time_scaled is up to date.
*/
@@ -885,7 +885,7 @@ static notrace u64 timebase_read(struct clocksource *cs)
void update_vsyscall(struct timekeeper *tk)
{
- struct timespec xt;
+ struct timespec64 xt;
struct clocksource *clock = tk->tkr_mono.clock;
u32 mult = tk->tkr_mono.mult;
u32 shift = tk->tkr_mono.shift;
@@ -957,8 +957,10 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->tb_to_xs = new_tb_to_xs;
vdso_data->wtom_clock_sec = tk->wall_to_monotonic.tv_sec;
vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec;
- vdso_data->stamp_xtime = xt;
+ vdso_data->stamp_xtime_sec = xt.tv_sec;
+ vdso_data->stamp_xtime_nsec = xt.tv_nsec;
vdso_data->stamp_sec_fraction = frac_sec;
+ vdso_data->hrtimer_res = hrtimer_resolution;
smp_wmb();
++(vdso_data->tb_update_count);
}
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index be1ca98fce5c..7ea0ca044b65 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -944,7 +944,8 @@ int ftrace_disable_ftrace_graph_caller(void)
* Hook the return address and push it in the stack of return addrs
* in current thread info. Return the address we want to divert to.
*/
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
+ unsigned long sp)
{
unsigned long return_hooker;
@@ -956,7 +957,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
return_hooker = ppc_function_entry(return_to_handler);
- if (!function_graph_enter(parent, ip, 0, NULL))
+ if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
parent = return_hooker;
out:
return parent;
diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S
index 183f608efb81..e023ae59c429 100644
--- a/arch/powerpc/kernel/trace/ftrace_32.S
+++ b/arch/powerpc/kernel/trace/ftrace_32.S
@@ -50,6 +50,7 @@ _GLOBAL(ftrace_stub)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
_GLOBAL(ftrace_graph_caller)
+ addi r5, r1, 48
/* load r4 with local address */
lwz r4, 44(r1)
subi r4, r4, MCOUNT_INSN_SIZE
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 74acbf16a666..f9fd5f743eba 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -294,6 +294,7 @@ _GLOBAL(ftrace_graph_caller)
std r2, 24(r1)
ld r2, PACATOC(r13) /* get kernel TOC in r2 */
+ addi r5, r1, 112
mfctr r4 /* ftrace_caller has moved local addr here */
std r4, 40(r1)
mflr r3 /* ftrace_caller has restored LR from stack */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S
index e41a7d13c99c..6708e24db0ab 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S
@@ -41,6 +41,7 @@ _GLOBAL(ftrace_stub)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
_GLOBAL(ftrace_graph_caller)
+ addi r5, r1, 112
/* load r4 with local address */
ld r4, 128(r1)
subi r4, r4, MCOUNT_INSN_SIZE
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 11caa0291254..82a3438300fd 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -250,15 +250,22 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
}
NOKPROBE_SYMBOL(oops_end);
+static char *get_mmu_str(void)
+{
+ if (early_radix_enabled())
+ return " MMU=Radix";
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return " MMU=Hash";
+ return "";
+}
+
static int __die(const char *str, struct pt_regs *regs, long err)
{
printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
- printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s%s %s\n",
+ printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
- PAGE_SIZE / 1024,
- early_radix_enabled() ? " MMU=Radix" : "",
- early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ? " MMU=Hash" : "",
+ PAGE_SIZE / 1024, get_mmu_str(),
IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
@@ -472,6 +479,7 @@ void system_reset_exception(struct pt_regs *regs)
if (debugger(regs))
goto out;
+ kmsg_dump(KMSG_DUMP_OOPS);
/*
* A system reset is a request to dump, so we always send
* it through the crashdump code (if fadump or kdump are
@@ -1629,6 +1637,15 @@ void StackOverflow(struct pt_regs *regs)
panic("kernel stack overflow");
}
+void stack_overflow_exception(struct pt_regs *regs)
+{
+ enum ctx_state prev_state = exception_enter();
+
+ die("Kernel stack overflow", regs, SIGSEGV);
+
+ exception_exit(prev_state);
+}
+
void kernel_fp_unavailable_exception(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S
new file mode 100644
index 000000000000..07296bc39166
--- /dev/null
+++ b/arch/powerpc/kernel/ucall.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Generic code to perform an ultravisor call.
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#include <asm/ppc_asm.h>
+#include <asm/export.h>
+
+_GLOBAL(ucall_norets)
+EXPORT_SYMBOL_GPL(ucall_norets)
+ sc 2 /* Invoke the ultravisor */
+ blr /* Return r3 = status */
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index a384e7c8b01c..01595e8cafe7 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -120,13 +120,15 @@ int udbg_write(const char *s, int n)
#define UDBG_BUFSIZE 256
void udbg_printf(const char *fmt, ...)
{
- char buf[UDBG_BUFSIZE];
- va_list args;
+ if (udbg_putc) {
+ char buf[UDBG_BUFSIZE];
+ va_list args;
- va_start(args, fmt);
- vsnprintf(buf, UDBG_BUFSIZE, fmt, args);
- udbg_puts(buf);
- va_end(args);
+ va_start(args, fmt);
+ vsnprintf(buf, UDBG_BUFSIZE, fmt, args);
+ udbg_puts(buf);
+ va_end(args);
+ }
}
void __init udbg_progress(char *s, unsigned short hex)
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index d60598113a9f..b9a108411c0d 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -94,28 +94,6 @@ static struct vdso_patch_def vdso_patches[] = {
CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
"__kernel_sync_dicache", "__kernel_sync_dicache_p5"
},
-#ifdef CONFIG_PPC32
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_gettimeofday", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_clock_gettime", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_clock_getres", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_get_tbfreq", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_time", NULL
- },
-#endif
};
/*
@@ -750,11 +728,6 @@ static int __init vdso_init(void)
*/
vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages);
-#else
- vdso_data->dcache_block_size = L1_CACHE_BYTES;
- vdso_data->dcache_log_block_size = L1_CACHE_SHIFT;
- vdso_data->icache_block_size = L1_CACHE_BYTES;
- vdso_data->icache_log_block_size = L1_CACHE_SHIFT;
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index 06f54d947057..e147bbdc12cd 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -2,9 +2,7 @@
# List of files in the vdso, has to be asm only for now
-obj-vdso32-$(CONFIG_PPC64) = getcpu.o
-obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o \
- $(obj-vdso32-y)
+obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
# Build rules
diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
index 7f882e7b9f43..3440ddf21c8b 100644
--- a/arch/powerpc/kernel/vdso32/cacheflush.S
+++ b/arch/powerpc/kernel/vdso32/cacheflush.S
@@ -8,7 +8,9 @@
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
#include <asm/asm-offsets.h>
+#include <asm/cache.h>
.text
@@ -22,42 +24,62 @@
*/
V_FUNCTION_BEGIN(__kernel_sync_dicache)
.cfi_startproc
+#ifdef CONFIG_PPC64
mflr r12
.cfi_register lr,r12
- mr r11,r3
- bl __get_datapage@local
+ get_datapage r10, r0
mtlr r12
- mr r10,r3
+#endif
+#ifdef CONFIG_PPC64
lwz r7,CFG_DCACHE_BLOCKSZ(r10)
addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
+#else
+ li r5, L1_CACHE_BYTES - 1
+#endif
+ andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5 /* ensure we get enough */
+#ifdef CONFIG_PPC64
lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
srw. r8,r8,r9 /* compute line count */
+#else
+ srwi. r8, r8, L1_CACHE_SHIFT
+ mr r7, r6
+#endif
crclr cr0*4+so
beqlr /* nothing to do? */
mtctr r8
1: dcbst 0,r6
+#ifdef CONFIG_PPC64
add r6,r6,r7
+#else
+ addi r6, r6, L1_CACHE_BYTES
+#endif
bdnz 1b
sync
/* Now invalidate the instruction cache */
+#ifdef CONFIG_PPC64
lwz r7,CFG_ICACHE_BLOCKSZ(r10)
addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
+ andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5
lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
srw. r8,r8,r9 /* compute line count */
crclr cr0*4+so
beqlr /* nothing to do? */
+#endif
mtctr r8
+#ifdef CONFIG_PPC64
2: icbi 0,r6
add r6,r6,r7
+#else
+2: icbi 0, r7
+ addi r7, r7, L1_CACHE_BYTES
+#endif
bdnz 2b
isync
li r3,0
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index 6984125b9fc0..217bb630f8f9 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -10,35 +10,13 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
.text
.global __kernel_datapage_offset;
__kernel_datapage_offset:
.long 0
-V_FUNCTION_BEGIN(__get_datapage)
- .cfi_startproc
- /* We don't want that exposed or overridable as we want other objects
- * to be able to bl directly to here
- */
- .protected __get_datapage
- .hidden __get_datapage
-
- mflr r0
- .cfi_register lr,r0
-
- bcl 20,31,data_page_branch
-data_page_branch:
- mflr r3
- mtlr r0
- addi r3, r3, __kernel_datapage_offset-data_page_branch
- lwz r0,0(r3)
- .cfi_restore lr
- add r3,r0,r3
- blr
- .cfi_endproc
-V_FUNCTION_END(__get_datapage)
-
/*
* void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
*
@@ -52,11 +30,10 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- mr r4,r3
- bl __get_datapage@local
+ mr. r4,r3
+ get_datapage r3, r0
mtlr r12
addi r3,r3,CFG_SYSCALL_MAP32
- cmpli cr0,r4,0
beqlr
li r0,NR_syscalls
stw r0,0(r4)
@@ -70,11 +47,12 @@ V_FUNCTION_END(__kernel_get_syscall_map)
*
* returns the timebase frequency in HZ
*/
+#ifndef CONFIG_PPC_BOOK3S_601
V_FUNCTION_BEGIN(__kernel_get_tbfreq)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- bl __get_datapage@local
+ get_datapage r3, r0
lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
lwz r3,CFG_TB_TICKS_PER_SEC(r3)
mtlr r12
@@ -82,3 +60,4 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
blr
.cfi_endproc
V_FUNCTION_END(__kernel_get_tbfreq)
+#endif
diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S
index 63e914539e1a..ff5e214fec41 100644
--- a/arch/powerpc/kernel/vdso32/getcpu.S
+++ b/arch/powerpc/kernel/vdso32/getcpu.S
@@ -15,6 +15,7 @@
* int __kernel_getcpu(unsigned *cpu, unsigned *node);
*
*/
+#if defined(CONFIG_PPC64)
V_FUNCTION_BEGIN(__kernel_getcpu)
.cfi_startproc
mfspr r5,SPRN_SPRG_VDSO_READ
@@ -24,10 +25,26 @@ V_FUNCTION_BEGIN(__kernel_getcpu)
rlwinm r7,r5,16,31-15,31-0
beq cr0,1f
stw r6,0(r3)
-1: beq cr1,2f
- stw r7,0(r4)
-2: crclr cr0*4+so
+1: crclr cr0*4+so
li r3,0 /* always success */
+ beqlr cr1
+ stw r7,0(r4)
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_getcpu)
+#elif !defined(CONFIG_SMP)
+V_FUNCTION_BEGIN(__kernel_getcpu)
+ .cfi_startproc
+ cmpwi cr0, r3, 0
+ cmpwi cr1, r4, 0
+ li r5, 0
+ beq cr0, 1f
+ stw r5, 0(r3)
+1: li r3, 0 /* always success */
+ crclr cr0*4+so
+ beqlr cr1
+ stw r5, 0(r4)
blr
.cfi_endproc
V_FUNCTION_END(__kernel_getcpu)
+#endif
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index becd9f8767ed..a3951567118a 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -9,16 +9,15 @@
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
/* Offset for the low 32-bit part of a field of long type */
#ifdef CONFIG_PPC64
#define LOPART 4
-#define TSPEC_TV_SEC TSPC64_TV_SEC+LOPART
#else
#define LOPART 0
-#define TSPEC_TV_SEC TSPC32_TV_SEC
#endif
.text
@@ -33,28 +32,26 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
mflr r12
.cfi_register lr,r12
- mr r10,r3 /* r10 saves tv */
+ mr. r10,r3 /* r10 saves tv */
mr r11,r4 /* r11 saves tz */
- bl __get_datapage@local /* get data page */
- mr r9, r3 /* datapage ptr in r9 */
- cmplwi r10,0 /* check if tv is NULL */
+ get_datapage r9, r0
beq 3f
- lis r7,1000000@ha /* load up USEC_PER_SEC */
- addi r7,r7,1000000@l /* so we get microseconds in r4 */
+ LOAD_REG_IMMEDIATE(r7, 1000000) /* load up USEC_PER_SEC */
bl __do_get_tspec@local /* get sec/usec from tb & kernel */
stw r3,TVAL32_TV_SEC(r10)
stw r4,TVAL32_TV_USEC(r10)
3: cmplwi r11,0 /* check if tz is NULL */
- beq 1f
+ mtlr r12
+ crclr cr0*4+so
+ li r3,0
+ beqlr
+
lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */
lwz r5,CFG_TZ_DSTTIME(r9)
stw r4,TZONE_TZ_MINWEST(r11)
stw r5,TZONE_TZ_DSTTIME(r11)
-1: mtlr r12
- crclr cr0*4+so
- li r3,0
blr
.cfi_endproc
V_FUNCTION_END(__kernel_gettimeofday)
@@ -71,17 +68,23 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
cmpli cr0,r3,CLOCK_REALTIME
cmpli cr1,r3,CLOCK_MONOTONIC
cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
+
+ cmpli cr5,r3,CLOCK_REALTIME_COARSE
+ cmpli cr6,r3,CLOCK_MONOTONIC_COARSE
+ cror cr5*4+eq,cr5*4+eq,cr6*4+eq
+
+ cror cr0*4+eq,cr0*4+eq,cr5*4+eq
+ bne cr0, .Lgettime_fallback
mflr r12 /* r12 saves lr */
.cfi_register lr,r12
mr r11,r4 /* r11 saves tp */
- bl __get_datapage@local /* get data page */
- mr r9,r3 /* datapage ptr in r9 */
- lis r7,NSEC_PER_SEC@h /* want nanoseconds */
- ori r7,r7,NSEC_PER_SEC@l
-50: bl __do_get_tspec@local /* get sec/nsec from tb & kernel */
- bne cr1,80f /* not monotonic -> all done */
+ get_datapage r9, r0
+ LOAD_REG_IMMEDIATE(r7, NSEC_PER_SEC) /* load up NSEC_PER_SEC */
+ beq cr5, .Lcoarse_clocks
+.Lprecise_clocks:
+ bl __do_get_tspec@local /* get sec/nsec from tb & kernel */
+ bne cr1, .Lfinish /* not monotonic -> all done */
/*
* CLOCK_MONOTONIC
@@ -105,12 +108,53 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
add r9,r9,r0
lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
cmpl cr0,r8,r0 /* check if updated */
- bne- 50b
+ bne- .Lprecise_clocks
+ b .Lfinish_monotonic
+
+ /*
+ * For coarse clocks we get data directly from the vdso data page, so
+ * we don't need to call __do_get_tspec, but we still need to do the
+ * counter trick.
+ */
+.Lcoarse_clocks:
+ lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
+ andi. r0,r8,1 /* pending update ? loop */
+ bne- .Lcoarse_clocks
+ add r9,r9,r0 /* r0 is already 0 */
+
+ /*
+ * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE
+ * too
+ */
+ lwz r3,STAMP_XTIME_SEC+LOPART(r9)
+ lwz r4,STAMP_XTIME_NSEC+LOPART(r9)
+ bne cr6,1f
+
+ /* CLOCK_MONOTONIC_COARSE */
+ lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9)
+ lwz r6,WTOM_CLOCK_NSEC(r9)
+
+ /* check if counter has updated */
+ or r0,r6,r5
+1: or r0,r0,r3
+ or r0,r0,r4
+ xor r0,r0,r0
+ add r3,r3,r0
+ lwz r0,CFG_TB_UPDATE_COUNT+LOPART(r9)
+ cmpl cr0,r0,r8 /* check if updated */
+ bne- .Lcoarse_clocks
+
+ /* Counter has not updated, so continue calculating proper values for
+ * sec and nsec if monotonic coarse, or just return with the proper
+ * values for realtime.
+ */
+ bne cr6, .Lfinish
/* Calculate and store result. Note that this mimics the C code,
* which may cause funny results if nsec goes negative... is that
* possible at all ?
*/
+.Lfinish_monotonic:
add r3,r3,r5
add r4,r4,r6
cmpw cr0,r4,r7
@@ -118,11 +162,12 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
blt 1f
subf r4,r7,r4
addi r3,r3,1
-1: bge cr1,80f
+1: bge cr1, .Lfinish
addi r3,r3,-1
add r4,r4,r7
-80: stw r3,TSPC32_TV_SEC(r11)
+.Lfinish:
+ stw r3,TSPC32_TV_SEC(r11)
stw r4,TSPC32_TV_NSEC(r11)
mtlr r12
@@ -133,7 +178,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
/*
* syscall fallback
*/
-99:
+.Lgettime_fallback:
li r0,__NR_clock_gettime
.cfi_restore lr
sc
@@ -151,27 +196,33 @@ V_FUNCTION_END(__kernel_clock_gettime)
V_FUNCTION_BEGIN(__kernel_clock_getres)
.cfi_startproc
/* Check for supported clock IDs */
- cmpwi cr0,r3,CLOCK_REALTIME
- cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
+ cmplwi cr0, r3, CLOCK_MAX
+ cmpwi cr1, r3, CLOCK_REALTIME_COARSE
+ cmpwi cr7, r3, CLOCK_MONOTONIC_COARSE
+ bgt cr0, 99f
+ LOAD_REG_IMMEDIATE(r5, KTIME_LOW_RES)
+ beq cr1, 1f
+ beq cr7, 1f
- li r3,0
+ mflr r12
+ .cfi_register lr,r12
+ get_datapage r3, r0
+ lwz r5, CLOCK_HRTIMER_RES(r3)
+ mtlr r12
+1: li r3,0
cmpli cr0,r4,0
crclr cr0*4+so
beqlr
- lis r5,CLOCK_REALTIME_RES@h
- ori r5,r5,CLOCK_REALTIME_RES@l
stw r3,TSPC32_TV_SEC(r4)
stw r5,TSPC32_TV_NSEC(r4)
blr
/*
- * syscall fallback
+ * invalid clock
*/
99:
- li r0,__NR_clock_getres
- sc
+ li r3, EINVAL
+ crset so
blr
.cfi_endproc
V_FUNCTION_END(__kernel_clock_getres)
@@ -189,16 +240,15 @@ V_FUNCTION_BEGIN(__kernel_time)
.cfi_register lr,r12
mr r11,r3 /* r11 holds t */
- bl __get_datapage@local
- mr r9, r3 /* datapage ptr in r9 */
+ get_datapage r9, r0
- lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
+ lwz r3,STAMP_XTIME_SEC+LOPART(r9)
cmplwi r11,0 /* check if t is NULL */
- beq 2f
- stw r3,0(r11) /* store result at *t */
-2: mtlr r12
+ mtlr r12
crclr cr0*4+so
+ beqlr
+ stw r3,0(r11) /* store result at *t */
blr
.cfi_endproc
V_FUNCTION_END(__kernel_time)
@@ -268,7 +318,7 @@ __do_get_tspec:
* as a 32.32 fixed-point number in r3 and r4.
* Load & add the xtime stamp.
*/
- lwz r5,STAMP_XTIME+TSPEC_TV_SEC(r9)
+ lwz r5,STAMP_XTIME_SEC+LOPART(r9)
lwz r6,STAMP_SEC_FRAC(r9)
addc r4,r4,r6
adde r3,r3,r5
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index 099a6db14e67..5206c2eb2a1d 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -144,18 +144,20 @@ VERSION
__kernel_datapage_offset;
__kernel_get_syscall_map;
+#ifndef CONFIG_PPC_BOOK3S_601
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
+ __kernel_time;
__kernel_get_tbfreq;
+#endif
__kernel_sync_dicache;
__kernel_sync_dicache_p5;
__kernel_sigtramp32;
__kernel_sigtramp_rt32;
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PPC64) || !defined(CONFIG_SMP)
__kernel_getcpu;
#endif
- __kernel_time;
local: *;
};
diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S
index 3f92561a64c4..526f5ba2593e 100644
--- a/arch/powerpc/kernel/vdso64/cacheflush.S
+++ b/arch/powerpc/kernel/vdso64/cacheflush.S
@@ -35,7 +35,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
subf r8,r6,r4 /* compute length */
add r8,r8,r5 /* ensure we get enough */
lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
- srw. r8,r8,r9 /* compute line count */
+ srd. r8,r8,r9 /* compute line count */
crclr cr0*4+so
beqlr /* nothing to do? */
mtctr r8
@@ -52,7 +52,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
subf r8,r6,r4 /* compute length */
add r8,r8,r5
lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
- srw. r8,r8,r9 /* compute line count */
+ srd. r8,r8,r9 /* compute line count */
crclr cr0*4+so
beqlr /* nothing to do? */
mtctr r8
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 07bfe33fe874..1c9a04703250 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -116,8 +116,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
* CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE
* too
*/
- ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
- ld r5,STAMP_XTIME+TSPC64_TV_NSEC(r3)
+ ld r4,STAMP_XTIME_SEC(r3)
+ ld r5,STAMP_XTIME_NSEC(r3)
bne cr6,75f
/* CLOCK_MONOTONIC_COARSE */
@@ -186,12 +186,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
cror cr0*4+eq,cr0*4+eq,cr1*4+eq
bne cr0,99f
+ mflr r12
+ .cfi_register lr,r12
+ bl V_LOCAL_FUNC(__get_datapage)
+ lwz r5, CLOCK_HRTIMER_RES(r3)
+ mtlr r12
li r3,0
cmpldi cr0,r4,0
crclr cr0*4+so
beqlr
- lis r5,CLOCK_REALTIME_RES@h
- ori r5,r5,CLOCK_REALTIME_RES@l
std r3,TSPC64_TV_SEC(r4)
std r5,TSPC64_TV_NSEC(r4)
blr
@@ -220,7 +223,7 @@ V_FUNCTION_BEGIN(__kernel_time)
mr r11,r3 /* r11 holds t */
bl V_LOCAL_FUNC(__get_datapage)
- ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
+ ld r4,STAMP_XTIME_SEC(r3)
cmpldi r11,0 /* check if t is NULL */
beq 2f
@@ -265,7 +268,7 @@ V_FUNCTION_BEGIN(__do_get_tspec)
mulhdu r6,r6,r5 /* in units of 2^-32 seconds */
/* Add stamp since epoch */
- ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
+ ld r4,STAMP_XTIME_SEC(r3)
lwz r5,STAMP_SEC_FRAC(r3)
or r0,r4,r5
or r0,r0,r6
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 8eb867dbad5f..25c14a0981bf 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -67,6 +67,9 @@ _GLOBAL(load_up_altivec)
#ifdef CONFIG_PPC32
mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
oris r9,r9,MSR_VEC@h
+#ifdef CONFIG_VMAP_STACK
+ tovirt(r5, r5)
+#endif
#else
ld r4,PACACURRENT(r13)
addi r5,r4,THREAD /* Get THREAD */
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 060a1acd7c6d..b4c89a1acebb 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -6,6 +6,8 @@
#endif
#define BSS_FIRST_SECTIONS *(.bss.prominit)
+#define EMITS_PT_NOTE
+#define RO_EXCEPTION_TABLE_ALIGN 0
#include <asm/page.h>
#include <asm-generic/vmlinux.lds.h>
@@ -18,22 +20,8 @@
ENTRY(_stext)
PHDRS {
- kernel PT_LOAD FLAGS(7); /* RWX */
- notes PT_NOTE FLAGS(0);
- dummy PT_NOTE FLAGS(0);
-
- /* binutils < 2.18 has a bug that makes it misbehave when taking an
- ELF file with all segments at load address 0 as input. This
- happens when running "strip" on vmlinux, because of the AT() magic
- in this linker script. People using GCC >= 4.2 won't run into
- this problem, because the "build-id" support will put some data
- into the "notes" segment (at a non-zero load address).
-
- To work around this, we force some data into both the "dummy"
- segment and the kernel segment, so the dummy segment will get a
- non-zero load address. It's not enough to always create the
- "notes" segment, since if nothing gets assigned to it, its load
- address will be zero. */
+ text PT_LOAD FLAGS(7); /* RWX */
+ note PT_NOTE FLAGS(0);
}
#ifdef CONFIG_PPC64
@@ -77,7 +65,7 @@ SECTIONS
#else /* !CONFIG_PPC64 */
HEAD_TEXT
#endif
- } :kernel
+ } :text
__head_end = .;
@@ -126,7 +114,7 @@ SECTIONS
__got2_end = .;
#endif /* CONFIG_PPC32 */
- } :kernel
+ } :text
. = ALIGN(ETEXT_ALIGN_SIZE);
_etext = .;
@@ -175,17 +163,6 @@ SECTIONS
__stop__btb_flush_fixup = .;
}
#endif
- EXCEPTION_TABLE(0)
-
- NOTES :kernel :notes
-
- /* The dummy segment contents for the bug workaround mentioned above
- near PHDRS. */
- .dummy : AT(ADDR(.dummy) - LOAD_OFFSET) {
- LONG(0)
- LONG(0)
- LONG(0)
- } :kernel :dummy
/*
* Init sections discarded at runtime
@@ -200,7 +177,7 @@ SECTIONS
#ifdef CONFIG_PPC64
*(.tramp.ftrace.init);
#endif
- } :kernel
+ } :text
/* .exit.text is discarded at runtime, not link time,
* to deal with references from __bug_table
@@ -346,7 +323,7 @@ SECTIONS
#endif
/* The initial task and kernel stack */
- INIT_TASK_DATA_SECTION(THREAD_SIZE)
+ INIT_TASK_DATA_SECTION(THREAD_ALIGN)
.data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) {
PAGE_ALIGNED_DATA(PAGE_SIZE)
diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
new file mode 100644
index 000000000000..378f6108a414
--- /dev/null
+++ b/arch/powerpc/kexec/Makefile
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux kernel.
+#
+
+# Avoid clang warnings around longjmp/setjmp declarations
+CFLAGS_crash.o += -ffreestanding
+
+obj-y += core.o crash.o core_$(BITS).o
+
+obj-$(CONFIG_PPC32) += relocate_32.o
+
+obj-$(CONFIG_KEXEC_FILE) += file_load.o elf_$(BITS).o
+
+ifdef CONFIG_HAVE_IMA_KEXEC
+ifdef CONFIG_IMA
+obj-y += ima.o
+endif
+endif
+
+
+# Disable GCOV, KCOV & sanitizers in odd or sensitive code
+GCOV_PROFILE_core_$(BITS).o := n
+KCOV_INSTRUMENT_core_$(BITS).o := n
+UBSAN_SANITIZE_core_$(BITS).o := n
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kexec/core.c
index c4ed328a7b96..078fe3d76feb 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kexec/core.c
@@ -86,6 +86,7 @@ void arch_crash_save_vmcoreinfo(void)
VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
VMCOREINFO_OFFSET(mmu_psize_def, shift);
#endif
+ vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
}
/*
diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kexec/core_32.c
index bf9f1f906d64..bf9f1f906d64 100644
--- a/arch/powerpc/kernel/machine_kexec_32.c
+++ b/arch/powerpc/kexec/core_32.c
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kexec/core_64.c
index 18481b0e2788..04a7cba58eff 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -29,6 +29,8 @@
#include <asm/smp.h>
#include <asm/hw_breakpoint.h>
#include <asm/asm-prototypes.h>
+#include <asm/svm.h>
+#include <asm/ultravisor.h>
int default_machine_kexec_prepare(struct kimage *image)
{
@@ -327,6 +329,13 @@ void default_machine_kexec(struct kimage *image)
#ifdef CONFIG_PPC_PSERIES
kexec_paca.lppaca_ptr = NULL;
#endif
+
+ if (is_secure_guest() && !(image->preserve_context ||
+ image->type == KEXEC_TYPE_CRASH)) {
+ uv_unshare_all_pages();
+ printk("kexec: Unshared all shared pages.\n");
+ }
+
paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
setup_paca(&kexec_paca);
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kexec/crash.c
index d488311efab1..d488311efab1 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kexec/crash.c
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
new file mode 100644
index 000000000000..3072fd6dbe94
--- /dev/null
+++ b/arch/powerpc/kexec/elf_64.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Load ELF vmlinux file for the kexec_file_load syscall.
+ *
+ * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004 IBM Corp.
+ * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2016 IBM Corporation
+ *
+ * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c.
+ * Heavily modified for the kernel by
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
+ */
+
+#define pr_fmt(fmt) "kexec_elf: " fmt
+
+#include <linux/elf.h>
+#include <linux/kexec.h>
+#include <linux/libfdt.h>
+#include <linux/module.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+static void *elf64_load(struct kimage *image, char *kernel_buf,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len)
+{
+ int ret;
+ unsigned int fdt_size;
+ unsigned long kernel_load_addr;
+ unsigned long initrd_load_addr = 0, fdt_load_addr;
+ void *fdt;
+ const void *slave_code;
+ struct elfhdr ehdr;
+ struct kexec_elf_info elf_info;
+ struct kexec_buf kbuf = { .image = image, .buf_min = 0,
+ .buf_max = ppc64_rma_size };
+ struct kexec_buf pbuf = { .image = image, .buf_min = 0,
+ .buf_max = ppc64_rma_size, .top_down = true,
+ .mem = KEXEC_BUF_MEM_UNKNOWN };
+
+ ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
+ if (ret)
+ goto out;
+
+ ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr);
+ if (ret)
+ goto out;
+
+ pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr);
+
+ ret = kexec_load_purgatory(image, &pbuf);
+ if (ret) {
+ pr_err("Loading purgatory failed.\n");
+ goto out;
+ }
+
+ pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem);
+
+ if (initrd != NULL) {
+ kbuf.buffer = initrd;
+ kbuf.bufsz = kbuf.memsz = initrd_len;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.top_down = false;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out;
+ initrd_load_addr = kbuf.mem;
+
+ pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr);
+ }
+
+ fdt_size = fdt_totalsize(initial_boot_params) * 2;
+ fdt = kmalloc(fdt_size, GFP_KERNEL);
+ if (!fdt) {
+ pr_err("Not enough memory for the device tree.\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = fdt_open_into(initial_boot_params, fdt, fdt_size);
+ if (ret < 0) {
+ pr_err("Error setting up the new device tree.\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
+ if (ret)
+ goto out;
+
+ fdt_pack(fdt);
+
+ kbuf.buffer = fdt;
+ kbuf.bufsz = kbuf.memsz = fdt_size;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.top_down = true;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out;
+ fdt_load_addr = kbuf.mem;
+
+ pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr);
+
+ slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset;
+ ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
+ fdt_load_addr);
+ if (ret)
+ pr_err("Error setting up the purgatory.\n");
+
+out:
+ kexec_free_elf_info(&elf_info);
+
+ /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
+ return ret ? ERR_PTR(ret) : fdt;
+}
+
+const struct kexec_file_ops kexec_elf64_ops = {
+ .probe = kexec_elf_probe,
+ .load = elf64_load,
+};
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kexec/file_load.c
index 143c91724617..143c91724617 100644
--- a/arch/powerpc/kernel/machine_kexec_file_64.c
+++ b/arch/powerpc/kexec/file_load.c
diff --git a/arch/powerpc/kernel/ima_kexec.c b/arch/powerpc/kexec/ima.c
index 720e50e490b6..720e50e490b6 100644
--- a/arch/powerpc/kernel/ima_kexec.c
+++ b/arch/powerpc/kexec/ima.c
diff --git a/arch/powerpc/kexec/relocate_32.S b/arch/powerpc/kexec/relocate_32.S
new file mode 100644
index 000000000000..61946c19e07c
--- /dev/null
+++ b/arch/powerpc/kexec/relocate_32.S
@@ -0,0 +1,500 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains kexec low-level functions.
+ *
+ * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * PPC44x port. Copyright (C) 2011, IBM Corporation
+ * Author: Suzuki Poulose <suzuki@in.ibm.com>
+ */
+
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/kexec.h>
+
+ .text
+
+ /*
+ * Must be relocatable PIC code callable as a C function.
+ */
+ .globl relocate_new_kernel
+relocate_new_kernel:
+ /* r3 = page_list */
+ /* r4 = reboot_code_buffer */
+ /* r5 = start_address */
+
+#ifdef CONFIG_FSL_BOOKE
+
+ mr r29, r3
+ mr r30, r4
+ mr r31, r5
+
+#define ENTRY_MAPPING_KEXEC_SETUP
+#include <kernel/fsl_booke_entry_mapping.S>
+#undef ENTRY_MAPPING_KEXEC_SETUP
+
+ mr r3, r29
+ mr r4, r30
+ mr r5, r31
+
+ li r0, 0
+#elif defined(CONFIG_44x)
+
+ /* Save our parameters */
+ mr r29, r3
+ mr r30, r4
+ mr r31, r5
+
+#ifdef CONFIG_PPC_47x
+ /* Check for 47x cores */
+ mfspr r3,SPRN_PVR
+ srwi r3,r3,16
+ cmplwi cr0,r3,PVR_476FPE@h
+ beq setup_map_47x
+ cmplwi cr0,r3,PVR_476@h
+ beq setup_map_47x
+ cmplwi cr0,r3,PVR_476_ISS@h
+ beq setup_map_47x
+#endif /* CONFIG_PPC_47x */
+
+/*
+ * Code for setting up 1:1 mapping for PPC440x for KEXEC
+ *
+ * We cannot switch off the MMU on PPC44x.
+ * So we:
+ * 1) Invalidate all the mappings except the one we are running from.
+ * 2) Create a tmp mapping for our code in the other address space(TS) and
+ * jump to it. Invalidate the entry we started in.
+ * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS.
+ * 4) Jump to the 1:1 mapping in original TS.
+ * 5) Invalidate the tmp mapping.
+ *
+ * - Based on the kexec support code for FSL BookE
+ *
+ */
+
+ /*
+ * Load the PID with kernel PID (0).
+ * Also load our MSR_IS and TID to MMUCR for TLB search.
+ */
+ li r3, 0
+ mtspr SPRN_PID, r3
+ mfmsr r4
+ andi. r4,r4,MSR_IS@l
+ beq wmmucr
+ oris r3,r3,PPC44x_MMUCR_STS@h
+wmmucr:
+ mtspr SPRN_MMUCR,r3
+ sync
+
+ /*
+ * Invalidate all the TLB entries except the current entry
+ * where we are running from
+ */
+ bl 0f /* Find our address */
+0: mflr r5 /* Make it accessible */
+ tlbsx r23,0,r5 /* Find entry we are in */
+ li r4,0 /* Start at TLB entry 0 */
+ li r3,0 /* Set PAGEID inval value */
+1: cmpw r23,r4 /* Is this our entry? */
+ beq skip /* If so, skip the inval */
+ tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */
+skip:
+ addi r4,r4,1 /* Increment */
+ cmpwi r4,64 /* Are we done? */
+ bne 1b /* If not, repeat */
+ isync
+
+ /* Create a temp mapping and jump to it */
+ andi. r6, r23, 1 /* Find the index to use */
+ addi r24, r6, 1 /* r24 will contain 1 or 2 */
+
+ mfmsr r9 /* get the MSR */
+ rlwinm r5, r9, 27, 31, 31 /* Extract the MSR[IS] */
+ xori r7, r5, 1 /* Use the other address space */
+
+ /* Read the current mapping entries */
+ tlbre r3, r23, PPC44x_TLB_PAGEID
+ tlbre r4, r23, PPC44x_TLB_XLAT
+ tlbre r5, r23, PPC44x_TLB_ATTRIB
+
+ /* Save our current XLAT entry */
+ mr r25, r4
+
+ /* Extract the TLB PageSize */
+ li r10, 1 /* r10 will hold PageSize */
+ rlwinm r11, r3, 0, 24, 27 /* bits 24-27 */
+
+ /* XXX: As of now we use 256M, 4K pages */
+ cmpwi r11, PPC44x_TLB_256M
+ bne tlb_4k
+ rotlwi r10, r10, 28 /* r10 = 256M */
+ b write_out
+tlb_4k:
+ cmpwi r11, PPC44x_TLB_4K
+ bne default
+ rotlwi r10, r10, 12 /* r10 = 4K */
+ b write_out
+default:
+ rotlwi r10, r10, 10 /* r10 = 1K */
+
+write_out:
+ /*
+ * Write out the tmp 1:1 mapping for this code in other address space
+ * Fixup EPN = RPN , TS=other address space
+ */
+ insrwi r3, r7, 1, 23 /* Bit 23 is TS for PAGEID field */
+
+ /* Write out the tmp mapping entries */
+ tlbwe r3, r24, PPC44x_TLB_PAGEID
+ tlbwe r4, r24, PPC44x_TLB_XLAT
+ tlbwe r5, r24, PPC44x_TLB_ATTRIB
+
+ subi r11, r10, 1 /* PageOffset Mask = PageSize - 1 */
+ not r10, r11 /* Mask for PageNum */
+
+ /* Switch to other address space in MSR */
+ insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */
+
+ bl 1f
+1: mflr r8
+ addi r8, r8, (2f-1b) /* Find the target offset */
+
+ /* Jump to the tmp mapping */
+ mtspr SPRN_SRR0, r8
+ mtspr SPRN_SRR1, r9
+ rfi
+
+2:
+ /* Invalidate the entry we were executing from */
+ li r3, 0
+ tlbwe r3, r23, PPC44x_TLB_PAGEID
+
+ /* attribute fields. rwx for SUPERVISOR mode */
+ li r5, 0
+ ori r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
+
+ /* Create 1:1 mapping in 256M pages */
+ xori r7, r7, 1 /* Revert back to Original TS */
+
+ li r8, 0 /* PageNumber */
+ li r6, 3 /* TLB Index, start at 3 */
+
+next_tlb:
+ rotlwi r3, r8, 28 /* Create EPN (bits 0-3) */
+ mr r4, r3 /* RPN = EPN */
+ ori r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */
+ insrwi r3, r7, 1, 23 /* Set TS from r7 */
+
+ tlbwe r3, r6, PPC44x_TLB_PAGEID /* PageID field : EPN, V, SIZE */
+ tlbwe r4, r6, PPC44x_TLB_XLAT /* Address translation : RPN */
+ tlbwe r5, r6, PPC44x_TLB_ATTRIB /* Attributes */
+
+ addi r8, r8, 1 /* Increment PN */
+ addi r6, r6, 1 /* Increment TLB Index */
+ cmpwi r8, 8 /* Are we done ? */
+ bne next_tlb
+ isync
+
+ /* Jump to the new mapping 1:1 */
+ li r9,0
+ insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */
+
+ bl 1f
+1: mflr r8
+ and r8, r8, r11 /* Get our offset within page */
+ addi r8, r8, (2f-1b)
+
+ and r5, r25, r10 /* Get our target PageNum */
+ or r8, r8, r5 /* Target jump address */
+
+ mtspr SPRN_SRR0, r8
+ mtspr SPRN_SRR1, r9
+ rfi
+2:
+ /* Invalidate the tmp entry we used */
+ li r3, 0
+ tlbwe r3, r24, PPC44x_TLB_PAGEID
+ sync
+ b ppc44x_map_done
+
+#ifdef CONFIG_PPC_47x
+
+ /* 1:1 mapping for 47x */
+
+setup_map_47x:
+
+ /*
+ * Load the kernel pid (0) to PID and also to MMUCR[TID].
+ * Also set the MSR IS->MMUCR STS
+ */
+ li r3, 0
+ mtspr SPRN_PID, r3 /* Set PID */
+ mfmsr r4 /* Get MSR */
+ andi. r4, r4, MSR_IS@l /* TS=1? */
+ beq 1f /* If not, leave STS=0 */
+ oris r3, r3, PPC47x_MMUCR_STS@h /* Set STS=1 */
+1: mtspr SPRN_MMUCR, r3 /* Put MMUCR */
+ sync
+
+ /* Find the entry we are running from */
+ bl 2f
+2: mflr r23
+ tlbsx r23, 0, r23
+ tlbre r24, r23, 0 /* TLB Word 0 */
+ tlbre r25, r23, 1 /* TLB Word 1 */
+ tlbre r26, r23, 2 /* TLB Word 2 */
+
+
+ /*
+ * Invalidates all the tlb entries by writing to 256 RPNs(r4)
+ * of 4k page size in all 4 ways (0-3 in r3).
+ * This would invalidate the entire UTLB including the one we are
+ * running from. However the shadow TLB entries would help us
+ * to continue the execution, until we flush them (rfi/isync).
+ */
+ addis r3, 0, 0x8000 /* specify the way */
+ addi r4, 0, 0 /* TLB Word0 = (EPN=0, VALID = 0) */
+ addi r5, 0, 0
+ b clear_utlb_entry
+
+ /* Align the loop to speed things up. from head_44x.S */
+ .align 6
+
+clear_utlb_entry:
+
+ tlbwe r4, r3, 0
+ tlbwe r5, r3, 1
+ tlbwe r5, r3, 2
+ addis r3, r3, 0x2000 /* Increment the way */
+ cmpwi r3, 0
+ bne clear_utlb_entry
+ addis r3, 0, 0x8000
+ addis r4, r4, 0x100 /* Increment the EPN */
+ cmpwi r4, 0
+ bne clear_utlb_entry
+
+ /* Create the entries in the other address space */
+ mfmsr r5
+ rlwinm r7, r5, 27, 31, 31 /* Get the TS (Bit 26) from MSR */
+ xori r7, r7, 1 /* r7 = !TS */
+
+ insrwi r24, r7, 1, 21 /* Change the TS in the saved TLB word 0 */
+
+ /*
+ * write out the TLB entries for the tmp mapping
+ * Use way '0' so that we could easily invalidate it later.
+ */
+ lis r3, 0x8000 /* Way '0' */
+
+ tlbwe r24, r3, 0
+ tlbwe r25, r3, 1
+ tlbwe r26, r3, 2
+
+ /* Update the msr to the new TS */
+ insrwi r5, r7, 1, 26
+
+ bl 1f
+1: mflr r6
+ addi r6, r6, (2f-1b)
+
+ mtspr SPRN_SRR0, r6
+ mtspr SPRN_SRR1, r5
+ rfi
+
+ /*
+ * Now we are in the tmp address space.
+ * Create a 1:1 mapping for 0-2GiB in the original TS.
+ */
+2:
+ li r3, 0
+ li r4, 0 /* TLB Word 0 */
+ li r5, 0 /* TLB Word 1 */
+ li r6, 0
+ ori r6, r6, PPC47x_TLB2_S_RWX /* TLB word 2 */
+
+ li r8, 0 /* PageIndex */
+
+ xori r7, r7, 1 /* revert back to original TS */
+
+write_utlb:
+ rotlwi r5, r8, 28 /* RPN = PageIndex * 256M */
+ /* ERPN = 0 as we don't use memory above 2G */
+
+ mr r4, r5 /* EPN = RPN */
+ ori r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M)
+ insrwi r4, r7, 1, 21 /* Insert the TS to Word 0 */
+
+ tlbwe r4, r3, 0 /* Write out the entries */
+ tlbwe r5, r3, 1
+ tlbwe r6, r3, 2
+ addi r8, r8, 1
+ cmpwi r8, 8 /* Have we completed ? */
+ bne write_utlb
+
+ /* make sure we complete the TLB write up */
+ isync
+
+ /*
+ * Prepare to jump to the 1:1 mapping.
+ * 1) Extract page size of the tmp mapping
+ * DSIZ = TLB_Word0[22:27]
+ * 2) Calculate the physical address of the address
+ * to jump to.
+ */
+ rlwinm r10, r24, 0, 22, 27
+
+ cmpwi r10, PPC47x_TLB0_4K
+ bne 0f
+ li r10, 0x1000 /* r10 = 4k */
+ bl 1f
+
+0:
+ /* Defaults to 256M */
+ lis r10, 0x1000
+
+ bl 1f
+1: mflr r4
+ addi r4, r4, (2f-1b) /* virtual address of 2f */
+
+ subi r11, r10, 1 /* offsetmask = Pagesize - 1 */
+ not r10, r11 /* Pagemask = ~(offsetmask) */
+
+ and r5, r25, r10 /* Physical page */
+ and r6, r4, r11 /* offset within the current page */
+
+ or r5, r5, r6 /* Physical address for 2f */
+
+ /* Switch the TS in MSR to the original one */
+ mfmsr r8
+ insrwi r8, r7, 1, 26
+
+ mtspr SPRN_SRR1, r8
+ mtspr SPRN_SRR0, r5
+ rfi
+
+2:
+ /* Invalidate the tmp mapping */
+ lis r3, 0x8000 /* Way '0' */
+
+ clrrwi r24, r24, 12 /* Clear the valid bit */
+ tlbwe r24, r3, 0
+ tlbwe r25, r3, 1
+ tlbwe r26, r3, 2
+
+ /* Make sure we complete the TLB write and flush the shadow TLB */
+ isync
+
+#endif
+
+ppc44x_map_done:
+
+
+ /* Restore the parameters */
+ mr r3, r29
+ mr r4, r30
+ mr r5, r31
+
+ li r0, 0
+#else
+ li r0, 0
+
+ /*
+ * Set Machine Status Register to a known status,
+ * switch the MMU off and jump to 1: in a single step.
+ */
+
+ mr r8, r0
+ ori r8, r8, MSR_RI|MSR_ME
+ mtspr SPRN_SRR1, r8
+ addi r8, r4, 1f - relocate_new_kernel
+ mtspr SPRN_SRR0, r8
+ sync
+ rfi
+
+1:
+#endif
+ /* from this point address translation is turned off */
+ /* and interrupts are disabled */
+
+ /* set a new stack at the bottom of our page... */
+ /* (not really needed now) */
+ addi r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */
+ stw r0, 0(r1)
+
+ /* Do the copies */
+ li r6, 0 /* checksum */
+ mr r0, r3
+ b 1f
+
+0: /* top, read another word for the indirection page */
+ lwzu r0, 4(r3)
+
+1:
+ /* is it a destination page? (r8) */
+ rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */
+ beq 2f
+
+ rlwinm r8, r0, 0, 0, 19 /* clear kexec flags, page align */
+ b 0b
+
+2: /* is it an indirection page? (r3) */
+ rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */
+ beq 2f
+
+ rlwinm r3, r0, 0, 0, 19 /* clear kexec flags, page align */
+ subi r3, r3, 4
+ b 0b
+
+2: /* are we done? */
+ rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */
+ beq 2f
+ b 3f
+
+2: /* is it a source page? (r9) */
+ rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */
+ beq 0b
+
+ rlwinm r9, r0, 0, 0, 19 /* clear kexec flags, page align */
+
+ li r7, PAGE_SIZE / 4
+ mtctr r7
+ subi r9, r9, 4
+ subi r8, r8, 4
+9:
+ lwzu r0, 4(r9) /* do the copy */
+ xor r6, r6, r0
+ stwu r0, 4(r8)
+ dcbst 0, r8
+ sync
+ icbi 0, r8
+ bdnz 9b
+
+ addi r9, r9, 4
+ addi r8, r8, 4
+ b 0b
+
+3:
+
+ /* To be certain of avoiding problems with self-modifying code
+ * execute a serializing instruction here.
+ */
+ isync
+ sync
+
+ mfspr r3, SPRN_PIR /* current core we are running on */
+ mr r4, r5 /* load physical address of chunk called */
+
+ /* jump to the entry point, usually the setup routine */
+ mtlr r5
+ blrl
+
+1: b 1b
+
+relocate_new_kernel_end:
+
+ .globl relocate_new_kernel_size
+relocate_new_kernel_size:
+ .long relocate_new_kernel_end - relocate_new_kernel
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 4c67cc79de7c..2bfeaa13befb 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -71,6 +71,9 @@ kvm-hv-y += \
book3s_64_mmu_radix.o \
book3s_hv_nested.o
+kvm-hv-$(CONFIG_PPC_UV) += \
+ book3s_hv_uvmem.o
+
kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
book3s_hv_tm.o
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 9524d92bc45d..d07a8e12fa15 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -36,8 +36,8 @@
#include "book3s.h"
#include "trace.h"
-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
+#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
/* #define EXIT_DEBUG */
@@ -69,32 +69,11 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "pthru_all", VCPU_STAT(pthru_all) },
{ "pthru_host", VCPU_STAT(pthru_host) },
{ "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) },
- { "largepages_2M", VM_STAT(num_2M_pages) },
- { "largepages_1G", VM_STAT(num_1G_pages) },
+ { "largepages_2M", VM_STAT(num_2M_pages, .mode = 0444) },
+ { "largepages_1G", VM_STAT(num_1G_pages, .mode = 0444) },
{ NULL }
};
-void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
-{
- if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
- ulong pc = kvmppc_get_pc(vcpu);
- ulong lr = kvmppc_get_lr(vcpu);
- if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
- kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
- if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
- kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK);
- vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
- }
-}
-EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real);
-
-static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
-{
- if (!is_kvmppc_hv_enabled(vcpu->kvm))
- return to_book3s(vcpu)->hior;
- return 0;
-}
-
static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
unsigned long pending_now, unsigned long old_pending)
{
@@ -134,11 +113,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
{
- kvmppc_unfixup_split_real(vcpu);
- kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
- kvmppc_set_srr1(vcpu, (kvmppc_get_msr(vcpu) & ~0x783f0000ul) | flags);
- kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec);
- vcpu->arch.mmu.reset_msr(vcpu);
+ vcpu->kvm->arch.kvm_ops->inject_interrupt(vcpu, vec, flags);
}
static int kvmppc_book3s_vec2irqprio(unsigned int vec)
@@ -496,11 +471,6 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(kvmppc_load_last_inst);
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
- return 0;
-}
-
int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
{
return 0;
@@ -814,9 +784,9 @@ void kvmppc_decrementer_func(struct kvm_vcpu *vcpu)
kvm_vcpu_kick(vcpu);
}
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu)
{
- return kvm->arch.kvm_ops->vcpu_create(kvm, id);
+ return vcpu->kvm->arch.kvm_ops->vcpu_create(vcpu);
}
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
@@ -1083,9 +1053,11 @@ static int kvmppc_book3s_init(void)
if (xics_on_xive()) {
kvmppc_xive_init_module();
kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
- kvmppc_xive_native_init_module();
- kvm_register_device_ops(&kvm_xive_native_ops,
- KVM_DEV_TYPE_XIVE);
+ if (kvmppc_xive_native_supported()) {
+ kvmppc_xive_native_init_module();
+ kvm_register_device_ops(&kvm_xive_native_ops,
+ KVM_DEV_TYPE_XIVE);
+ }
} else
#endif
kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 2ef1311a2a13..3a4613985949 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -32,4 +32,7 @@ extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val);
static inline void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) {}
#endif
+extern void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr);
+extern void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);
+
#endif
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index 18f244aad7aa..f21e73492ce3 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -90,11 +90,6 @@ static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16);
}
-static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu)
-{
- kvmppc_set_msr(vcpu, 0);
-}
-
static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu,
u32 sre, gva_t eaddr,
bool primary)
@@ -406,7 +401,6 @@ void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu)
mmu->mtsrin = kvmppc_mmu_book3s_32_mtsrin;
mmu->mfsrin = kvmppc_mmu_book3s_32_mfsrin;
mmu->xlate = kvmppc_mmu_book3s_32_xlate;
- mmu->reset_msr = kvmppc_mmu_book3s_32_reset_msr;
mmu->tlbie = kvmppc_mmu_book3s_32_tlbie;
mmu->esid_to_vsid = kvmppc_mmu_book3s_32_esid_to_vsid;
mmu->ea_to_vp = kvmppc_mmu_book3s_32_ea_to_vp;
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 5f63a5f7f24f..599133256a95 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -24,20 +24,6 @@
#define dprintk(X...) do { } while(0)
#endif
-static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
-{
- unsigned long msr = vcpu->arch.intr_msr;
- unsigned long cur_msr = kvmppc_get_msr(vcpu);
-
- /* If transactional, change to suspend mode on IRQ delivery */
- if (MSR_TM_TRANSACTIONAL(cur_msr))
- msr |= MSR_TS_S;
- else
- msr |= cur_msr & MSR_TS_MASK;
-
- kvmppc_set_msr(vcpu, msr);
-}
-
static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
struct kvm_vcpu *vcpu,
gva_t eaddr)
@@ -676,7 +662,6 @@ void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu)
mmu->slbie = kvmppc_mmu_book3s_64_slbie;
mmu->slbia = kvmppc_mmu_book3s_64_slbia;
mmu->xlate = kvmppc_mmu_book3s_64_xlate;
- mmu->reset_msr = kvmppc_mmu_book3s_64_reset_msr;
mmu->tlbie = kvmppc_mmu_book3s_64_tlbie;
mmu->esid_to_vsid = kvmppc_mmu_book3s_64_esid_to_vsid;
mmu->ea_to_vp = kvmppc_mmu_book3s_64_ea_to_vp;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 9a75f0e1933b..6c372f5c61b6 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -275,18 +275,6 @@ int kvmppc_mmu_hv_init(void)
return 0;
}
-static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
-{
- unsigned long msr = vcpu->arch.intr_msr;
-
- /* If transactional, change to suspend mode on IRQ delivery */
- if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
- msr |= MSR_TS_S;
- else
- msr |= vcpu->arch.shregs.msr & MSR_TS_MASK;
- kvmppc_set_msr(vcpu, msr);
-}
-
static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh,
unsigned long ptel, unsigned long *pte_idx_ret)
@@ -296,7 +284,7 @@ static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
/* Protect linux PTE lookup from page table destruction */
rcu_read_lock_sched(); /* this disables preemption too */
ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
- current->mm->pgd, false, pte_idx_ret);
+ kvm->mm->pgd, false, pte_idx_ret);
rcu_read_unlock_sched();
if (ret == H_TOO_HARD) {
/* this can't happen */
@@ -508,6 +496,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
struct vm_area_struct *vma;
unsigned long rcbits;
long mmio_update;
+ struct mm_struct *mm;
if (kvm_is_radix(kvm))
return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
@@ -584,6 +573,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
is_ci = false;
pfn = 0;
page = NULL;
+ mm = kvm->mm;
pte_size = PAGE_SIZE;
writing = (dsisr & DSISR_ISSTORE) != 0;
/* If writing != 0, then the HPTE must allow writing, if we get here */
@@ -592,8 +582,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages);
if (npages < 1) {
/* Check if it's an I/O mapping */
- down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm, hva);
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, hva);
if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
(vma->vm_flags & VM_PFNMAP)) {
pfn = vma->vm_pgoff +
@@ -602,7 +592,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot))));
write_ok = vma->vm_flags & VM_WRITE;
}
- up_read(&current->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
if (!pfn)
goto out_put;
} else {
@@ -621,8 +611,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
* hugepage split and collapse.
*/
local_irq_save(flags);
- ptep = find_current_mm_pte(current->mm->pgd,
- hva, NULL, NULL);
+ ptep = find_current_mm_pte(mm->pgd, hva, NULL, NULL);
if (ptep) {
pte = kvmppc_read_update_linux_pte(ptep, 1);
if (__pte_write(pte))
@@ -2000,7 +1989,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
if (ret < 0) {
kfree(ctx);
- kvm_put_kvm(kvm);
+ kvm_put_kvm_no_destroy(kvm);
return ret;
}
@@ -2161,7 +2150,6 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */
mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
- mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 2d415c36a61d..803940d79b73 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -19,6 +19,8 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/pte-walk.h>
+#include <asm/ultravisor.h>
+#include <asm/kvm_book3s_uvmem.h>
/*
* Supported radix tree geometry.
@@ -61,12 +63,10 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
}
isync();
- pagefault_disable();
if (is_load)
- ret = raw_copy_from_user(to, from, n);
+ ret = probe_user_read(to, (const void __user *)from, n);
else
- ret = raw_copy_to_user(to, from, n);
- pagefault_enable();
+ ret = probe_user_write((void __user *)to, from, n);
/* switch the pid first to avoid running host with unallocated pid */
if (quadrant == 1 && pid != old_pid)
@@ -915,6 +915,9 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (!(dsisr & DSISR_PRTABLE_FAULT))
gpa |= ea & 0xfff;
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+ return kvmppc_send_page_to_uv(kvm, gfn);
+
/* Get the corresponding memslot */
memslot = gfn_to_memslot(kvm, gfn);
@@ -972,6 +975,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gpa = gfn << PAGE_SHIFT;
unsigned int shift;
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) {
+ uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT);
+ return 0;
+ }
+
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep))
kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
@@ -989,6 +997,9 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
int ref = 0;
unsigned long old, *rmapp;
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+ return ref;
+
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
@@ -1013,6 +1024,9 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned int shift;
int ref = 0;
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+ return ref;
+
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep))
ref = 1;
@@ -1030,6 +1044,9 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
int ret = 0;
unsigned long old, *rmapp;
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+ return ret;
+
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
ret = 1;
@@ -1082,6 +1099,12 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm,
unsigned long gpa;
unsigned int shift;
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)
+ kvmppc_uvmem_drop_pages(memslot, kvm, true);
+
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+ return;
+
gpa = memslot->base_gfn << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
for (n = memslot->npages; n; --n) {
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index e99a14798ab0..ee6c103bb7d5 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -253,10 +253,11 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
}
}
+ account_locked_vm(kvm->mm,
+ kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
+
kvm_put_kvm(stt->kvm);
- account_locked_vm(current->mm,
- kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
call_rcu(&stt->rcu, release_spapr_tce_table);
return 0;
@@ -272,6 +273,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
{
struct kvmppc_spapr_tce_table *stt = NULL;
struct kvmppc_spapr_tce_table *siter;
+ struct mm_struct *mm = kvm->mm;
unsigned long npages, size = args->size;
int ret = -ENOMEM;
@@ -280,7 +282,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
return -EINVAL;
npages = kvmppc_tce_pages(size);
- ret = account_locked_vm(current->mm, kvmppc_stt_pages(npages), true);
+ ret = account_locked_vm(mm, kvmppc_stt_pages(npages), true);
if (ret)
return ret;
@@ -317,7 +319,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
if (ret >= 0)
list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
else
- kvm_put_kvm(kvm);
+ kvm_put_kvm_no_destroy(kvm);
mutex_unlock(&kvm->lock);
@@ -326,7 +328,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
kfree(stt);
fail_acct:
- account_locked_vm(current->mm, kvmppc_stt_pages(npages), false);
+ account_locked_vm(mm, kvmppc_stt_pages(npages), false);
return ret;
}
@@ -416,7 +418,7 @@ static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long hpa = 0;
enum dma_data_direction dir = DMA_NONE;
- iommu_tce_xchg(mm, tbl, entry, &hpa, &dir);
+ iommu_tce_xchg_no_kill(mm, tbl, entry, &hpa, &dir);
}
static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -447,7 +449,8 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
unsigned long hpa = 0;
long ret;
- if (WARN_ON_ONCE(iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir)))
+ if (WARN_ON_ONCE(iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa,
+ &dir)))
return H_TOO_HARD;
if (dir == DMA_NONE)
@@ -455,7 +458,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
if (ret != H_SUCCESS)
- iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);
+ iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir);
return ret;
}
@@ -501,7 +504,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
if (mm_iommu_mapped_inc(mem))
return H_TOO_HARD;
- ret = iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);
+ ret = iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir);
if (WARN_ON_ONCE(ret)) {
mm_iommu_mapped_dec(mem);
return H_TOO_HARD;
@@ -579,6 +582,8 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
entry, ua, dir);
+ iommu_tce_kill(stit->tbl, entry, 1);
+
if (ret != H_SUCCESS) {
kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
goto unlock_exit;
@@ -656,12 +661,14 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
*/
if (get_user(tce, tces + i)) {
ret = H_TOO_HARD;
- goto unlock_exit;
+ goto invalidate_exit;
}
tce = be64_to_cpu(tce);
- if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua))
- return H_PARAMETER;
+ if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
+ ret = H_PARAMETER;
+ goto invalidate_exit;
+ }
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
ret = kvmppc_tce_iommu_map(vcpu->kvm, stt,
@@ -671,13 +678,17 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (ret != H_SUCCESS) {
kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl,
entry);
- goto unlock_exit;
+ goto invalidate_exit;
}
}
kvmppc_tce_put(stt, entry + i, tce);
}
+invalidate_exit:
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
+ iommu_tce_kill(stit->tbl, entry, npages);
+
unlock_exit:
srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -716,7 +727,7 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
continue;
if (ret == H_TOO_HARD)
- return ret;
+ goto invalidate_exit;
WARN_ON_ONCE(1);
kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
@@ -726,6 +737,10 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
- return H_SUCCESS;
+invalidate_exit:
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
+ iommu_tce_kill(stit->tbl, ioba >> stt->page_shift, npages);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index f50bbeedfc66..ab6eeb8e753e 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -218,13 +218,14 @@ static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt,
return H_SUCCESS;
}
-static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
+static long iommu_tce_xchg_no_kill_rm(struct mm_struct *mm,
+ struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction)
{
long ret;
- ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+ ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, true);
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
(*direction == DMA_BIDIRECTIONAL))) {
@@ -240,13 +241,20 @@ static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
return ret;
}
+extern void iommu_tce_kill_rm(struct iommu_table *tbl,
+ unsigned long entry, unsigned long pages)
+{
+ if (tbl->it_ops->tce_kill)
+ tbl->it_ops->tce_kill(tbl, entry, pages, true);
+}
+
static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl,
unsigned long entry)
{
unsigned long hpa = 0;
enum dma_data_direction dir = DMA_NONE;
- iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
+ iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir);
}
static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -278,7 +286,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
unsigned long hpa = 0;
long ret;
- if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir))
+ if (iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir))
/*
* real mode xchg can fail if struct page crosses
* a page boundary
@@ -290,7 +298,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
if (ret)
- iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
+ iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir);
return ret;
}
@@ -336,7 +344,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
return H_TOO_HARD;
- ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
+ ret = iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir);
if (ret) {
mm_iommu_mapped_dec(mem);
/*
@@ -417,6 +425,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
stit->tbl, entry, ua, dir);
+ iommu_tce_kill_rm(stit->tbl, entry, 1);
+
if (ret != H_SUCCESS) {
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
return ret;
@@ -556,8 +566,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
ua = 0;
- if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
- return H_PARAMETER;
+ if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) {
+ ret = H_PARAMETER;
+ goto invalidate_exit;
+ }
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
@@ -567,13 +579,17 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (ret != H_SUCCESS) {
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl,
entry);
- goto unlock_exit;
+ goto invalidate_exit;
}
}
kvmppc_rm_tce_put(stt, entry + i, tce);
}
+invalidate_exit:
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
+ iommu_tce_kill_rm(stit->tbl, entry, npages);
+
unlock_exit:
if (rmap)
unlock_rmap(rmap);
@@ -616,7 +632,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
continue;
if (ret == H_TOO_HARD)
- return ret;
+ goto invalidate_exit;
WARN_ON_ONCE_RM(1);
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
@@ -626,7 +642,11 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value);
- return H_SUCCESS;
+invalidate_exit:
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
+ iommu_tce_kill_rm(stit->tbl, ioba >> stt->page_shift, npages);
+
+ return ret;
}
/* This can be called in either virtual mode or real mode */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index cde3f5a4b3e4..2cefd071b848 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -72,6 +72,9 @@
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/hw_breakpoint.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_book3s_uvmem.h>
+#include <asm/ultravisor.h>
#include "book3s.h"
@@ -133,7 +136,6 @@ static inline bool nesting_enabled(struct kvm *kvm)
/* If set, the threads on each CPU core have to be in the same MMU mode */
static bool no_mixing_hpt_and_radix;
-static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
/*
@@ -338,18 +340,6 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
}
-static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
-{
- /*
- * Check for illegal transactional state bit combination
- * and if we find it, force the TS field to a safe state.
- */
- if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
- msr &= ~MSR_TS_MASK;
- vcpu->arch.shregs.msr = msr;
- kvmppc_end_cede(vcpu);
-}
-
static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
{
vcpu->arch.pvr = pvr;
@@ -401,8 +391,11 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
spin_lock(&vc->lock);
vc->arch_compat = arch_compat;
- /* Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit */
- vc->pcr = host_pcr_bit - guest_pcr_bit;
+ /*
+ * Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit
+ * Also set all reserved PCR bits
+ */
+ vc->pcr = (host_pcr_bit - guest_pcr_bit) | PCR_MASK;
spin_unlock(&vc->lock);
return 0;
@@ -789,6 +782,11 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
vcpu->arch.dawr = value1;
vcpu->arch.dawrx = value2;
return H_SUCCESS;
+ case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
+ /* KVM does not support mflags=2 (AIL=2) */
+ if (mflags != 0 && mflags != 3)
+ return H_UNSUPPORTED_FLAG_START;
+ return H_TOO_HARD;
default:
return H_TOO_HARD;
}
@@ -1075,6 +1073,28 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
break;
+ case H_SVM_PAGE_IN:
+ ret = kvmppc_h_svm_page_in(vcpu->kvm,
+ kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ break;
+ case H_SVM_PAGE_OUT:
+ ret = kvmppc_h_svm_page_out(vcpu->kvm,
+ kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ break;
+ case H_SVM_INIT_START:
+ ret = kvmppc_h_svm_init_start(vcpu->kvm);
+ break;
+ case H_SVM_INIT_DONE:
+ ret = kvmppc_h_svm_init_done(vcpu->kvm);
+ break;
+ case H_SVM_INIT_ABORT:
+ ret = kvmppc_h_svm_init_abort(vcpu->kvm);
+ break;
+
default:
return RESUME_HOST;
}
@@ -1678,7 +1698,14 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
*val = get_reg_val(id, vcpu->arch.pspb);
break;
case KVM_REG_PPC_DPDES:
- *val = get_reg_val(id, vcpu->arch.vcore->dpdes);
+ /*
+ * On POWER9, where we are emulating msgsndp etc.,
+ * we return 1 bit for each vcpu, which can come from
+ * either vcore->dpdes or doorbell_request.
+ * On POWER8, doorbell_request is 0.
+ */
+ *val = get_reg_val(id, vcpu->arch.vcore->dpdes |
+ vcpu->arch.doorbell_request);
break;
case KVM_REG_PPC_VTB:
*val = get_reg_val(id, vcpu->arch.vcore->vtb);
@@ -2247,22 +2274,16 @@ static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
}
#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
-static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
- unsigned int id)
+static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu;
int err;
int core;
struct kvmppc_vcore *vcore;
+ struct kvm *kvm;
+ unsigned int id;
- err = -ENOMEM;
- vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu)
- goto out;
-
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_vcpu;
+ kvm = vcpu->kvm;
+ id = vcpu->vcpu_id;
vcpu->arch.shared = &vcpu->arch.shregs;
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -2344,7 +2365,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
mutex_unlock(&kvm->lock);
if (!vcore)
- goto free_vcpu;
+ return err;
spin_lock(&vcore->lock);
++vcore->num_threads;
@@ -2359,12 +2380,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
debugfs_vcpu_init(vcpu, id);
- return vcpu;
-
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu);
-out:
- return ERR_PTR(err);
+ return 0;
}
static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
@@ -2418,8 +2434,6 @@ static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu)
unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
spin_unlock(&vcpu->arch.vpa_update_lock);
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu)
@@ -2444,15 +2458,6 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
vcpu->arch.timer_running = 1;
}
-static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.ceded = 0;
- if (vcpu->arch.timer_running) {
- hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
- vcpu->arch.timer_running = 0;
- }
-}
-
extern int __kvmppc_vcore_entry(void);
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
@@ -2860,7 +2865,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
if (!spin_trylock(&pvc->lock))
continue;
prepare_threads(pvc);
- if (!pvc->n_runnable) {
+ if (!pvc->n_runnable || !pvc->kvm->arch.mmu_ready) {
list_del_init(&pvc->preempt_list);
if (pvc->runner == NULL) {
pvc->vcore_state = VCORE_INACTIVE;
@@ -2881,15 +2886,20 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
spin_unlock(&lp->lock);
}
-static bool recheck_signals(struct core_info *cip)
+static bool recheck_signals_and_mmu(struct core_info *cip)
{
int sub, i;
struct kvm_vcpu *vcpu;
+ struct kvmppc_vcore *vc;
- for (sub = 0; sub < cip->n_subcores; ++sub)
- for_each_runnable_thread(i, vcpu, cip->vc[sub])
+ for (sub = 0; sub < cip->n_subcores; ++sub) {
+ vc = cip->vc[sub];
+ if (!vc->kvm->arch.mmu_ready)
+ return true;
+ for_each_runnable_thread(i, vcpu, vc)
if (signal_pending(vcpu->arch.run_task))
return true;
+ }
return false;
}
@@ -3119,7 +3129,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
local_irq_disable();
hard_irq_disable();
if (lazy_irq_pending() || need_resched() ||
- recheck_signals(&core_info) || !vc->kvm->arch.mmu_ready) {
+ recheck_signals_and_mmu(&core_info)) {
local_irq_enable();
vc->vcore_state = VCORE_INACTIVE;
/* Unlock all except the primary vcore */
@@ -3398,7 +3408,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
}
if (vc->pcr)
- mtspr(SPRN_PCR, vc->pcr);
+ mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
mtspr(SPRN_DPDES, vc->dpdes);
mtspr(SPRN_VTB, vc->vtb);
@@ -3478,7 +3488,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
vc->vtb = mfspr(SPRN_VTB);
mtspr(SPRN_DPDES, 0);
if (vc->pcr)
- mtspr(SPRN_PCR, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
if (vc->tb_offset_applied) {
u64 new_tb = mftb() - vc->tb_offset_applied;
@@ -4265,7 +4275,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
user_vrsave = mfspr(SPRN_VRSAVE);
vcpu->arch.wqp = &vcpu->arch.vcore->wq;
- vcpu->arch.pgdir = current->mm->pgd;
+ vcpu->arch.pgdir = kvm->mm->pgd;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
do {
@@ -4496,6 +4506,29 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) &&
((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES))
kvmppc_radix_flush_memslot(kvm, old);
+ /*
+ * If UV hasn't yet called H_SVM_INIT_START, don't register memslots.
+ */
+ if (!kvm->arch.secure_guest)
+ return;
+
+ switch (change) {
+ case KVM_MR_CREATE:
+ if (kvmppc_uvmem_slot_init(kvm, new))
+ return;
+ uv_register_mem_slot(kvm->arch.lpid,
+ new->base_gfn << PAGE_SHIFT,
+ new->npages * PAGE_SIZE,
+ 0, new->id);
+ break;
+ case KVM_MR_DELETE:
+ uv_unregister_mem_slot(kvm->arch.lpid, old->id);
+ kvmppc_uvmem_slot_free(kvm, old);
+ break;
+ default:
+ /* TODO: Handle KVM_MR_MOVE */
+ break;
+ }
}
/*
@@ -4597,14 +4630,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
/* Look up the VMA for the start of this memory slot */
hva = memslot->userspace_addr;
- down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm, hva);
+ down_read(&kvm->mm->mmap_sem);
+ vma = find_vma(kvm->mm, hva);
if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
goto up_out;
psize = vma_kernel_pagesize(vma);
- up_read(&current->mm->mmap_sem);
+ up_read(&kvm->mm->mmap_sem);
/* We can handle 4k, 64k or 16M pages in the VRMA */
if (psize >= 0x1000000)
@@ -4637,7 +4670,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
return err;
up_out:
- up_read(&current->mm->mmap_sem);
+ up_read(&kvm->mm->mmap_sem);
goto out_srcu;
}
@@ -4769,6 +4802,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
char buf[32];
int ret;
+ mutex_init(&kvm->arch.uvmem_lock);
+ INIT_LIST_HEAD(&kvm->arch.uvmem_pfns);
mutex_init(&kvm->arch.mmu_setup_lock);
/* Allocate the guest's logical partition ID */
@@ -4938,8 +4973,11 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
if (nesting_enabled(kvm))
kvmhv_release_all_nested(kvm);
kvm->arch.process_table = 0;
+ if (kvm->arch.secure_guest)
+ uv_svm_terminate(kvm->arch.lpid);
kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
}
+
kvmppc_free_lpid(kvm->arch.lpid);
kvmppc_free_pimap(kvm);
@@ -5379,6 +5417,94 @@ static int kvmhv_store_to_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
return rc;
}
+static void unpin_vpa_reset(struct kvm *kvm, struct kvmppc_vpa *vpa)
+{
+ unpin_vpa(kvm, vpa);
+ vpa->gpa = 0;
+ vpa->pinned_addr = NULL;
+ vpa->dirty = false;
+ vpa->update_pending = 0;
+}
+
+/*
+ * IOCTL handler to turn off secure mode of guest
+ *
+ * - Release all device pages
+ * - Issue ucall to terminate the guest on the UV side
+ * - Unpin the VPA pages.
+ * - Reinit the partition scoped page tables
+ */
+static int kvmhv_svm_off(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int mmu_was_ready;
+ int srcu_idx;
+ int ret = 0;
+ int i;
+
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return ret;
+
+ mutex_lock(&kvm->arch.mmu_setup_lock);
+ mmu_was_ready = kvm->arch.mmu_ready;
+ if (kvm->arch.mmu_ready) {
+ kvm->arch.mmu_ready = 0;
+ /* order mmu_ready vs. vcpus_running */
+ smp_mb();
+ if (atomic_read(&kvm->arch.vcpus_running)) {
+ kvm->arch.mmu_ready = 1;
+ ret = -EBUSY;
+ goto out;
+ }
+ }
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ struct kvm_memory_slot *memslot;
+ struct kvm_memslots *slots = __kvm_memslots(kvm, i);
+
+ if (!slots)
+ continue;
+
+ kvm_for_each_memslot(memslot, slots) {
+ kvmppc_uvmem_drop_pages(memslot, kvm, true);
+ uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
+ }
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ ret = uv_svm_terminate(kvm->arch.lpid);
+ if (ret != U_SUCCESS) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * When secure guest is reset, all the guest pages are sent
+ * to UV via UV_PAGE_IN before the non-boot vcpus get a
+ * chance to run and unpin their VPA pages. Unpinning of all
+ * VPA pages is done here explicitly so that VPA pages
+ * can be migrated to the secure side.
+ *
+ * This is required to for the secure SMP guest to reboot
+ * correctly.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ unpin_vpa_reset(kvm, &vcpu->arch.dtl);
+ unpin_vpa_reset(kvm, &vcpu->arch.slb_shadow);
+ unpin_vpa_reset(kvm, &vcpu->arch.vpa);
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ }
+
+ kvmppc_setup_partition_table(kvm);
+ kvm->arch.secure_guest = 0;
+ kvm->arch.mmu_ready = mmu_was_ready;
+out:
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
+ return ret;
+}
+
static struct kvmppc_ops kvm_ops_hv = {
.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -5386,6 +5512,7 @@ static struct kvmppc_ops kvm_ops_hv = {
.set_one_reg = kvmppc_set_one_reg_hv,
.vcpu_load = kvmppc_core_vcpu_load_hv,
.vcpu_put = kvmppc_core_vcpu_put_hv,
+ .inject_interrupt = kvmppc_inject_interrupt_hv,
.set_msr = kvmppc_set_msr_hv,
.vcpu_run = kvmppc_vcpu_run_hv,
.vcpu_create = kvmppc_core_vcpu_create_hv,
@@ -5421,6 +5548,7 @@ static struct kvmppc_ops kvm_ops_hv = {
.enable_nested = kvmhv_enable_nested,
.load_from_eaddr = kvmhv_load_from_eaddr,
.store_to_eaddr = kvmhv_store_to_eaddr,
+ .svm_off = kvmhv_svm_off,
};
static int kvm_init_subcore_bitmap(void)
@@ -5462,6 +5590,12 @@ static int kvmppc_radix_possible(void)
static int kvmppc_book3s_init_hv(void)
{
int r;
+
+ if (!tlbie_capable) {
+ pr_err("KVM-HV: Host does not support TLBIE\n");
+ return -ENODEV;
+ }
+
/*
* FIXME!! Do we need to check on all cpus ?
*/
@@ -5523,11 +5657,16 @@ static int kvmppc_book3s_init_hv(void)
no_mixing_hpt_and_radix = true;
}
+ r = kvmppc_uvmem_init();
+ if (r < 0)
+ pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r);
+
return r;
}
static void kvmppc_book3s_exit_hv(void)
{
+ kvmppc_uvmem_free();
kvmppc_free_host_rm_ops();
if (kvmppc_radix_possible())
kvmppc_radix_exit();
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 7c1909657b55..7cd3cf3d366b 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -755,6 +755,71 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
local_paca->kvm_hstate.kvm_split_mode = NULL;
}
+static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.ceded = 0;
+ if (vcpu->arch.timer_running) {
+ hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+ vcpu->arch.timer_running = 0;
+ }
+}
+
+void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
+{
+ /*
+ * Check for illegal transactional state bit combination
+ * and if we find it, force the TS field to a safe state.
+ */
+ if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
+ msr &= ~MSR_TS_MASK;
+ vcpu->arch.shregs.msr = msr;
+ kvmppc_end_cede(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvmppc_set_msr_hv);
+
+static void inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+ unsigned long msr, pc, new_msr, new_pc;
+
+ msr = kvmppc_get_msr(vcpu);
+ pc = kvmppc_get_pc(vcpu);
+ new_msr = vcpu->arch.intr_msr;
+ new_pc = vec;
+
+ /* If transactional, change to suspend mode on IRQ delivery */
+ if (MSR_TM_TRANSACTIONAL(msr))
+ new_msr |= MSR_TS_S;
+ else
+ new_msr |= msr & MSR_TS_MASK;
+
+ /*
+ * Perform MSR and PC adjustment for LPCR[AIL]=3 if it is set and
+ * applicable. AIL=2 is not supported.
+ *
+ * AIL does not apply to SRESET, MCE, or HMI (which is never
+ * delivered to the guest), and does not apply if IR=0 or DR=0.
+ */
+ if (vec != BOOK3S_INTERRUPT_SYSTEM_RESET &&
+ vec != BOOK3S_INTERRUPT_MACHINE_CHECK &&
+ (vcpu->arch.vcore->lpcr & LPCR_AIL) == LPCR_AIL_3 &&
+ (msr & (MSR_IR|MSR_DR)) == (MSR_IR|MSR_DR) ) {
+ new_msr |= MSR_IR | MSR_DR;
+ new_pc += 0xC000000000004000ULL;
+ }
+
+ kvmppc_set_srr0(vcpu, pc);
+ kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags);
+ kvmppc_set_pc(vcpu, new_pc);
+ vcpu->arch.shregs.msr = new_msr;
+}
+
+void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+ inject_interrupt(vcpu, vec, srr1_flags);
+ kvmppc_end_cede(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvmppc_inject_interrupt_hv);
+
/*
* Is there a PRIV_DOORBELL pending for the guest (on POWER9)?
* Can we inject a Decrementer or a External interrupt?
@@ -762,7 +827,6 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
{
int ext;
- unsigned long vec = 0;
unsigned long lpcr;
/* Insert EXTERNAL bit into LPCR at the MER bit position */
@@ -774,26 +838,16 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
if (vcpu->arch.shregs.msr & MSR_EE) {
if (ext) {
- vec = BOOK3S_INTERRUPT_EXTERNAL;
+ inject_interrupt(vcpu, BOOK3S_INTERRUPT_EXTERNAL, 0);
} else {
long int dec = mfspr(SPRN_DEC);
if (!(lpcr & LPCR_LD))
dec = (int) dec;
if (dec < 0)
- vec = BOOK3S_INTERRUPT_DECREMENTER;
+ inject_interrupt(vcpu,
+ BOOK3S_INTERRUPT_DECREMENTER, 0);
}
}
- if (vec) {
- unsigned long msr, old_msr = vcpu->arch.shregs.msr;
-
- kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
- kvmppc_set_srr1(vcpu, old_msr);
- kvmppc_set_pc(vcpu, vec);
- msr = vcpu->arch.intr_msr;
- if (MSR_TM_ACTIVE(old_msr))
- msr |= MSR_TS_S;
- vcpu->arch.shregs.msr = msr;
- }
if (vcpu->arch.doorbell_request) {
mtspr(SPRN_DPDES, 1);
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 735e0ac6f5b2..dc97e5be76f6 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -29,7 +29,7 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
- hr->pcr = vc->pcr;
+ hr->pcr = vc->pcr | PCR_MASK;
hr->dpdes = vc->dpdes;
hr->hfscr = vcpu->arch.hfscr;
hr->tb_offset = vc->tb_offset;
@@ -65,7 +65,7 @@ static void byteswap_hv_regs(struct hv_guest_state *hr)
hr->lpid = swab32(hr->lpid);
hr->vcpu_token = swab32(hr->vcpu_token);
hr->lpcr = swab64(hr->lpcr);
- hr->pcr = swab64(hr->pcr);
+ hr->pcr = swab64(hr->pcr) | PCR_MASK;
hr->amor = swab64(hr->amor);
hr->dpdes = swab64(hr->dpdes);
hr->hfscr = swab64(hr->hfscr);
@@ -148,7 +148,7 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
- vc->pcr = hr->pcr;
+ vc->pcr = hr->pcr | PCR_MASK;
vc->dpdes = hr->dpdes;
vcpu->arch.hfscr = hr->hfscr;
vcpu->arch.dawr = hr->dawr0;
@@ -398,7 +398,7 @@ static void kvmhv_flush_lpid(unsigned int lpid)
long rc;
if (!kvmhv_on_pseries()) {
- radix__flush_tlb_lpid(lpid);
+ radix__flush_all_lpid(lpid);
return;
}
@@ -411,7 +411,7 @@ static void kvmhv_flush_lpid(unsigned int lpid)
void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1)
{
if (!kvmhv_on_pseries()) {
- mmu_partition_table_set_entry(lpid, dw0, dw1);
+ mmu_partition_table_set_entry(lpid, dw0, dw1, true);
return;
}
@@ -1186,7 +1186,7 @@ static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
forward_to_l1:
vcpu->arch.fault_dsisr = flags;
if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
- vcpu->arch.shregs.msr &= ~0x783f0000ul;
+ vcpu->arch.shregs.msr &= SRR1_MSR_BITS;
vcpu->arch.shregs.msr |= flags;
}
return RESUME_HOST;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 63e0ce91e29d..220305454c23 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -99,7 +99,7 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
} else {
rev->forw = rev->back = pte_index;
*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
- pte_index | KVMPPC_RMAP_PRESENT;
+ pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT;
}
unlock_rmap(rmap);
}
@@ -433,6 +433,37 @@ static inline int is_mmio_hpte(unsigned long v, unsigned long r)
(HPTE_R_KEY_HI | HPTE_R_KEY_LO));
}
+static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
+{
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ /* Radix flush for a hash guest */
+
+ unsigned long rb,rs,prs,r,ric;
+
+ rb = PPC_BIT(52); /* IS = 2 */
+ rs = 0; /* lpid = 0 */
+ prs = 0; /* partition scoped */
+ r = 1; /* radix format */
+ ric = 0; /* RIC_FLSUH_TLB */
+
+ /*
+ * Need the extra ptesync to make sure we don't
+ * re-order the tlbie
+ */
+ asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs),
+ "i"(ric), "r"(rs) : "memory");
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
+ "r" (rb_value), "r" (lpid));
+ }
+}
+
static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
long npages, int global, bool need_sync)
{
@@ -451,16 +482,7 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
"r" (rbvalues[i]), "r" (kvm->arch.lpid));
}
- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
- /*
- * Need the extra ptesync to make sure we don't
- * re-order the tlbie
- */
- asm volatile("ptesync": : :"memory");
- asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
- "r" (rbvalues[0]), "r" (kvm->arch.lpid));
- }
-
+ fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid);
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
} else {
if (need_sync)
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 4d2ec77d806c..287d5911df0f 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -58,7 +58,7 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
hcpu = hcore << threads_shift;
kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
- kvmppc_set_host_ipi(hcpu, 1);
+ kvmppc_set_host_ipi(hcpu);
smp_mb();
kvmhv_rm_send_ipi(hcpu);
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 337e64468d78..dbc2fecc37f0 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -11,6 +11,7 @@
*/
#include <asm/ppc_asm.h>
+#include <asm/code-patching-asm.h>
#include <asm/kvm_asm.h>
#include <asm/reg.h>
#include <asm/mmu.h>
@@ -29,6 +30,7 @@
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
#include <asm/cpuidle.h>
+#include <asm/ultravisor-api.h>
/* Sign-extend HDEC if not on POWER9 */
#define EXTEND_HDEC(reg) \
@@ -643,8 +645,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/* Load guest PCR value to select appropriate compat mode */
37: ld r7, VCORE_PCR(r5)
- cmpdi r7, 0
+ LOAD_REG_IMMEDIATE(r6, PCR_MASK)
+ cmpld r7, r6
beq 38f
+ or r7, r7, r6
mtspr SPRN_PCR, r7
38:
@@ -942,6 +946,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
ld r11, VCPU_XIVE_SAVED_STATE(r4)
li r9, TM_QW1_OS
lwz r8, VCPU_XIVE_CAM_WORD(r4)
+ cmpwi r8, 0
+ beq no_xive
li r7, TM_QW1_OS + TM_WORD2
mfmsr r0
andi. r0, r0, MSR_DR /* in real mode? */
@@ -1083,16 +1089,10 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r5, VCPU_LR(r4)
- ld r6, VCPU_CR(r4)
mtlr r5
- mtcr r6
ld r1, VCPU_GPR(R1)(r4)
- ld r2, VCPU_GPR(R2)(r4)
- ld r3, VCPU_GPR(R3)(r4)
ld r5, VCPU_GPR(R5)(r4)
- ld r6, VCPU_GPR(R6)(r4)
- ld r7, VCPU_GPR(R7)(r4)
ld r8, VCPU_GPR(R8)(r4)
ld r9, VCPU_GPR(R9)(r4)
ld r10, VCPU_GPR(R10)(r4)
@@ -1110,10 +1110,42 @@ BEGIN_FTR_SECTION
mtspr SPRN_HDSISR, r0
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ ld r6, VCPU_KVM(r4)
+ lbz r7, KVM_SECURE_GUEST(r6)
+ cmpdi r7, 0
+ ld r6, VCPU_GPR(R6)(r4)
+ ld r7, VCPU_GPR(R7)(r4)
+ bne ret_to_ultra
+
+ ld r0, VCPU_CR(r4)
+ mtcr r0
+
ld r0, VCPU_GPR(R0)(r4)
+ ld r2, VCPU_GPR(R2)(r4)
+ ld r3, VCPU_GPR(R3)(r4)
ld r4, VCPU_GPR(R4)(r4)
HRFI_TO_GUEST
b .
+/*
+ * Use UV_RETURN ultracall to return control back to the Ultravisor after
+ * processing an hypercall or interrupt that was forwarded (a.k.a. reflected)
+ * to the Hypervisor.
+ *
+ * All registers have already been loaded, except:
+ * R0 = hcall result
+ * R2 = SRR1, so UV can detect a synthesized interrupt (if any)
+ * R3 = UV_RETURN
+ */
+ret_to_ultra:
+ ld r0, VCPU_CR(r4)
+ mtcr r0
+
+ ld r0, VCPU_GPR(R3)(r4)
+ mfspr r2, SPRN_SRR1
+ li r3, 0
+ ori r3, r3, UV_RETURN
+ ld r4, VCPU_GPR(R4)(r4)
+ sc 2
/*
* Enter the guest on a P9 or later system where we have exactly
@@ -1456,6 +1488,13 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
1:
#endif /* CONFIG_KVM_XICS */
+ /*
+ * Possibly flush the link stack here, before we do a blr in
+ * guest_exit_short_path.
+ */
+1: nop
+ patch_site 1b patch__call_kvm_flush_link_stack
+
/* If we came in through the P9 short path, go back out to C now */
lwz r0, STACK_SLOT_SHORT_PATH(r1)
cmpwi r0, 0
@@ -1762,6 +1801,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
tlbsync
ptesync
+BEGIN_FTR_SECTION
/* Radix: Handle the case where the guest used an illegal PID */
LOAD_REG_ADDR(r4, mmu_base_pid)
lwz r3, VCPU_GUEST_PID(r9)
@@ -1791,6 +1831,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
addi r7,r7,0x1000
bdnz 1b
ptesync
+END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG)
2:
#endif /* CONFIG_PPC_RADIX_MMU */
@@ -1884,12 +1925,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* Reset PCR */
ld r0, VCORE_PCR(r5)
- cmpdi r0, 0
+ LOAD_REG_IMMEDIATE(r6, PCR_MASK)
+ cmpld r0, r6
beq 18f
- li r0, 0
- mtspr SPRN_PCR, r0
+ mtspr SPRN_PCR, r6
18:
/* Signal secondary CPUs to continue */
+ li r0, 0
stb r0,VCORE_IN_GUEST(r5)
19: lis r8,0x7fff /* MAX_INT@h */
mtspr SPRN_HDEC,r8
@@ -1931,6 +1973,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mtlr r0
blr
+.balign 32
+.global kvm_flush_link_stack
+kvm_flush_link_stack:
+ /* Save LR into r0 */
+ mflr r0
+
+ /* Flush the link stack. On Power8 it's up to 32 entries in size. */
+ .rept 32
+ bl .+4
+ .endr
+
+ /* And on Power9 it's up to 64. */
+BEGIN_FTR_SECTION
+ .rept 32
+ bl .+4
+ .endr
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
+ /* Restore LR */
+ mtlr r0
+ blr
+
kvmppc_guest_external:
/* External interrupt, first check for host_ipi. If this is
* set, we know the host wants us out so let's do it now
@@ -2831,29 +2895,39 @@ kvm_cede_prodded:
kvm_cede_exit:
ld r9, HSTATE_KVM_VCPU(r13)
#ifdef CONFIG_KVM_XICS
- /* Abort if we still have a pending escalation */
+ /* are we using XIVE with single escalation? */
+ ld r10, VCPU_XIVE_ESC_VADDR(r9)
+ cmpdi r10, 0
+ beq 3f
+ li r6, XIVE_ESB_SET_PQ_00
+ /*
+ * If we still have a pending escalation, abort the cede,
+ * and we must set PQ to 10 rather than 00 so that we don't
+ * potentially end up with two entries for the escalation
+ * interrupt in the XIVE interrupt queue. In that case
+ * we also don't want to set xive_esc_on to 1 here in
+ * case we race with xive_esc_irq().
+ */
lbz r5, VCPU_XIVE_ESC_ON(r9)
cmpwi r5, 0
- beq 1f
+ beq 4f
li r0, 0
stb r0, VCPU_CEDED(r9)
-1: /* Enable XIVE escalation */
- li r5, XIVE_ESB_SET_PQ_00
+ li r6, XIVE_ESB_SET_PQ_10
+ b 5f
+4: li r0, 1
+ stb r0, VCPU_XIVE_ESC_ON(r9)
+ /* make sure store to xive_esc_on is seen before xive_esc_irq runs */
+ sync
+5: /* Enable XIVE escalation */
mfmsr r0
andi. r0, r0, MSR_DR /* in real mode? */
beq 1f
- ld r10, VCPU_XIVE_ESC_VADDR(r9)
- cmpdi r10, 0
- beq 3f
- ldx r0, r10, r5
+ ldx r0, r10, r6
b 2f
1: ld r10, VCPU_XIVE_ESC_RADDR(r9)
- cmpdi r10, 0
- beq 3f
- ldcix r0, r10, r5
+ ldcix r0, r10, r6
2: sync
- li r0, 1
- stb r0, VCPU_XIVE_ESC_ON(r9)
#endif /* CONFIG_KVM_XICS */
3: b guest_exit_cont
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
new file mode 100644
index 000000000000..79b1202b1c62
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -0,0 +1,813 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Secure pages management: Migration of pages between normal and secure
+ * memory of KVM guests.
+ *
+ * Copyright 2018 Bharata B Rao, IBM Corp. <bharata@linux.ibm.com>
+ */
+
+/*
+ * A pseries guest can be run as secure guest on Ultravisor-enabled
+ * POWER platforms. On such platforms, this driver will be used to manage
+ * the movement of guest pages between the normal memory managed by
+ * hypervisor (HV) and secure memory managed by Ultravisor (UV).
+ *
+ * The page-in or page-out requests from UV will come to HV as hcalls and
+ * HV will call back into UV via ultracalls to satisfy these page requests.
+ *
+ * Private ZONE_DEVICE memory equal to the amount of secure memory
+ * available in the platform for running secure guests is hotplugged.
+ * Whenever a page belonging to the guest becomes secure, a page from this
+ * private device memory is used to represent and track that secure page
+ * on the HV side. Some pages (like virtio buffers, VPA pages etc) are
+ * shared between UV and HV. However such pages aren't represented by
+ * device private memory and mappings to shared memory exist in both
+ * UV and HV page tables.
+ */
+
+/*
+ * Notes on locking
+ *
+ * kvm->arch.uvmem_lock is a per-guest lock that prevents concurrent
+ * page-in and page-out requests for the same GPA. Concurrent accesses
+ * can either come via UV (guest vCPUs requesting for same page)
+ * or when HV and guest simultaneously access the same page.
+ * This mutex serializes the migration of page from HV(normal) to
+ * UV(secure) and vice versa. So the serialization points are around
+ * migrate_vma routines and page-in/out routines.
+ *
+ * Per-guest mutex comes with a cost though. Mainly it serializes the
+ * fault path as page-out can occur when HV faults on accessing secure
+ * guest pages. Currently UV issues page-in requests for all the guest
+ * PFNs one at a time during early boot (UV_ESM uvcall), so this is
+ * not a cause for concern. Also currently the number of page-outs caused
+ * by HV touching secure pages is very very low. If an when UV supports
+ * overcommitting, then we might see concurrent guest driven page-outs.
+ *
+ * Locking order
+ *
+ * 1. kvm->srcu - Protects KVM memslots
+ * 2. kvm->mm->mmap_sem - find_vma, migrate_vma_pages and helpers, ksm_madvise
+ * 3. kvm->arch.uvmem_lock - protects read/writes to uvmem slots thus acting
+ * as sync-points for page-in/out
+ */
+
+/*
+ * Notes on page size
+ *
+ * Currently UV uses 2MB mappings internally, but will issue H_SVM_PAGE_IN
+ * and H_SVM_PAGE_OUT hcalls in PAGE_SIZE(64K) granularity. HV tracks
+ * secure GPAs at 64K page size and maintains one device PFN for each
+ * 64K secure GPA. UV_PAGE_IN and UV_PAGE_OUT calls by HV are also issued
+ * for 64K page at a time.
+ *
+ * HV faulting on secure pages: When HV touches any secure page, it
+ * faults and issues a UV_PAGE_OUT request with 64K page size. Currently
+ * UV splits and remaps the 2MB page if necessary and copies out the
+ * required 64K page contents.
+ *
+ * Shared pages: Whenever guest shares a secure page, UV will split and
+ * remap the 2MB page if required and issue H_SVM_PAGE_IN with 64K page size.
+ *
+ * HV invalidating a page: When a regular page belonging to secure
+ * guest gets unmapped, HV informs UV with UV_PAGE_INVAL of 64K
+ * page size. Using 64K page size is correct here because any non-secure
+ * page will essentially be of 64K page size. Splitting by UV during sharing
+ * and page-out ensures this.
+ *
+ * Page fault handling: When HV handles page fault of a page belonging
+ * to secure guest, it sends that to UV with a 64K UV_PAGE_IN request.
+ * Using 64K size is correct here too as UV would have split the 2MB page
+ * into 64k mappings and would have done page-outs earlier.
+ *
+ * In summary, the current secure pages handling code in HV assumes
+ * 64K page size and in fact fails any page-in/page-out requests of
+ * non-64K size upfront. If and when UV starts supporting multiple
+ * page-sizes, we need to break this assumption.
+ */
+
+#include <linux/pagemap.h>
+#include <linux/migrate.h>
+#include <linux/kvm_host.h>
+#include <linux/ksm.h>
+#include <asm/ultravisor.h>
+#include <asm/mman.h>
+#include <asm/kvm_ppc.h>
+
+static struct dev_pagemap kvmppc_uvmem_pgmap;
+static unsigned long *kvmppc_uvmem_bitmap;
+static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);
+
+#define KVMPPC_UVMEM_PFN (1UL << 63)
+
+struct kvmppc_uvmem_slot {
+ struct list_head list;
+ unsigned long nr_pfns;
+ unsigned long base_pfn;
+ unsigned long *pfns;
+};
+
+struct kvmppc_uvmem_page_pvt {
+ struct kvm *kvm;
+ unsigned long gpa;
+ bool skip_page_out;
+};
+
+int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
+{
+ struct kvmppc_uvmem_slot *p;
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+ p->pfns = vzalloc(array_size(slot->npages, sizeof(*p->pfns)));
+ if (!p->pfns) {
+ kfree(p);
+ return -ENOMEM;
+ }
+ p->nr_pfns = slot->npages;
+ p->base_pfn = slot->base_gfn;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ list_add(&p->list, &kvm->arch.uvmem_pfns);
+ mutex_unlock(&kvm->arch.uvmem_lock);
+
+ return 0;
+}
+
+/*
+ * All device PFNs are already released by the time we come here.
+ */
+void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot)
+{
+ struct kvmppc_uvmem_slot *p, *next;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ list_for_each_entry_safe(p, next, &kvm->arch.uvmem_pfns, list) {
+ if (p->base_pfn == slot->base_gfn) {
+ vfree(p->pfns);
+ list_del(&p->list);
+ kfree(p);
+ break;
+ }
+ }
+ mutex_unlock(&kvm->arch.uvmem_lock);
+}
+
+static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
+ struct kvm *kvm)
+{
+ struct kvmppc_uvmem_slot *p;
+
+ list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
+ if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
+ unsigned long index = gfn - p->base_pfn;
+
+ p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN;
+ return;
+ }
+ }
+}
+
+static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm)
+{
+ struct kvmppc_uvmem_slot *p;
+
+ list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
+ if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
+ p->pfns[gfn - p->base_pfn] = 0;
+ return;
+ }
+ }
+}
+
+static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
+ unsigned long *uvmem_pfn)
+{
+ struct kvmppc_uvmem_slot *p;
+
+ list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
+ if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
+ unsigned long index = gfn - p->base_pfn;
+
+ if (p->pfns[index] & KVMPPC_UVMEM_PFN) {
+ if (uvmem_pfn)
+ *uvmem_pfn = p->pfns[index] &
+ ~KVMPPC_UVMEM_PFN;
+ return true;
+ } else
+ return false;
+ }
+ }
+ return false;
+}
+
+unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int ret = H_SUCCESS;
+ int srcu_idx;
+
+ if (!kvmppc_uvmem_bitmap)
+ return H_UNSUPPORTED;
+
+ /* Only radix guests can be secure guests */
+ if (!kvm_is_radix(kvm))
+ return H_UNSUPPORTED;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(memslot, slots) {
+ if (kvmppc_uvmem_slot_init(kvm, memslot)) {
+ ret = H_PARAMETER;
+ goto out;
+ }
+ ret = uv_register_mem_slot(kvm->arch.lpid,
+ memslot->base_gfn << PAGE_SHIFT,
+ memslot->npages * PAGE_SIZE,
+ 0, memslot->id);
+ if (ret < 0) {
+ kvmppc_uvmem_slot_free(kvm, memslot);
+ ret = H_PARAMETER;
+ goto out;
+ }
+ }
+ kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_START;
+out:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return ret;
+}
+
+unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+{
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return H_UNSUPPORTED;
+
+ kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
+ pr_info("LPID %d went secure\n", kvm->arch.lpid);
+ return H_SUCCESS;
+}
+
+/*
+ * Drop device pages that we maintain for the secure guest
+ *
+ * We first mark the pages to be skipped from UV_PAGE_OUT when there
+ * is HV side fault on these pages. Next we *get* these pages, forcing
+ * fault on them, do fault time migration to replace the device PTEs in
+ * QEMU page table with normal PTEs from newly allocated pages.
+ */
+void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
+ struct kvm *kvm, bool skip_page_out)
+{
+ int i;
+ struct kvmppc_uvmem_page_pvt *pvt;
+ unsigned long pfn, uvmem_pfn;
+ unsigned long gfn = free->base_gfn;
+
+ for (i = free->npages; i; --i, ++gfn) {
+ struct page *uvmem_page;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+ mutex_unlock(&kvm->arch.uvmem_lock);
+ continue;
+ }
+
+ uvmem_page = pfn_to_page(uvmem_pfn);
+ pvt = uvmem_page->zone_device_data;
+ pvt->skip_page_out = skip_page_out;
+ mutex_unlock(&kvm->arch.uvmem_lock);
+
+ pfn = gfn_to_pfn(kvm, gfn);
+ if (is_error_noslot_pfn(pfn))
+ continue;
+ kvm_release_pfn_clean(pfn);
+ }
+}
+
+unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
+{
+ int srcu_idx;
+ struct kvm_memory_slot *memslot;
+
+ /*
+ * Expect to be called only after INIT_START and before INIT_DONE.
+ * If INIT_DONE was completed, use normal VM termination sequence.
+ */
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return H_UNSUPPORTED;
+
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+ return H_STATE;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+
+ kvm_for_each_memslot(memslot, kvm_memslots(kvm))
+ kvmppc_uvmem_drop_pages(memslot, kvm, false);
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ kvm->arch.secure_guest = 0;
+ uv_svm_terminate(kvm->arch.lpid);
+
+ return H_PARAMETER;
+}
+
+/*
+ * Get a free device PFN from the pool
+ *
+ * Called when a normal page is moved to secure memory (UV_PAGE_IN). Device
+ * PFN will be used to keep track of the secure page on HV side.
+ *
+ * Called with kvm->arch.uvmem_lock held
+ */
+static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
+{
+ struct page *dpage = NULL;
+ unsigned long bit, uvmem_pfn;
+ struct kvmppc_uvmem_page_pvt *pvt;
+ unsigned long pfn_last, pfn_first;
+
+ pfn_first = kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT;
+ pfn_last = pfn_first +
+ (resource_size(&kvmppc_uvmem_pgmap.res) >> PAGE_SHIFT);
+
+ spin_lock(&kvmppc_uvmem_bitmap_lock);
+ bit = find_first_zero_bit(kvmppc_uvmem_bitmap,
+ pfn_last - pfn_first);
+ if (bit >= (pfn_last - pfn_first))
+ goto out;
+ bitmap_set(kvmppc_uvmem_bitmap, bit, 1);
+ spin_unlock(&kvmppc_uvmem_bitmap_lock);
+
+ pvt = kzalloc(sizeof(*pvt), GFP_KERNEL);
+ if (!pvt)
+ goto out_clear;
+
+ uvmem_pfn = bit + pfn_first;
+ kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
+
+ pvt->gpa = gpa;
+ pvt->kvm = kvm;
+
+ dpage = pfn_to_page(uvmem_pfn);
+ dpage->zone_device_data = pvt;
+ get_page(dpage);
+ lock_page(dpage);
+ return dpage;
+out_clear:
+ spin_lock(&kvmppc_uvmem_bitmap_lock);
+ bitmap_clear(kvmppc_uvmem_bitmap, bit, 1);
+out:
+ spin_unlock(&kvmppc_uvmem_bitmap_lock);
+ return NULL;
+}
+
+/*
+ * Alloc a PFN from private device memory pool and copy page from normal
+ * memory to secure memory using UV_PAGE_IN uvcall.
+ */
+static int
+kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, unsigned long gpa, struct kvm *kvm,
+ unsigned long page_shift, bool *downgrade)
+{
+ unsigned long src_pfn, dst_pfn = 0;
+ struct migrate_vma mig;
+ struct page *spage;
+ unsigned long pfn;
+ struct page *dpage;
+ int ret = 0;
+
+ memset(&mig, 0, sizeof(mig));
+ mig.vma = vma;
+ mig.start = start;
+ mig.end = end;
+ mig.src = &src_pfn;
+ mig.dst = &dst_pfn;
+
+ /*
+ * We come here with mmap_sem write lock held just for
+ * ksm_madvise(), otherwise we only need read mmap_sem.
+ * Hence downgrade to read lock once ksm_madvise() is done.
+ */
+ ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+ MADV_UNMERGEABLE, &vma->vm_flags);
+ downgrade_write(&kvm->mm->mmap_sem);
+ *downgrade = true;
+ if (ret)
+ return ret;
+
+ ret = migrate_vma_setup(&mig);
+ if (ret)
+ return ret;
+
+ if (!(*mig.src & MIGRATE_PFN_MIGRATE)) {
+ ret = -1;
+ goto out_finalize;
+ }
+
+ dpage = kvmppc_uvmem_get_page(gpa, kvm);
+ if (!dpage) {
+ ret = -1;
+ goto out_finalize;
+ }
+
+ pfn = *mig.src >> MIGRATE_PFN_SHIFT;
+ spage = migrate_pfn_to_page(*mig.src);
+ if (spage)
+ uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
+ page_shift);
+
+ *mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+ migrate_vma_pages(&mig);
+out_finalize:
+ migrate_vma_finalize(&mig);
+ return ret;
+}
+
+/*
+ * Shares the page with HV, thus making it a normal page.
+ *
+ * - If the page is already secure, then provision a new page and share
+ * - If the page is a normal page, share the existing page
+ *
+ * In the former case, uses dev_pagemap_ops.migrate_to_ram handler
+ * to unmap the device page from QEMU's page tables.
+ */
+static unsigned long
+kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift)
+{
+
+ int ret = H_PARAMETER;
+ struct page *uvmem_page;
+ struct kvmppc_uvmem_page_pvt *pvt;
+ unsigned long pfn;
+ unsigned long gfn = gpa >> page_shift;
+ int srcu_idx;
+ unsigned long uvmem_pfn;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ mutex_lock(&kvm->arch.uvmem_lock);
+ if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+ uvmem_page = pfn_to_page(uvmem_pfn);
+ pvt = uvmem_page->zone_device_data;
+ pvt->skip_page_out = true;
+ }
+
+retry:
+ mutex_unlock(&kvm->arch.uvmem_lock);
+ pfn = gfn_to_pfn(kvm, gfn);
+ if (is_error_noslot_pfn(pfn))
+ goto out;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+ uvmem_page = pfn_to_page(uvmem_pfn);
+ pvt = uvmem_page->zone_device_data;
+ pvt->skip_page_out = true;
+ kvm_release_pfn_clean(pfn);
+ goto retry;
+ }
+
+ if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift))
+ ret = H_SUCCESS;
+ kvm_release_pfn_clean(pfn);
+ mutex_unlock(&kvm->arch.uvmem_lock);
+out:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return ret;
+}
+
+/*
+ * H_SVM_PAGE_IN: Move page from normal memory to secure memory.
+ *
+ * H_PAGE_IN_SHARED flag makes the page shared which means that the same
+ * memory in is visible from both UV and HV.
+ */
+unsigned long
+kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
+ unsigned long flags, unsigned long page_shift)
+{
+ bool downgrade = false;
+ unsigned long start, end;
+ struct vm_area_struct *vma;
+ int srcu_idx;
+ unsigned long gfn = gpa >> page_shift;
+ int ret;
+
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return H_UNSUPPORTED;
+
+ if (page_shift != PAGE_SHIFT)
+ return H_P3;
+
+ if (flags & ~H_PAGE_IN_SHARED)
+ return H_P2;
+
+ if (flags & H_PAGE_IN_SHARED)
+ return kvmppc_share_page(kvm, gpa, page_shift);
+
+ ret = H_PARAMETER;
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ down_write(&kvm->mm->mmap_sem);
+
+ start = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(start))
+ goto out;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ /* Fail the page-in request of an already paged-in page */
+ if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
+ goto out_unlock;
+
+ end = start + (1UL << page_shift);
+ vma = find_vma_intersection(kvm->mm, start, end);
+ if (!vma || vma->vm_start > start || vma->vm_end < end)
+ goto out_unlock;
+
+ if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
+ &downgrade))
+ ret = H_SUCCESS;
+out_unlock:
+ mutex_unlock(&kvm->arch.uvmem_lock);
+out:
+ if (downgrade)
+ up_read(&kvm->mm->mmap_sem);
+ else
+ up_write(&kvm->mm->mmap_sem);
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return ret;
+}
+
+/*
+ * Provision a new page on HV side and copy over the contents
+ * from secure memory using UV_PAGE_OUT uvcall.
+ */
+static int
+kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, unsigned long page_shift,
+ struct kvm *kvm, unsigned long gpa)
+{
+ unsigned long src_pfn, dst_pfn = 0;
+ struct migrate_vma mig;
+ struct page *dpage, *spage;
+ struct kvmppc_uvmem_page_pvt *pvt;
+ unsigned long pfn;
+ int ret = U_SUCCESS;
+
+ memset(&mig, 0, sizeof(mig));
+ mig.vma = vma;
+ mig.start = start;
+ mig.end = end;
+ mig.src = &src_pfn;
+ mig.dst = &dst_pfn;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ /* The requested page is already paged-out, nothing to do */
+ if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL))
+ goto out;
+
+ ret = migrate_vma_setup(&mig);
+ if (ret)
+ goto out;
+
+ spage = migrate_pfn_to_page(*mig.src);
+ if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE))
+ goto out_finalize;
+
+ if (!is_zone_device_page(spage))
+ goto out_finalize;
+
+ dpage = alloc_page_vma(GFP_HIGHUSER, vma, start);
+ if (!dpage) {
+ ret = -1;
+ goto out_finalize;
+ }
+
+ lock_page(dpage);
+ pvt = spage->zone_device_data;
+ pfn = page_to_pfn(dpage);
+
+ /*
+ * This function is used in two cases:
+ * - When HV touches a secure page, for which we do UV_PAGE_OUT
+ * - When a secure page is converted to shared page, we *get*
+ * the page to essentially unmap the device page. In this
+ * case we skip page-out.
+ */
+ if (!pvt->skip_page_out)
+ ret = uv_page_out(kvm->arch.lpid, pfn << page_shift,
+ gpa, 0, page_shift);
+
+ if (ret == U_SUCCESS)
+ *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED;
+ else {
+ unlock_page(dpage);
+ __free_page(dpage);
+ goto out_finalize;
+ }
+
+ migrate_vma_pages(&mig);
+out_finalize:
+ migrate_vma_finalize(&mig);
+out:
+ mutex_unlock(&kvm->arch.uvmem_lock);
+ return ret;
+}
+
+/*
+ * Fault handler callback that gets called when HV touches any page that
+ * has been moved to secure memory, we ask UV to give back the page by
+ * issuing UV_PAGE_OUT uvcall.
+ *
+ * This eventually results in dropping of device PFN and the newly
+ * provisioned page/PFN gets populated in QEMU page tables.
+ */
+static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf)
+{
+ struct kvmppc_uvmem_page_pvt *pvt = vmf->page->zone_device_data;
+
+ if (kvmppc_svm_page_out(vmf->vma, vmf->address,
+ vmf->address + PAGE_SIZE, PAGE_SHIFT,
+ pvt->kvm, pvt->gpa))
+ return VM_FAULT_SIGBUS;
+ else
+ return 0;
+}
+
+/*
+ * Release the device PFN back to the pool
+ *
+ * Gets called when secure page becomes a normal page during H_SVM_PAGE_OUT.
+ * Gets called with kvm->arch.uvmem_lock held.
+ */
+static void kvmppc_uvmem_page_free(struct page *page)
+{
+ unsigned long pfn = page_to_pfn(page) -
+ (kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT);
+ struct kvmppc_uvmem_page_pvt *pvt;
+
+ spin_lock(&kvmppc_uvmem_bitmap_lock);
+ bitmap_clear(kvmppc_uvmem_bitmap, pfn, 1);
+ spin_unlock(&kvmppc_uvmem_bitmap_lock);
+
+ pvt = page->zone_device_data;
+ page->zone_device_data = NULL;
+ kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
+ kfree(pvt);
+}
+
+static const struct dev_pagemap_ops kvmppc_uvmem_ops = {
+ .page_free = kvmppc_uvmem_page_free,
+ .migrate_to_ram = kvmppc_uvmem_migrate_to_ram,
+};
+
+/*
+ * H_SVM_PAGE_OUT: Move page from secure memory to normal memory.
+ */
+unsigned long
+kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa,
+ unsigned long flags, unsigned long page_shift)
+{
+ unsigned long gfn = gpa >> page_shift;
+ unsigned long start, end;
+ struct vm_area_struct *vma;
+ int srcu_idx;
+ int ret;
+
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return H_UNSUPPORTED;
+
+ if (page_shift != PAGE_SHIFT)
+ return H_P3;
+
+ if (flags)
+ return H_P2;
+
+ ret = H_PARAMETER;
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ down_read(&kvm->mm->mmap_sem);
+ start = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(start))
+ goto out;
+
+ end = start + (1UL << page_shift);
+ vma = find_vma_intersection(kvm->mm, start, end);
+ if (!vma || vma->vm_start > start || vma->vm_end < end)
+ goto out;
+
+ if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa))
+ ret = H_SUCCESS;
+out:
+ up_read(&kvm->mm->mmap_sem);
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return ret;
+}
+
+int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
+{
+ unsigned long pfn;
+ int ret = U_SUCCESS;
+
+ pfn = gfn_to_pfn(kvm, gfn);
+ if (is_error_noslot_pfn(pfn))
+ return -EFAULT;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
+ goto out;
+
+ ret = uv_page_in(kvm->arch.lpid, pfn << PAGE_SHIFT, gfn << PAGE_SHIFT,
+ 0, PAGE_SHIFT);
+out:
+ kvm_release_pfn_clean(pfn);
+ mutex_unlock(&kvm->arch.uvmem_lock);
+ return (ret == U_SUCCESS) ? RESUME_GUEST : -EFAULT;
+}
+
+static u64 kvmppc_get_secmem_size(void)
+{
+ struct device_node *np;
+ int i, len;
+ const __be32 *prop;
+ u64 size = 0;
+
+ np = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware");
+ if (!np)
+ goto out;
+
+ prop = of_get_property(np, "secure-memory-ranges", &len);
+ if (!prop)
+ goto out_put;
+
+ for (i = 0; i < len / (sizeof(*prop) * 4); i++)
+ size += of_read_number(prop + (i * 4) + 2, 2);
+
+out_put:
+ of_node_put(np);
+out:
+ return size;
+}
+
+int kvmppc_uvmem_init(void)
+{
+ int ret = 0;
+ unsigned long size;
+ struct resource *res;
+ void *addr;
+ unsigned long pfn_last, pfn_first;
+
+ size = kvmppc_get_secmem_size();
+ if (!size) {
+ /*
+ * Don't fail the initialization of kvm-hv module if
+ * the platform doesn't export ibm,uv-firmware node.
+ * Let normal guests run on such PEF-disabled platform.
+ */
+ pr_info("KVMPPC-UVMEM: No support for secure guests\n");
+ goto out;
+ }
+
+ res = request_free_mem_region(&iomem_resource, size, "kvmppc_uvmem");
+ if (IS_ERR(res)) {
+ ret = PTR_ERR(res);
+ goto out;
+ }
+
+ kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE;
+ kvmppc_uvmem_pgmap.res = *res;
+ kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops;
+ addr = memremap_pages(&kvmppc_uvmem_pgmap, NUMA_NO_NODE);
+ if (IS_ERR(addr)) {
+ ret = PTR_ERR(addr);
+ goto out_free_region;
+ }
+
+ pfn_first = res->start >> PAGE_SHIFT;
+ pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT);
+ kvmppc_uvmem_bitmap = kcalloc(BITS_TO_LONGS(pfn_last - pfn_first),
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!kvmppc_uvmem_bitmap) {
+ ret = -ENOMEM;
+ goto out_unmap;
+ }
+
+ pr_info("KVMPPC-UVMEM: Secure Memory size 0x%lx\n", size);
+ return ret;
+out_unmap:
+ memunmap_pages(&kvmppc_uvmem_pgmap);
+out_free_region:
+ release_mem_region(res->start, size);
+out:
+ return ret;
+}
+
+void kvmppc_uvmem_free(void)
+{
+ memunmap_pages(&kvmppc_uvmem_pgmap);
+ release_mem_region(kvmppc_uvmem_pgmap.res.start,
+ resource_size(&kvmppc_uvmem_pgmap.res));
+ kfree(kvmppc_uvmem_bitmap);
+}
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index cc65af8fe6f7..729a0f12a752 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -90,7 +90,43 @@ static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu)
kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS);
}
-void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu);
+static void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
+ ulong pc = kvmppc_get_pc(vcpu);
+ ulong lr = kvmppc_get_lr(vcpu);
+ if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
+ kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
+ if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
+ kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK);
+ vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
+ }
+}
+
+static void kvmppc_inject_interrupt_pr(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+ unsigned long msr, pc, new_msr, new_pc;
+
+ kvmppc_unfixup_split_real(vcpu);
+
+ msr = kvmppc_get_msr(vcpu);
+ pc = kvmppc_get_pc(vcpu);
+ new_msr = vcpu->arch.intr_msr;
+ new_pc = to_book3s(vcpu)->hior + vec;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* If transactional, change to suspend mode on IRQ delivery */
+ if (MSR_TM_TRANSACTIONAL(msr))
+ new_msr |= MSR_TS_S;
+ else
+ new_msr |= msr & MSR_TS_MASK;
+#endif
+
+ kvmppc_set_srr0(vcpu, pc);
+ kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags);
+ kvmppc_set_pc(vcpu, new_pc);
+ kvmppc_set_msr(vcpu, new_msr);
+}
static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
{
@@ -1708,21 +1744,17 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
return r;
}
-static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
- unsigned int id)
+static int kvmppc_core_vcpu_create_pr(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_book3s *vcpu_book3s;
- struct kvm_vcpu *vcpu;
- int err = -ENOMEM;
unsigned long p;
+ int err;
- vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu)
- goto out;
+ err = -ENOMEM;
vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
if (!vcpu_book3s)
- goto free_vcpu;
+ goto out;
vcpu->arch.book3s = vcpu_book3s;
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
@@ -1732,14 +1764,9 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
goto free_vcpu3s;
#endif
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_shadow_vcpu;
-
- err = -ENOMEM;
p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
if (!p)
- goto uninit_vcpu;
+ goto free_shadow_vcpu;
vcpu->arch.shared = (void *)p;
#ifdef CONFIG_PPC_BOOK3S_64
/* Always start the shared struct in native endian mode */
@@ -1761,6 +1788,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
#else
/* default to book3s_32 (750) */
vcpu->arch.pvr = 0x84202;
+ vcpu->arch.intr_msr = 0;
#endif
kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
vcpu->arch.slb_nr = 64;
@@ -1769,22 +1797,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
err = kvmppc_mmu_init(vcpu);
if (err < 0)
- goto uninit_vcpu;
+ goto free_shared_page;
- return vcpu;
+ return 0;
-uninit_vcpu:
- kvm_vcpu_uninit(vcpu);
+free_shared_page:
+ free_page((unsigned long)vcpu->arch.shared);
free_shadow_vcpu:
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
kfree(vcpu->arch.shadow_vcpu);
free_vcpu3s:
#endif
vfree(vcpu_book3s);
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
- return ERR_PTR(err);
+ return err;
}
static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
@@ -1792,12 +1818,10 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
- kvm_vcpu_uninit(vcpu);
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
kfree(vcpu->arch.shadow_vcpu);
#endif
vfree(vcpu_book3s);
- kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
@@ -1993,6 +2017,7 @@ static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
{
/* We should not get called */
BUG();
+ return 0;
}
#endif /* CONFIG_PPC64 */
@@ -2058,6 +2083,7 @@ static struct kvmppc_ops kvm_ops_pr = {
.set_one_reg = kvmppc_set_one_reg_pr,
.vcpu_load = kvmppc_core_vcpu_load_pr,
.vcpu_put = kvmppc_core_vcpu_put_pr,
+ .inject_interrupt = kvmppc_inject_interrupt_pr,
.set_msr = kvmppc_set_msr_pr,
.vcpu_run = kvmppc_vcpu_run_pr,
.vcpu_create = kvmppc_core_vcpu_create_pr,
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index e3ba67095895..85215e79db42 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -67,8 +67,14 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
u64 pq;
- if (!tima)
+ /*
+ * Nothing to do if the platform doesn't have a XIVE
+ * or this vCPU doesn't have its own XIVE context
+ * (e.g. because it's not using an in-kernel interrupt controller).
+ */
+ if (!tima || !vcpu->arch.xive_cam_word)
return;
+
eieio();
__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
@@ -160,6 +166,9 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
*/
vcpu->arch.xive_esc_on = false;
+ /* This orders xive_esc_on = false vs. subsequent stale_p = true */
+ smp_wmb(); /* goes with smp_mb() in cleanup_single_escalation */
+
return IRQ_HANDLED;
}
@@ -475,7 +484,7 @@ static void xive_finish_unmask(struct kvmppc_xive *xive,
kvmppc_xive_select_irq(state, &hw_num, &xd);
/*
- * See command in xive_lock_and_mask() concerning masking
+ * See comment in xive_lock_and_mask() concerning masking
* via firmware.
*/
if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
@@ -1113,6 +1122,31 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
vcpu->arch.xive_esc_raddr = 0;
}
+/*
+ * In single escalation mode, the escalation interrupt is marked so
+ * that EOI doesn't re-enable it, but just sets the stale_p flag to
+ * indicate that the P bit has already been dealt with. However, the
+ * assembly code that enters the guest sets PQ to 00 without clearing
+ * stale_p (because it has no easy way to address it). Hence we have
+ * to adjust stale_p before shutting down the interrupt.
+ */
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
+ struct kvmppc_xive_vcpu *xc, int irq)
+{
+ struct irq_data *d = irq_get_irq_data(irq);
+ struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+ /*
+ * This slightly odd sequence gives the right result
+ * (i.e. stale_p set if xive_esc_on is false) even if
+ * we race with xive_esc_irq() and xive_irq_eoi().
+ */
+ xd->stale_p = false;
+ smp_mb(); /* paired with smb_wmb in xive_esc_irq */
+ if (!vcpu->arch.xive_esc_on)
+ xd->stale_p = true;
+}
+
void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
@@ -1134,20 +1168,28 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
/* Mask the VP IPI */
xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
- /* Disable the VP */
- xive_native_disable_vp(xc->vp_id);
-
- /* Free the queues & associated interrupts */
+ /* Free escalations */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
- struct xive_q *q = &xc->queues[i];
-
- /* Free the escalation irq */
if (xc->esc_virq[i]) {
+ if (xc->xive->single_escalation)
+ xive_cleanup_single_escalation(vcpu, xc,
+ xc->esc_virq[i]);
free_irq(xc->esc_virq[i], vcpu);
irq_dispose_mapping(xc->esc_virq[i]);
kfree(xc->esc_virq_names[i]);
}
- /* Free the queue */
+ }
+
+ /* Disable the VP */
+ xive_native_disable_vp(xc->vp_id);
+
+ /* Clear the cam word so guest entry won't try to push context */
+ vcpu->arch.xive_cam_word = 0;
+
+ /* Free the queues */
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+ struct xive_q *q = &xc->queues[i];
+
xive_native_disable_queue(xc->vp_id, q, i);
if (q->qpage) {
free_pages((unsigned long)q->qpage,
@@ -1169,12 +1211,52 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
vcpu->arch.xive_vcpu = NULL;
}
+static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu)
+{
+ /* We have a block of xive->nr_servers VPs. We just need to check
+ * raw vCPU ids are below the expected limit for this guest's
+ * core stride ; kvmppc_pack_vcpu_id() will pack them down to an
+ * index that can be safely used to compute a VP id that belongs
+ * to the VP block.
+ */
+ return cpu < xive->nr_servers * xive->kvm->arch.emul_smt_mode;
+}
+
+int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp)
+{
+ u32 vp_id;
+
+ if (!kvmppc_xive_vcpu_id_valid(xive, cpu)) {
+ pr_devel("Out of bounds !\n");
+ return -EINVAL;
+ }
+
+ if (xive->vp_base == XIVE_INVALID_VP) {
+ xive->vp_base = xive_native_alloc_vp_block(xive->nr_servers);
+ pr_devel("VP_Base=%x nr_servers=%d\n", xive->vp_base, xive->nr_servers);
+
+ if (xive->vp_base == XIVE_INVALID_VP)
+ return -ENOSPC;
+ }
+
+ vp_id = kvmppc_xive_vp(xive, cpu);
+ if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
+ pr_devel("Duplicate !\n");
+ return -EEXIST;
+ }
+
+ *vp = vp_id;
+
+ return 0;
+}
+
int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu)
{
struct kvmppc_xive *xive = dev->private;
struct kvmppc_xive_vcpu *xc;
int i, r = -EBUSY;
+ u32 vp_id;
pr_devel("connect_vcpu(cpu=%d)\n", cpu);
@@ -1186,25 +1268,25 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
return -EPERM;
if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
return -EBUSY;
- if (kvmppc_xive_find_server(vcpu->kvm, cpu)) {
- pr_devel("Duplicate !\n");
- return -EEXIST;
- }
- if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
- pr_devel("Out of bounds !\n");
- return -EINVAL;
- }
- xc = kzalloc(sizeof(*xc), GFP_KERNEL);
- if (!xc)
- return -ENOMEM;
/* We need to synchronize with queue provisioning */
mutex_lock(&xive->lock);
+
+ r = kvmppc_xive_compute_vp_id(xive, cpu, &vp_id);
+ if (r)
+ goto bail;
+
+ xc = kzalloc(sizeof(*xc), GFP_KERNEL);
+ if (!xc) {
+ r = -ENOMEM;
+ goto bail;
+ }
+
vcpu->arch.xive_vcpu = xc;
xc->xive = xive;
xc->vcpu = vcpu;
xc->server_num = cpu;
- xc->vp_id = kvmppc_xive_vp(xive, cpu);
+ xc->vp_id = vp_id;
xc->mfrr = 0xff;
xc->valid = true;
@@ -1784,6 +1866,43 @@ int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
return 0;
}
+int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr)
+{
+ u32 __user *ubufp = (u32 __user *) addr;
+ u32 nr_servers;
+ int rc = 0;
+
+ if (get_user(nr_servers, ubufp))
+ return -EFAULT;
+
+ pr_devel("%s nr_servers=%u\n", __func__, nr_servers);
+
+ if (!nr_servers || nr_servers > KVM_MAX_VCPU_ID)
+ return -EINVAL;
+
+ mutex_lock(&xive->lock);
+ if (xive->vp_base != XIVE_INVALID_VP)
+ /* The VP block is allocated once and freed when the device
+ * is released. Better not allow to change its size since its
+ * used by connect_vcpu to validate vCPU ids are valid (eg,
+ * setting it back to a higher value could allow connect_vcpu
+ * to come up with a VP id that goes beyond the VP block, which
+ * is likely to cause a crash in OPAL).
+ */
+ rc = -EBUSY;
+ else if (nr_servers > KVM_MAX_VCPUS)
+ /* We don't need more servers. Higher vCPU ids get packed
+ * down below KVM_MAX_VCPUS by kvmppc_pack_vcpu_id().
+ */
+ xive->nr_servers = KVM_MAX_VCPUS;
+ else
+ xive->nr_servers = nr_servers;
+
+ mutex_unlock(&xive->lock);
+
+ return rc;
+}
+
static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
struct kvmppc_xive *xive = dev->private;
@@ -1792,6 +1911,11 @@ static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
switch (attr->group) {
case KVM_DEV_XICS_GRP_SOURCES:
return xive_set_source(xive, attr->attr, attr->addr);
+ case KVM_DEV_XICS_GRP_CTRL:
+ switch (attr->attr) {
+ case KVM_DEV_XICS_NR_SERVERS:
+ return kvmppc_xive_set_nr_servers(xive, attr->addr);
+ }
}
return -ENXIO;
}
@@ -1817,6 +1941,11 @@ static int xive_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
attr->attr < KVMPPC_XICS_NR_IRQS)
return 0;
break;
+ case KVM_DEV_XICS_GRP_CTRL:
+ switch (attr->attr) {
+ case KVM_DEV_XICS_NR_SERVERS:
+ return 0;
+ }
}
return -ENXIO;
}
@@ -1951,10 +2080,13 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
{
struct kvmppc_xive *xive;
struct kvm *kvm = dev->kvm;
- int ret = 0;
pr_devel("Creating xive for partition\n");
+ /* Already there ? */
+ if (kvm->arch.xive)
+ return -EEXIST;
+
xive = kvmppc_xive_get_device(kvm, type);
if (!xive)
return -ENOMEM;
@@ -1964,12 +2096,6 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
xive->kvm = kvm;
mutex_init(&xive->lock);
- /* Already there ? */
- if (kvm->arch.xive)
- ret = -EEXIST;
- else
- kvm->arch.xive = xive;
-
/* We use the default queue size set by the host */
xive->q_order = xive_native_default_eq_shift();
if (xive->q_order < PAGE_SHIFT)
@@ -1977,18 +2103,16 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
else
xive->q_page_order = xive->q_order - PAGE_SHIFT;
- /* Allocate a bunch of VPs */
- xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
- pr_devel("VP_Base=%x\n", xive->vp_base);
-
- if (xive->vp_base == XIVE_INVALID_VP)
- ret = -ENOMEM;
+ /* VP allocation is delayed to the first call to connect_vcpu */
+ xive->vp_base = XIVE_INVALID_VP;
+ /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets
+ * on a POWER9 system.
+ */
+ xive->nr_servers = KVM_MAX_VCPUS;
xive->single_escalation = xive_native_has_single_escalation();
- if (ret)
- return ret;
-
+ kvm->arch.xive = xive;
return 0;
}
@@ -2058,9 +2182,9 @@ static int xive_debug_show(struct seq_file *m, void *private)
if (!xc)
continue;
- seq_printf(m, "cpu server %#x CPPR:%#x HWCPPR:%#x"
+ seq_printf(m, "cpu server %#x VP:%#x CPPR:%#x HWCPPR:%#x"
" MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n",
- xc->server_num, xc->cppr, xc->hw_cppr,
+ xc->server_num, xc->vp_id, xc->cppr, xc->hw_cppr,
xc->mfrr, xc->pending,
xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index 50494d0ee375..382e3a56e789 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -135,6 +135,9 @@ struct kvmppc_xive {
/* Flags */
u8 single_escalation;
+ /* Number of entries in the VP block */
+ u32 nr_servers;
+
struct kvmppc_xive_ops *ops;
struct address_space *mapping;
struct mutex mapping_lock;
@@ -220,6 +223,18 @@ static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server)
return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
}
+static inline bool kvmppc_xive_vp_in_use(struct kvm *kvm, u32 vp_id)
+{
+ struct kvm_vcpu *vcpu = NULL;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu->arch.xive_vcpu && vp_id == vcpu->arch.xive_vcpu->vp_id)
+ return true;
+ }
+ return false;
+}
+
/*
* Mapping between guest priorities and host priorities
* is as follow.
@@ -282,6 +297,10 @@ int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio);
int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
bool single_escalation);
struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
+ struct kvmppc_xive_vcpu *xc, int irq);
+int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp);
+int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr);
#endif /* CONFIG_KVM_XICS */
#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index a998823f68a3..6ef0151ff70a 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -50,6 +50,24 @@ static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
}
}
+static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
+ u8 prio, __be32 *qpage,
+ u32 order, bool can_escalate)
+{
+ int rc;
+ __be32 *qpage_prev = q->qpage;
+
+ rc = xive_native_configure_queue(vp_id, q, prio, qpage, order,
+ can_escalate);
+ if (rc)
+ return rc;
+
+ if (qpage_prev)
+ put_page(virt_to_page(qpage_prev));
+
+ return rc;
+}
+
void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
@@ -67,20 +85,28 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
xc->valid = false;
kvmppc_xive_disable_vcpu_interrupts(vcpu);
- /* Disable the VP */
- xive_native_disable_vp(xc->vp_id);
-
- /* Free the queues & associated interrupts */
+ /* Free escalations */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
/* Free the escalation irq */
if (xc->esc_virq[i]) {
+ if (xc->xive->single_escalation)
+ xive_cleanup_single_escalation(vcpu, xc,
+ xc->esc_virq[i]);
free_irq(xc->esc_virq[i], vcpu);
irq_dispose_mapping(xc->esc_virq[i]);
kfree(xc->esc_virq_names[i]);
xc->esc_virq[i] = 0;
}
+ }
+
+ /* Disable the VP */
+ xive_native_disable_vp(xc->vp_id);
- /* Free the queue */
+ /* Clear the cam word so guest entry won't try to push context */
+ vcpu->arch.xive_cam_word = 0;
+
+ /* Free the queues */
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
kvmppc_xive_native_cleanup_queue(vcpu, i);
}
@@ -98,6 +124,7 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
struct kvmppc_xive *xive = dev->private;
struct kvmppc_xive_vcpu *xc = NULL;
int rc;
+ u32 vp_id;
pr_devel("native_connect_vcpu(server=%d)\n", server_num);
@@ -109,18 +136,12 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
return -EPERM;
if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
return -EBUSY;
- if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
- pr_devel("Out of bounds !\n");
- return -EINVAL;
- }
mutex_lock(&xive->lock);
- if (kvmppc_xive_find_server(vcpu->kvm, server_num)) {
- pr_devel("Duplicate !\n");
- rc = -EEXIST;
+ rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
+ if (rc)
goto bail;
- }
xc = kzalloc(sizeof(*xc), GFP_KERNEL);
if (!xc) {
@@ -133,7 +154,7 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
xc->vcpu = vcpu;
xc->server_num = server_num;
- xc->vp_id = kvmppc_xive_vp(xive, server_num);
+ xc->vp_id = vp_id;
xc->valid = true;
vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;
@@ -572,19 +593,14 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
q->guest_qaddr = 0;
q->guest_qshift = 0;
- rc = xive_native_configure_queue(xc->vp_id, q, priority,
- NULL, 0, true);
+ rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
+ NULL, 0, true);
if (rc) {
pr_err("Failed to reset queue %d for VCPU %d: %d\n",
priority, xc->server_num, rc);
return rc;
}
- if (q->qpage) {
- put_page(virt_to_page(q->qpage));
- q->qpage = NULL;
- }
-
return 0;
}
@@ -614,17 +630,18 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
srcu_idx = srcu_read_lock(&kvm->srcu);
gfn = gpa_to_gfn(kvm_eq.qaddr);
- page = gfn_to_page(kvm, gfn);
- if (is_error_page(page)) {
+
+ page_size = kvm_host_page_size(vcpu, gfn);
+ if (1ull << kvm_eq.qshift > page_size) {
srcu_read_unlock(&kvm->srcu, srcu_idx);
- pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
+ pr_warn("Incompatible host page size %lx!\n", page_size);
return -EINVAL;
}
- page_size = kvm_host_page_size(kvm, gfn);
- if (1ull << kvm_eq.qshift > page_size) {
+ page = gfn_to_page(kvm, gfn);
+ if (is_error_page(page)) {
srcu_read_unlock(&kvm->srcu, srcu_idx);
- pr_warn("Incompatible host page size %lx!\n", page_size);
+ pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
return -EINVAL;
}
@@ -643,8 +660,8 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
* OPAL level because the use of END ESBs is not supported by
* Linux.
*/
- rc = xive_native_configure_queue(xc->vp_id, q, priority,
- (__be32 *) qaddr, kvm_eq.qshift, true);
+ rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
+ (__be32 *) qaddr, kvm_eq.qshift, true);
if (rc) {
pr_err("Failed to configure queue %d for VCPU %d: %d\n",
priority, xc->server_num, rc);
@@ -918,6 +935,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
return kvmppc_xive_reset(xive);
case KVM_DEV_XIVE_EQ_SYNC:
return kvmppc_xive_native_eq_sync(xive);
+ case KVM_DEV_XIVE_NR_SERVERS:
+ return kvmppc_xive_set_nr_servers(xive, attr->addr);
}
break;
case KVM_DEV_XIVE_GRP_SOURCE:
@@ -957,6 +976,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
switch (attr->attr) {
case KVM_DEV_XIVE_RESET:
case KVM_DEV_XIVE_EQ_SYNC:
+ case KVM_DEV_XIVE_NR_SERVERS:
return 0;
}
break;
@@ -1057,7 +1077,6 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
struct kvmppc_xive *xive;
struct kvm *kvm = dev->kvm;
- int ret = 0;
pr_devel("Creating xive native device\n");
@@ -1071,27 +1090,20 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
dev->private = xive;
xive->dev = dev;
xive->kvm = kvm;
- kvm->arch.xive = xive;
mutex_init(&xive->mapping_lock);
mutex_init(&xive->lock);
- /*
- * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for
- * a default. Getting the max number of CPUs the VM was
- * configured with would improve our usage of the XIVE VP space.
+ /* VP allocation is delayed to the first call to connect_vcpu */
+ xive->vp_base = XIVE_INVALID_VP;
+ /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets
+ * on a POWER9 system.
*/
- xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
- pr_devel("VP_Base=%x\n", xive->vp_base);
-
- if (xive->vp_base == XIVE_INVALID_VP)
- ret = -ENXIO;
+ xive->nr_servers = KVM_MAX_VCPUS;
xive->single_escalation = xive_native_has_single_escalation();
xive->ops = &kvmppc_xive_native_ops;
- if (ret)
- return ret;
-
+ kvm->arch.xive = xive;
return 0;
}
@@ -1171,6 +1183,11 @@ int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
return 0;
}
+bool kvmppc_xive_native_supported(void)
+{
+ return xive_native_has_queue_state_support();
+}
+
static int xive_native_debug_show(struct seq_file *m, void *private)
{
struct kvmppc_xive *xive = m->private;
@@ -1189,8 +1206,8 @@ static int xive_native_debug_show(struct seq_file *m, void *private)
if (!xc)
continue;
- seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
- xc->server_num,
+ seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
+ xc->server_num, xc->vp_id,
vcpu->arch.xive_saved_state.nsr,
vcpu->arch.xive_saved_state.cppr,
vcpu->arch.xive_saved_state.ipb,
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index be9a45874194..7b27604adadf 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -775,7 +775,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
debug = current->thread.debug;
current->thread.debug = vcpu->arch.dbg_reg;
- vcpu->arch.pgdir = current->mm->pgd;
+ vcpu->arch.pgdir = vcpu->kvm->mm->pgd;
kvmppc_fix_ee_before_entry();
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
@@ -1377,36 +1377,6 @@ static void kvmppc_set_tsr(struct kvm_vcpu *vcpu, u32 new_tsr)
update_timer_ints(vcpu);
}
-/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
- int i;
- int r;
-
- vcpu->arch.regs.nip = 0;
- vcpu->arch.shared->pir = vcpu->vcpu_id;
- kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
- kvmppc_set_msr(vcpu, 0);
-
-#ifndef CONFIG_KVM_BOOKE_HV
- vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
- vcpu->arch.shadow_pid = 1;
- vcpu->arch.shared->msr = 0;
-#endif
-
- /* Eye-catching numbers so we know if the guest takes an interrupt
- * before it's programmed its own IVPR/IVORs. */
- vcpu->arch.ivpr = 0x55550000;
- for (i = 0; i < BOOKE_IRQPRIO_MAX; i++)
- vcpu->arch.ivor[i] = 0x7700 | i * 4;
-
- kvmppc_init_timing_stats(vcpu);
-
- r = kvmppc_core_vcpu_setup(vcpu);
- kvmppc_sanity_check(vcpu);
- return r;
-}
-
int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
{
/* setup watchdog timer once */
@@ -2114,9 +2084,40 @@ int kvmppc_core_init_vm(struct kvm *kvm)
return kvm->arch.kvm_ops->init_vm(kvm);
}
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu)
{
- return kvm->arch.kvm_ops->vcpu_create(kvm, id);
+ int i;
+ int r;
+
+ r = vcpu->kvm->arch.kvm_ops->vcpu_create(vcpu);
+ if (r)
+ return r;
+
+ /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
+ vcpu->arch.regs.nip = 0;
+ vcpu->arch.shared->pir = vcpu->vcpu_id;
+ kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
+ kvmppc_set_msr(vcpu, 0);
+
+#ifndef CONFIG_KVM_BOOKE_HV
+ vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
+ vcpu->arch.shadow_pid = 1;
+ vcpu->arch.shared->msr = 0;
+#endif
+
+ /* Eye-catching numbers so we know if the guest takes an interrupt
+ * before it's programmed its own IVPR/IVORs. */
+ vcpu->arch.ivpr = 0x55550000;
+ for (i = 0; i < BOOKE_IRQPRIO_MAX; i++)
+ vcpu->arch.ivor[i] = 0x7700 | i * 4;
+
+ kvmppc_init_timing_stats(vcpu);
+
+ r = kvmppc_core_vcpu_setup(vcpu);
+ if (r)
+ vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
+ kvmppc_sanity_check(vcpu);
+ return r;
}
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index b5a848a55504..f2b4feaff6d2 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -433,28 +433,16 @@ static int kvmppc_set_one_reg_e500(struct kvm_vcpu *vcpu, u64 id,
return r;
}
-static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm,
- unsigned int id)
+static int kvmppc_core_vcpu_create_e500(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500;
- struct kvm_vcpu *vcpu;
int err;
- vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu_e500) {
- err = -ENOMEM;
- goto out;
- }
+ BUILD_BUG_ON(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0);
+ vcpu_e500 = to_e500(vcpu);
- vcpu = &vcpu_e500->vcpu;
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_vcpu;
-
- if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) {
- err = -ENOMEM;
- goto uninit_vcpu;
- }
+ if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
+ return -ENOMEM;
err = kvmppc_e500_tlb_init(vcpu_e500);
if (err)
@@ -466,18 +454,13 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm,
goto uninit_tlb;
}
- return vcpu;
+ return 0;
uninit_tlb:
kvmppc_e500_tlb_uninit(vcpu_e500);
uninit_id:
kvmppc_e500_id_table_free(vcpu_e500);
-uninit_vcpu:
- kvm_vcpu_uninit(vcpu);
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
-out:
- return ERR_PTR(err);
+ return err;
}
static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu)
@@ -487,8 +470,6 @@ static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu)
free_page((unsigned long)vcpu->arch.shared);
kvmppc_e500_tlb_uninit(vcpu_e500);
kvmppc_e500_id_table_free(vcpu_e500);
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
}
static int kvmppc_core_init_vm_e500(struct kvm *kvm)
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 321db0fdb9db..425d13806645 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -355,9 +355,9 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
if (tlbsel == 1) {
struct vm_area_struct *vma;
- down_read(&current->mm->mmap_sem);
+ down_read(&kvm->mm->mmap_sem);
- vma = find_vma(current->mm, hva);
+ vma = find_vma(kvm->mm, hva);
if (vma && hva >= vma->vm_start &&
(vma->vm_flags & VM_PFNMAP)) {
/*
@@ -441,7 +441,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
}
- up_read(&current->mm->mmap_sem);
+ up_read(&kvm->mm->mmap_sem);
}
if (likely(!pfnmap)) {
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 318e65c65999..e6b06cb2b92c 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -301,30 +301,20 @@ static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
return r;
}
-static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm,
- unsigned int id)
+static int kvmppc_core_vcpu_create_e500mc(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500;
- struct kvm_vcpu *vcpu;
int err;
- vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu_e500) {
- err = -ENOMEM;
- goto out;
- }
- vcpu = &vcpu_e500->vcpu;
+ BUILD_BUG_ON(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0);
+ vcpu_e500 = to_e500(vcpu);
/* Invalid PIR value -- this LPID dosn't have valid state on any cpu */
vcpu->arch.oldpir = 0xffffffff;
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_vcpu;
-
err = kvmppc_e500_tlb_init(vcpu_e500);
if (err)
- goto uninit_vcpu;
+ return err;
vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
if (!vcpu->arch.shared) {
@@ -332,17 +322,11 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm,
goto uninit_tlb;
}
- return vcpu;
+ return 0;
uninit_tlb:
kvmppc_e500_tlb_uninit(vcpu_e500);
-uninit_vcpu:
- kvm_vcpu_uninit(vcpu);
-
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
-out:
- return ERR_PTR(err);
+ return err;
}
static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu)
@@ -351,8 +335,6 @@ static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu)
free_page((unsigned long)vcpu->arch.shared);
kvmppc_e500_tlb_uninit(vcpu_e500);
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
}
static int kvmppc_core_init_vm_e500mc(struct kvm *kvm)
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index bb4d09c1ad56..6fca38ca791f 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -271,6 +271,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
*/
if (inst == KVMPPC_INST_SW_BREAKPOINT) {
run->exit_reason = KVM_EXIT_DEBUG;
+ run->debug.arch.status = 0;
run->debug.arch.address = kvmppc_get_pc(vcpu);
emulated = EMULATE_EXIT_USER;
advance = 0;
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index 9208c82ed08d..1139bc56e004 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -73,7 +73,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
u32 inst;
- int ra, rs, rt;
enum emulation_result emulated = EMULATE_FAIL;
int advance = 1;
struct instruction_op op;
@@ -85,16 +84,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
if (emulated != EMULATE_DONE)
return emulated;
- ra = get_ra(inst);
- rs = get_rs(inst);
- rt = get_rt(inst);
-
- /*
- * if mmio_vsx_tx_sx_enabled == 0, copy data between
- * VSR[0..31] and memory
- * if mmio_vsx_tx_sx_enabled == 1, copy data between
- * VSR[32..63] and memory
- */
vcpu->arch.mmio_vsx_copy_nums = 0;
vcpu->arch.mmio_vsx_offset = 0;
vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 3e566c2e6066..1af96fb5dc6f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -31,6 +31,8 @@
#include <asm/hvcall.h>
#include <asm/plpar_wrappers.h>
#endif
+#include <asm/ultravisor.h>
+#include <asm/kvm_host.h>
#include "timing.h"
#include "irq.h"
@@ -473,7 +475,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
#endif
kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_arch_vcpu_free(vcpu);
+ kvm_vcpu_destroy(vcpu);
mutex_lock(&kvm->lock);
for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
@@ -522,6 +524,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IMMEDIATE_EXIT:
r = 1;
break;
+ case KVM_CAP_PPC_GUEST_DEBUG_SSTEP:
+ /* fall through */
case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_OSI:
case KVM_CAP_PPC_GET_PVINFO:
@@ -561,7 +565,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
* a POWER9 processor) and the PowerNV platform, as
* nested is not yet supported.
*/
- r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE);
+ r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE) &&
+ kvmppc_xive_native_supported();
break;
#endif
@@ -715,22 +720,55 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
kvmppc_core_flush_memslot(kvm, slot);
}
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
+int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
+{
+ return 0;
+}
+
+static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu;
- vcpu = kvmppc_core_vcpu_create(kvm, id);
- if (!IS_ERR(vcpu)) {
- vcpu->arch.wqp = &vcpu->wq;
- kvmppc_create_vcpu_debugfs(vcpu, id);
- }
- return vcpu;
+
+ vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer);
+ kvmppc_decrementer_func(vcpu);
+
+ return HRTIMER_NORESTART;
+}
+
+int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
+{
+ int err;
+
+ hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+ vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
+ vcpu->arch.dec_expires = get_tb();
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+ mutex_init(&vcpu->arch.exit_timing_lock);
+#endif
+ err = kvmppc_subarch_vcpu_init(vcpu);
+ if (err)
+ return err;
+
+ err = kvmppc_core_vcpu_create(vcpu);
+ if (err)
+ goto out_vcpu_uninit;
+
+ vcpu->arch.wqp = &vcpu->wq;
+ kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
+ return 0;
+
+out_vcpu_uninit:
+ kvmppc_mmu_destroy(vcpu);
+ kvmppc_subarch_vcpu_uninit(vcpu);
+ return err;
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
}
-void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
/* Make sure we're not using the vcpu anymore */
hrtimer_cancel(&vcpu->arch.dec_timer);
@@ -753,11 +791,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
}
kvmppc_core_vcpu_free(vcpu);
-}
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
- kvm_arch_vcpu_free(vcpu);
+ kvmppc_mmu_destroy(vcpu);
+ kvmppc_subarch_vcpu_uninit(vcpu);
}
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
@@ -765,37 +801,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return kvmppc_core_pending_dec(vcpu);
}
-static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
-{
- struct kvm_vcpu *vcpu;
-
- vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer);
- kvmppc_decrementer_func(vcpu);
-
- return HRTIMER_NORESTART;
-}
-
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
-{
- int ret;
-
- hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
- vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
- vcpu->arch.dec_expires = get_tb();
-
-#ifdef CONFIG_KVM_EXIT_TIMING
- mutex_init(&vcpu->arch.exit_timing_lock);
-#endif
- ret = kvmppc_subarch_vcpu_init(vcpu);
- return ret;
-}
-
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
- kvmppc_mmu_destroy(vcpu);
- kvmppc_subarch_vcpu_uninit(vcpu);
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
#ifdef CONFIG_BOOKE
@@ -2410,6 +2415,16 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = -EFAULT;
break;
}
+ case KVM_PPC_SVM_OFF: {
+ struct kvm *kvm = filp->private_data;
+
+ r = 0;
+ if (!kvm->arch.kvm_ops->svm_off)
+ goto out;
+
+ r = kvm->arch.kvm_ops->svm_off(kvm);
+ break;
+ }
default: {
struct kvm *kvm = filp->private_data;
r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index eebc782d89a5..b8de3be10eb4 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -16,7 +16,7 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING
endif
-obj-y += alloc.o code-patching.o feature-fixups.o
+obj-y += alloc.o code-patching.o feature-fixups.o pmem.o
ifndef CONFIG_KASAN
obj-y += string.o memcmp_$(BITS).o
@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
memcpy_power7.o
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
- memcpy_64.o pmem.o
+ memcpy_64.o memcpy_mcsafe_64.o
obj64-$(CONFIG_SMP) += locks.o
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 6550b9e5ce5f..6440d5943c00 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -18,7 +18,7 @@
#include <asm/hvcall.h>
#include <asm/smp.h>
-void __spin_yield(arch_spinlock_t *lock)
+void splpar_spin_yield(arch_spinlock_t *lock)
{
unsigned int lock_value, holder_cpu, yield_count;
@@ -36,14 +36,14 @@ void __spin_yield(arch_spinlock_t *lock)
plpar_hcall_norets(H_CONFER,
get_hard_smp_processor_id(holder_cpu), yield_count);
}
-EXPORT_SYMBOL_GPL(__spin_yield);
+EXPORT_SYMBOL_GPL(splpar_spin_yield);
/*
* Waiting for a read lock or a write lock on a rwlock...
* This turns out to be the same for read and write locks, since
* we only know the holder if it is write-locked.
*/
-void __rw_yield(arch_rwlock_t *rw)
+void splpar_rw_yield(arch_rwlock_t *rw)
{
int lock_value;
unsigned int holder_cpu, yield_count;
diff --git a/arch/powerpc/lib/memcpy_mcsafe_64.S b/arch/powerpc/lib/memcpy_mcsafe_64.S
new file mode 100644
index 000000000000..cb882d9a6d8a
--- /dev/null
+++ b/arch/powerpc/lib/memcpy_mcsafe_64.S
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) IBM Corporation, 2011
+ * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
+ * Author - Balbir Singh <bsingharora@gmail.com>
+ */
+#include <asm/ppc_asm.h>
+#include <asm/errno.h>
+#include <asm/export.h>
+
+ .macro err1
+100:
+ EX_TABLE(100b,.Ldo_err1)
+ .endm
+
+ .macro err2
+200:
+ EX_TABLE(200b,.Ldo_err2)
+ .endm
+
+ .macro err3
+300: EX_TABLE(300b,.Ldone)
+ .endm
+
+.Ldo_err2:
+ ld r22,STK_REG(R22)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r14,STK_REG(R14)(r1)
+ addi r1,r1,STACKFRAMESIZE
+.Ldo_err1:
+ /* Do a byte by byte copy to get the exact remaining size */
+ mtctr r7
+46:
+err3; lbz r0,0(r4)
+ addi r4,r4,1
+err3; stb r0,0(r3)
+ addi r3,r3,1
+ bdnz 46b
+ li r3,0
+ blr
+
+.Ldone:
+ mfctr r3
+ blr
+
+
+_GLOBAL(memcpy_mcsafe)
+ mr r7,r5
+ cmpldi r5,16
+ blt .Lshort_copy
+
+.Lcopy:
+ /* Get the source 8B aligned */
+ neg r6,r4
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-3)
+
+ bf cr7*4+3,1f
+err1; lbz r0,0(r4)
+ addi r4,r4,1
+err1; stb r0,0(r3)
+ addi r3,r3,1
+ subi r7,r7,1
+
+1: bf cr7*4+2,2f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+ subi r7,r7,2
+
+2: bf cr7*4+1,3f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+ subi r7,r7,4
+
+3: sub r5,r5,r6
+ cmpldi r5,128
+
+ mflr r0
+ stdu r1,-STACKFRAMESIZE(r1)
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+ std r17,STK_REG(R17)(r1)
+ std r18,STK_REG(R18)(r1)
+ std r19,STK_REG(R19)(r1)
+ std r20,STK_REG(R20)(r1)
+ std r21,STK_REG(R21)(r1)
+ std r22,STK_REG(R22)(r1)
+ std r0,STACKFRAMESIZE+16(r1)
+
+ blt 5f
+ srdi r6,r5,7
+ mtctr r6
+
+ /* Now do cacheline (128B) sized loads and stores. */
+ .align 5
+4:
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r8,16(r4)
+err2; ld r9,24(r4)
+err2; ld r10,32(r4)
+err2; ld r11,40(r4)
+err2; ld r12,48(r4)
+err2; ld r14,56(r4)
+err2; ld r15,64(r4)
+err2; ld r16,72(r4)
+err2; ld r17,80(r4)
+err2; ld r18,88(r4)
+err2; ld r19,96(r4)
+err2; ld r20,104(r4)
+err2; ld r21,112(r4)
+err2; ld r22,120(r4)
+ addi r4,r4,128
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r8,16(r3)
+err2; std r9,24(r3)
+err2; std r10,32(r3)
+err2; std r11,40(r3)
+err2; std r12,48(r3)
+err2; std r14,56(r3)
+err2; std r15,64(r3)
+err2; std r16,72(r3)
+err2; std r17,80(r3)
+err2; std r18,88(r3)
+err2; std r19,96(r3)
+err2; std r20,104(r3)
+err2; std r21,112(r3)
+err2; std r22,120(r3)
+ addi r3,r3,128
+ subi r7,r7,128
+ bdnz 4b
+
+ clrldi r5,r5,(64-7)
+
+ /* Up to 127B to go */
+5: srdi r6,r5,4
+ mtocrf 0x01,r6
+
+6: bf cr7*4+1,7f
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r8,16(r4)
+err2; ld r9,24(r4)
+err2; ld r10,32(r4)
+err2; ld r11,40(r4)
+err2; ld r12,48(r4)
+err2; ld r14,56(r4)
+ addi r4,r4,64
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r8,16(r3)
+err2; std r9,24(r3)
+err2; std r10,32(r3)
+err2; std r11,40(r3)
+err2; std r12,48(r3)
+err2; std r14,56(r3)
+ addi r3,r3,64
+ subi r7,r7,64
+
+7: ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r22,STK_REG(R22)(r1)
+ addi r1,r1,STACKFRAMESIZE
+
+ /* Up to 63B to go */
+ bf cr7*4+2,8f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+err1; ld r8,16(r4)
+err1; ld r9,24(r4)
+ addi r4,r4,32
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+err1; std r8,16(r3)
+err1; std r9,24(r3)
+ addi r3,r3,32
+ subi r7,r7,32
+
+ /* Up to 31B to go */
+8: bf cr7*4+3,9f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+ addi r4,r4,16
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+ addi r3,r3,16
+ subi r7,r7,16
+
+9: clrldi r5,r5,(64-4)
+
+ /* Up to 15B to go */
+.Lshort_copy:
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+err1; lwz r0,0(r4) /* Less chance of a reject with word ops */
+err1; lwz r6,4(r4)
+ addi r4,r4,8
+err1; stw r0,0(r3)
+err1; stw r6,4(r3)
+ addi r3,r3,8
+ subi r7,r7,8
+
+12: bf cr7*4+1,13f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+ subi r7,r7,4
+
+13: bf cr7*4+2,14f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+ subi r7,r7,2
+
+14: bf cr7*4+3,15f
+err1; lbz r0,0(r4)
+err1; stb r0,0(r3)
+
+15: li r3,0
+ blr
+
+EXPORT_SYMBOL_GPL(memcpy_mcsafe);
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 377712e85605..0666a8d29596 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -17,14 +17,14 @@ void arch_wb_cache_pmem(void *addr, size_t size)
unsigned long start = (unsigned long) addr;
flush_dcache_range(start, start + size);
}
-EXPORT_SYMBOL(arch_wb_cache_pmem);
+EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
void arch_invalidate_pmem(void *addr, size_t size)
{
unsigned long start = (unsigned long) addr;
flush_dcache_range(start, start + size);
}
-EXPORT_SYMBOL(arch_invalidate_pmem);
+EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
/*
* CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE symbols
diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S
index f69a6aab7bfb..1ddb26394e8a 100644
--- a/arch/powerpc/lib/string_32.S
+++ b/arch/powerpc/lib/string_32.S
@@ -17,7 +17,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)
-_GLOBAL(__clear_user)
+_GLOBAL(__arch_clear_user)
/*
* Use dcbz on the complete cache lines in the destination
* to set them to zero. This requires that the destination
@@ -87,4 +87,4 @@ _GLOBAL(__clear_user)
EX_TABLE(8b, 91b)
EX_TABLE(9b, 91b)
-EXPORT_SYMBOL(__clear_user)
+EXPORT_SYMBOL(__arch_clear_user)
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
index 507b18b1660e..169872bc0892 100644
--- a/arch/powerpc/lib/string_64.S
+++ b/arch/powerpc/lib/string_64.S
@@ -17,7 +17,7 @@ PPC64_CACHES:
.section ".text"
/**
- * __clear_user: - Zero a block of memory in user space, with less checking.
+ * __arch_clear_user: - Zero a block of memory in user space, with less checking.
* @to: Destination address, in user space.
* @n: Number of bytes to zero.
*
@@ -58,7 +58,7 @@ err3; stb r0,0(r3)
mr r3,r4
blr
-_GLOBAL_TOC(__clear_user)
+_GLOBAL_TOC(__arch_clear_user)
cmpdi r4,32
neg r6,r3
li r0,0
@@ -181,4 +181,4 @@ err1; dcbz 0,r3
cmpdi r4,32
blt .Lshort_clear
b .Lmedium_clear
-EXPORT_SYMBOL(__clear_user)
+EXPORT_SYMBOL(__arch_clear_user)
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 0f499db315d6..5e147986400d 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -7,7 +7,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
obj-y := fault.o mem.o pgtable.o mmap.o \
init_$(BITS).o pgtable_$(BITS).o \
- pgtable-frag.o \
+ pgtable-frag.o ioremap.o ioremap_$(BITS).o \
init-common.o mmu_context.o drmem.o
obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/
obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/
diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S
index 8bbbd9775c8a..c11b0a005196 100644
--- a/arch/powerpc/mm/book3s32/hash_low.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -25,6 +25,12 @@
#include <asm/feature-fixups.h>
#include <asm/code-patching-asm.h>
+#ifdef CONFIG_VMAP_STACK
+#define ADDR_OFFSET 0
+#else
+#define ADDR_OFFSET PAGE_OFFSET
+#endif
+
#ifdef CONFIG_SMP
.section .bss
.align 2
@@ -47,8 +53,8 @@ mmu_hash_lock:
.text
_GLOBAL(hash_page)
#ifdef CONFIG_SMP
- lis r8, (mmu_hash_lock - PAGE_OFFSET)@h
- ori r8, r8, (mmu_hash_lock - PAGE_OFFSET)@l
+ lis r8, (mmu_hash_lock - ADDR_OFFSET)@h
+ ori r8, r8, (mmu_hash_lock - ADDR_OFFSET)@l
lis r0,0x0fff
b 10f
11: lwz r6,0(r8)
@@ -66,9 +72,12 @@ _GLOBAL(hash_page)
cmplw 0,r4,r0
ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */
mfspr r5, SPRN_SPRG_PGDIR /* phys page-table root */
+#ifdef CONFIG_VMAP_STACK
+ tovirt(r5, r5)
+#endif
blt+ 112f /* assume user more likely */
- lis r5, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
- addi r5 ,r5 ,(swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
+ lis r5, (swapper_pg_dir - ADDR_OFFSET)@ha /* if kernel address, use */
+ addi r5 ,r5 ,(swapper_pg_dir - ADDR_OFFSET)@l /* kernel page table */
rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */
112:
#ifndef CONFIG_PTE_64BIT
@@ -80,6 +89,9 @@ _GLOBAL(hash_page)
lwzx r8,r8,r5 /* Get L1 entry */
rlwinm. r8,r8,0,0,20 /* extract pt base address */
#endif
+#ifdef CONFIG_VMAP_STACK
+ tovirt(r8, r8)
+#endif
#ifdef CONFIG_SMP
beq- hash_page_out /* return if no mapping */
#else
@@ -137,9 +149,9 @@ retry:
#ifdef CONFIG_SMP
eieio
- lis r8, (mmu_hash_lock - PAGE_OFFSET)@ha
+ lis r8, (mmu_hash_lock - ADDR_OFFSET)@ha
li r0,0
- stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8)
+ stw r0, (mmu_hash_lock - ADDR_OFFSET)@l(r8)
#endif
/* Return from the exception */
@@ -152,9 +164,9 @@ retry:
#ifdef CONFIG_SMP
hash_page_out:
eieio
- lis r8, (mmu_hash_lock - PAGE_OFFSET)@ha
+ lis r8, (mmu_hash_lock - ADDR_OFFSET)@ha
li r0,0
- stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8)
+ stw r0, (mmu_hash_lock - ADDR_OFFSET)@l(r8)
blr
#endif /* CONFIG_SMP */
@@ -329,7 +341,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
patch_site 1f, patch__hash_page_A1
patch_site 2f, patch__hash_page_A2
/* Get the address of the primary PTE group in the hash table (r3) */
-0: lis r0, (Hash_base - PAGE_OFFSET)@h /* base address of hash table */
+0: lis r0, (Hash_base - ADDR_OFFSET)@h /* base address of hash table */
1: rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
2: rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
xor r3,r3,r0 /* make primary hash */
@@ -343,10 +355,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
beq+ 10f /* no PTE: go look for an empty slot */
tlbie r4
- lis r4, (htab_hash_searches - PAGE_OFFSET)@ha
- lwz r6, (htab_hash_searches - PAGE_OFFSET)@l(r4)
+ lis r4, (htab_hash_searches - ADDR_OFFSET)@ha
+ lwz r6, (htab_hash_searches - ADDR_OFFSET)@l(r4)
addi r6,r6,1 /* count how many searches we do */
- stw r6, (htab_hash_searches - PAGE_OFFSET)@l(r4)
+ stw r6, (htab_hash_searches - ADDR_OFFSET)@l(r4)
/* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
mtctr r0
@@ -378,10 +390,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
beq+ found_empty
/* update counter of times that the primary PTEG is full */
- lis r4, (primary_pteg_full - PAGE_OFFSET)@ha
- lwz r6, (primary_pteg_full - PAGE_OFFSET)@l(r4)
+ lis r4, (primary_pteg_full - ADDR_OFFSET)@ha
+ lwz r6, (primary_pteg_full - ADDR_OFFSET)@l(r4)
addi r6,r6,1
- stw r6, (primary_pteg_full - PAGE_OFFSET)@l(r4)
+ stw r6, (primary_pteg_full - ADDR_OFFSET)@l(r4)
patch_site 0f, patch__hash_page_C
/* Search the secondary PTEG for an empty slot */
@@ -415,8 +427,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
* lockup here but that shouldn't happen
*/
-1: lis r4, (next_slot - PAGE_OFFSET)@ha /* get next evict slot */
- lwz r6, (next_slot - PAGE_OFFSET)@l(r4)
+1: lis r4, (next_slot - ADDR_OFFSET)@ha /* get next evict slot */
+ lwz r6, (next_slot - ADDR_OFFSET)@l(r4)
addi r6,r6,HPTE_SIZE /* search for candidate */
andi. r6,r6,7*HPTE_SIZE
stw r6,next_slot@l(r4)
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index e249fbf6b9c3..0a1c65a2c565 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -74,7 +74,7 @@ static int find_free_bat(void)
{
int b;
- if (cpu_has_feature(CPU_FTR_601)) {
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) {
for (b = 0; b < 4; b++) {
struct ppc_bat *bat = BATS[b];
@@ -106,7 +106,7 @@ static int find_free_bat(void)
*/
static unsigned int block_size(unsigned long base, unsigned long top)
{
- unsigned int max_size = (cpu_has_feature(CPU_FTR_601) ? 8 : 256) << 20;
+ unsigned int max_size = IS_ENABLED(CONFIG_PPC_BOOK3S_601) ? SZ_8M : SZ_256M;
unsigned int base_shift = (ffs(base) - 1) & 31;
unsigned int block_shift = (fls(top - base) - 1) & 31;
@@ -189,7 +189,7 @@ void mmu_mark_initmem_nx(void)
unsigned long top = (unsigned long)_etext - PAGE_OFFSET;
unsigned long size;
- if (cpu_has_feature(CPU_FTR_601))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
return;
for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) {
@@ -227,7 +227,7 @@ void mmu_mark_rodata_ro(void)
int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
int i;
- if (cpu_has_feature(CPU_FTR_601))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
return;
for (i = 0; i < nb; i++) {
@@ -251,15 +251,24 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
{
unsigned int bl;
int wimgxpp;
- struct ppc_bat *bat = BATS[index];
+ struct ppc_bat *bat;
unsigned long flags = pgprot_val(prot);
+ if (index == -1)
+ index = find_free_bat();
+ if (index == -1) {
+ pr_err("%s: no BAT available for mapping 0x%llx\n", __func__,
+ (unsigned long long)phys);
+ return;
+ }
+ bat = BATS[index];
+
if ((flags & _PAGE_NO_CACHE) ||
(cpu_has_feature(CPU_FTR_NEED_COHERENT) == 0))
flags &= ~_PAGE_COHERENT;
bl = (size >> 17) - 1;
- if (PVR_VER(mfspr(SPRN_PVR)) != 1) {
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_601)) {
/* 603, 604, etc. */
/* Do DBAT first */
wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
@@ -297,8 +306,7 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
/*
* Preload a translation in the hash table
*/
-void hash_preload(struct mm_struct *mm, unsigned long ea,
- bool is_exec, unsigned long trap)
+void hash_preload(struct mm_struct *mm, unsigned long ea)
{
pmd_t *pmd;
@@ -310,6 +318,39 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
}
/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux PTE.
+ *
+ * This must always be called with the pte lock held.
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+ pte_t *ptep)
+{
+ if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return;
+ /*
+ * We don't need to worry about _PAGE_PRESENT here because we are
+ * called with either mm->page_table_lock held or ptl lock held
+ */
+
+ /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
+ if (!pte_young(*ptep) || address >= TASK_SIZE)
+ return;
+
+ /* We have to test for regs NULL since init will get here first thing at boot */
+ if (!current->thread.regs)
+ return;
+
+ /* We also avoid filling the hash if not coming from a fault */
+ if (TRAP(current->thread.regs) != 0x300 && TRAP(current->thread.regs) != 0x400)
+ return;
+
+ hash_preload(vma->vm_mm, address);
+}
+
+/*
* Initialize the hash table and patch the instructions in hashtable.S.
*/
void __init MMU_init_hw(void)
@@ -358,11 +399,21 @@ void __init MMU_init_hw(void)
hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
if (lg_n_hpteg > 16)
hash_mb2 = 16 - LG_HPTEG_SIZE;
+
+ /*
+ * When KASAN is selected, there is already an early temporary hash
+ * table and the switch to the final hash table is done later.
+ */
+ if (IS_ENABLED(CONFIG_KASAN))
+ return;
+
+ MMU_init_hw_patch();
}
void __init MMU_init_hw_patch(void)
{
unsigned int hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
+ unsigned int hash;
if (ppc_md.progress)
ppc_md.progress("hash:patch", 0x345);
@@ -374,8 +425,12 @@ void __init MMU_init_hw_patch(void)
/*
* Patch up the instructions in hashtable.S:create_hpte
*/
- modify_instruction_site(&patch__hash_page_A0, 0xffff,
- ((unsigned int)Hash - PAGE_OFFSET) >> 16);
+ if (IS_ENABLED(CONFIG_VMAP_STACK))
+ hash = (unsigned int)Hash;
+ else
+ hash = (unsigned int)Hash - PAGE_OFFSET;
+
+ modify_instruction_site(&patch__hash_page_A0, 0xffff, hash >> 16);
modify_instruction_site(&patch__hash_page_A1, 0x7c0, hash_mb << 6);
modify_instruction_site(&patch__hash_page_A2, 0x7c0, hash_mb2 << 6);
modify_instruction_site(&patch__hash_page_B, 0xffff, hmask);
@@ -400,7 +455,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
BUG_ON(first_memblock_base != 0);
/* 601 can only access 16MB at the moment */
- if (PVR_VER(mfspr(SPRN_PVR)) == 1)
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000));
else /* Anything else has 256M mapped */
memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000));
@@ -418,9 +473,6 @@ void __init setup_kuep(bool disabled)
{
pr_info("Activating Kernel Userspace Execution Prevention\n");
- if (cpu_has_feature(CPU_FTR_601))
- pr_warn("KUEP is not working on powerpc 601 (No NX bit in Seg Regs)\n");
-
if (disabled)
pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n");
}
diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index 90ab4f31e2b3..d2d8237ea9d5 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -197,9 +197,32 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize,
return va;
}
-static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize)
+static inline void fixup_tlbie_vpn(unsigned long vpn, int psize,
+ int apsize, int ssize)
{
- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ /* Radix flush for a hash guest */
+
+ unsigned long rb,rs,prs,r,ric;
+
+ rb = PPC_BIT(52); /* IS = 2 */
+ rs = 0; /* lpid = 0 */
+ prs = 0; /* partition scoped */
+ r = 1; /* radix format */
+ ric = 0; /* RIC_FLSUH_TLB */
+
+ /*
+ * Need the extra ptesync to make sure we don't
+ * re-order the tlbie
+ */
+ asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs),
+ "i"(ric), "r"(rs) : "memory");
+ }
+
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
/* Need the extra ptesync to ensure we don't reorder tlbie*/
asm volatile("ptesync": : :"memory");
___tlbie(vpn, psize, apsize, ssize);
@@ -283,7 +306,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize,
asm volatile("ptesync": : :"memory");
} else {
__tlbie(vpn, psize, apsize, ssize);
- fixup_tlbie(vpn, psize, apsize, ssize);
+ fixup_tlbie_vpn(vpn, psize, apsize, ssize);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
if (lock_tlbie && !use_local)
@@ -459,19 +482,12 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
return ret;
}
-static long native_hpte_find(unsigned long vpn, int psize, int ssize)
+static long __native_hpte_find(unsigned long want_v, unsigned long slot)
{
struct hash_pte *hptep;
- unsigned long hash;
+ unsigned long hpte_v;
unsigned long i;
- long slot;
- unsigned long want_v, hpte_v;
-
- hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
- want_v = hpte_encode_avpn(vpn, psize, ssize);
- /* Bolted mappings are only ever in the primary group */
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
for (i = 0; i < HPTES_PER_GROUP; i++) {
hptep = htab_address + slot;
@@ -485,6 +501,33 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
return -1;
}
+static long native_hpte_find(unsigned long vpn, int psize, int ssize)
+{
+ unsigned long hpte_group;
+ unsigned long want_v;
+ unsigned long hash;
+ long slot;
+
+ hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
+ want_v = hpte_encode_avpn(vpn, psize, ssize);
+
+ /*
+ * We try to keep bolted entries always in primary hash
+ * But in some case we can find them in secondary too.
+ */
+ hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot = __native_hpte_find(want_v, hpte_group);
+ if (slot < 0) {
+ /* Try in secondary */
+ hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot = __native_hpte_find(want_v, hpte_group);
+ if (slot < 0)
+ return -1;
+ }
+
+ return slot;
+}
+
/*
* Update the page protection bits. Intended to be used to create
* guard pages for kernel data structures on pages which are bolted
@@ -856,7 +899,7 @@ static void native_flush_hash_range(unsigned long number, int local)
/*
* Just do one more with the last used values.
*/
- fixup_tlbie(vpn, psize, psize, ssize);
+ fixup_tlbie_vpn(vpn, psize, psize, ssize);
asm volatile("eieio; tlbsync; ptesync":::"memory");
if (lock_tlbie)
diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index d1f390ac9cdb..64733b9cb20a 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -406,6 +406,8 @@ int hash__has_transparent_hugepage(void)
return 1;
}
+EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);
+
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#ifdef CONFIG_STRICT_KERNEL_RWX
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index b8ad14bb1170..523d4d39d11e 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -34,6 +34,7 @@
#include <linux/libfdt.h>
#include <linux/pkeys.h>
#include <linux/hugetlb.h>
+#include <linux/cpu.h>
#include <asm/debugfs.h>
#include <asm/processor.h>
@@ -61,6 +62,7 @@
#include <asm/ps3.h>
#include <asm/pte-walk.h>
#include <asm/asm-prototypes.h>
+#include <asm/ultravisor.h>
#include <mm/mmu_decl.h>
@@ -261,6 +263,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
unsigned long vsid = get_kernel_vsid(vaddr, ssize);
unsigned long vpn = hpt_vpn(vaddr, vsid, ssize);
unsigned long tprot = prot;
+ bool secondary_hash = false;
/*
* If we hit a bad address return error.
@@ -271,10 +274,6 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
if (overlaps_kernel_text(vaddr, vaddr + step))
tprot &= ~HPTE_R_N;
- /* Make kvm guest trampolines executable */
- if (overlaps_kvm_tmp(vaddr, vaddr + step))
- tprot &= ~HPTE_R_N;
-
/*
* If relocatable, check if it overlaps interrupt vectors that
* are copied down to real 0. For relocatable kernel
@@ -293,13 +292,31 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
BUG_ON(!mmu_hash_ops.hpte_insert);
+repeat:
ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot,
HPTE_V_BOLTED, psize, psize,
ssize);
+ if (ret == -1) {
+ /*
+ * Try to to keep bolted entries in primary.
+ * Remove non bolted entries and try insert again
+ */
+ ret = mmu_hash_ops.hpte_remove(hpteg);
+ if (ret != -1)
+ ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot,
+ HPTE_V_BOLTED, psize, psize,
+ ssize);
+ if (ret == -1 && !secondary_hash) {
+ secondary_hash = true;
+ hpteg = ((~hash & htab_hash_mask) * HPTES_PER_GROUP);
+ goto repeat;
+ }
+ }
if (ret < 0)
break;
+ cond_resched();
#ifdef CONFIG_DEBUG_PAGEALLOC
if (debug_pagealloc_enabled() &&
(paddr >> PAGE_SHIFT) < linear_map_hash_count)
@@ -635,6 +652,7 @@ static void init_hpte_page_sizes(void)
static void __init htab_init_page_sizes(void)
{
+ bool aligned = true;
init_hpte_page_sizes();
if (!debug_pagealloc_enabled()) {
@@ -642,7 +660,15 @@ static void __init htab_init_page_sizes(void)
* Pick a size for the linear mapping. Currently, we only
* support 16M, 1M and 4K which is the default
*/
- if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ if (IS_ENABLED(STRICT_KERNEL_RWX) &&
+ (unsigned long)_stext % 0x1000000) {
+ if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ pr_warn("Kernel not 16M aligned, "
+ "disabling 16M linear map alignment");
+ aligned = false;
+ }
+
+ if (mmu_psize_defs[MMU_PAGE_16M].shift && aligned)
mmu_linear_psize = MMU_PAGE_16M;
else if (mmu_psize_defs[MMU_PAGE_1M].shift)
mmu_linear_psize = MMU_PAGE_1M;
@@ -823,7 +849,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
* For now, UPRT is 0 and we have no segment table.
*/
htab_size = __ilog2(htab_size) - 18;
- mmu_partition_table_set_entry(0, hash_table | htab_size, 0);
+ mmu_partition_table_set_entry(0, hash_table | htab_size, 0, false);
pr_info("Partition table %p\n", partition_tb);
}
@@ -857,12 +883,6 @@ static void __init htab_initialize(void)
/* Using a hypervisor which owns the htab */
htab_address = NULL;
_SDR1 = 0;
- /*
- * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall
- * to inform the hypervisor that we wish to use the HPT.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- register_process_table(0, 0, 0);
#ifdef CONFIG_FA_DUMP
/*
* If firmware assisted dump is active firmware preserves
@@ -1075,8 +1095,8 @@ void hash__early_init_mmu_secondary(void)
if (!cpu_has_feature(CPU_FTR_ARCH_300))
mtspr(SPRN_SDR1, _SDR1);
else
- mtspr(SPRN_PTCR,
- __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+ set_ptcr_when_no_uv(__pa(partition_tb) |
+ (PATB_SIZE_SHIFT - 12));
}
/* Initialize SLB */
slb_initialize();
@@ -1460,8 +1480,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
}
EXPORT_SYMBOL_GPL(hash_page);
-int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
- unsigned long dsisr)
+int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr,
+ unsigned long msr)
{
unsigned long access = _PAGE_PRESENT | _PAGE_READ;
unsigned long flags = 0;
@@ -1518,8 +1538,8 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
}
#endif
-void hash_preload(struct mm_struct *mm, unsigned long ea,
- bool is_exec, unsigned long trap)
+static void hash_preload(struct mm_struct *mm, unsigned long ea,
+ bool is_exec, unsigned long trap)
{
int hugepage_shift;
unsigned long vsid;
@@ -1599,6 +1619,57 @@ out_exit:
local_irq_restore(flags);
}
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux PTE.
+ *
+ * This must always be called with the pte lock held.
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+ pte_t *ptep)
+{
+ /*
+ * We don't need to worry about _PAGE_PRESENT here because we are
+ * called with either mm->page_table_lock held or ptl lock held
+ */
+ unsigned long trap;
+ bool is_exec;
+
+ if (radix_enabled()) {
+ prefetch((void *)address);
+ return;
+ }
+
+ /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
+ if (!pte_young(*ptep) || address >= TASK_SIZE)
+ return;
+
+ /*
+ * We try to figure out if we are coming from an instruction
+ * access fault and pass that down to __hash_page so we avoid
+ * double-faulting on execution of fresh text. We have to test
+ * for regs NULL since init will get here first thing at boot.
+ *
+ * We also avoid filling the hash if not coming from a fault.
+ */
+
+ trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL;
+ switch (trap) {
+ case 0x300:
+ is_exec = false;
+ break;
+ case 0x400:
+ is_exec = true;
+ break;
+ default:
+ return;
+ }
+
+ hash_preload(vma->vm_mm, address, is_exec, trap);
+}
+
#ifdef CONFIG_PPC_MEM_KEYS
/*
* Return the protection key associated with the given address and the
@@ -1705,7 +1776,7 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
/*
* IF we try to do a HUGE PTE update after a withdraw is done.
* we will find the below NULL. This happens when we do
- * split_huge_page_pmd
+ * split_huge_pmd
*/
if (!hpte_slot_array)
return;
@@ -1931,10 +2002,16 @@ static int hpt_order_get(void *data, u64 *val)
static int hpt_order_set(void *data, u64 val)
{
+ int ret;
+
if (!mmu_hash_ops.resize_hpt)
return -ENODEV;
- return mmu_hash_ops.resize_hpt(val);
+ cpus_read_lock();
+ ret = mmu_hash_ops.resize_hpt(val);
+ cpus_read_unlock();
+
+ return ret;
}
DEFINE_DEBUGFS_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");
@@ -1957,7 +2034,4 @@ void __init print_system_hash_info(void)
if (htab_hash_mask)
pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
- pr_info("kernel vmalloc start = 0x%lx\n", KERN_VIRT_START);
- pr_info("kernel IO start = 0x%lx\n", KERN_IO_START);
- pr_info("kernel vmemmap start = 0x%lx\n", (unsigned long)vmemmap);
}
diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c
index b056cae3388b..eba73ebd8ae5 100644
--- a/arch/powerpc/mm/book3s64/iommu_api.c
+++ b/arch/powerpc/mm/book3s64/iommu_api.c
@@ -103,7 +103,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
for (entry = 0; entry < entries; entry += chunk) {
unsigned long n = min(entries - entry, chunk);
- ret = get_user_pages(ua + (entry << PAGE_SHIFT), n,
+ ret = pin_user_pages(ua + (entry << PAGE_SHIFT), n,
FOLL_WRITE | FOLL_LONGTERM,
mem->hpages + entry, NULL);
if (ret == n) {
@@ -129,11 +129,8 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
* Allow to use larger than 64k IOMMU pages. Only do that
* if we are backed by hugetlb.
*/
- if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) {
- struct page *head = compound_head(page);
-
- pageshift = compound_order(head) + PAGE_SHIFT;
- }
+ if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page))
+ pageshift = page_shift(compound_head(page));
mem->pageshift = min(mem->pageshift, pageshift);
/*
* We don't need struct page reference any more, switch
@@ -170,9 +167,8 @@ good_exit:
return 0;
free_exit:
- /* free the reference taken */
- for (i = 0; i < pinned; i++)
- put_page(mem->hpages[i]);
+ /* free the references taken */
+ unpin_user_pages(mem->hpages, pinned);
vfree(mem->hpas);
kfree(mem);
@@ -218,7 +214,8 @@ static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
SetPageDirty(page);
- put_page(page);
+ unpin_user_page(page);
+
mem->hpas[i] = 0;
}
}
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index 2d0cb5ba9a47..0ba30b8b935b 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -256,8 +256,21 @@ void destroy_context(struct mm_struct *mm)
#ifdef CONFIG_SPAPR_TCE_IOMMU
WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
#endif
+ /*
+ * For tasks which were successfully initialized we end up calling
+ * arch_exit_mmap() which clears the process table entry. And
+ * arch_exit_mmap() is called before the required fullmm TLB flush
+ * which does a RIC=2 flush. Hence for an initialized task, we do clear
+ * any cached process table entries.
+ *
+ * The condition below handles the error case during task init. We have
+ * set the process table entry early and if we fail a task
+ * initialization, we need to ensure the process table entry is zeroed.
+ * We need not worry about process table entry caches because the task
+ * never ran with the PID value.
+ */
if (radix_enabled())
- WARN_ON(process_tb[mm->context.id].prtb0 != 0);
+ process_tb[mm->context.id].prtb0 = 0;
else
subpage_prot_free(mm);
destroy_contexts(&mm->context);
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 7d0e0d0d22c4..2bf7e1b4fd82 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -8,10 +8,13 @@
#include <linux/memblock.h>
#include <misc/cxl-base.h>
+#include <asm/debugfs.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/trace.h>
#include <asm/powernv.h>
+#include <asm/firmware.h>
+#include <asm/ultravisor.h>
#include <mm/mmu_decl.h>
#include <trace/events/thp.h>
@@ -21,9 +24,6 @@ EXPORT_SYMBOL(__pmd_frag_nr);
unsigned long __pmd_frag_size_shift;
EXPORT_SYMBOL(__pmd_frag_size_shift);
-int (*register_process_table)(unsigned long base, unsigned long page_size,
- unsigned long tbl_size);
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* This is called when relaxing access to a hugepage. It's also called in the page
@@ -205,37 +205,61 @@ void __init mmu_partition_table_init(void)
* 64 K size.
*/
ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
- mtspr(SPRN_PTCR, ptcr);
+ set_ptcr_when_no_uv(ptcr);
powernv_set_nmmu_ptcr(ptcr);
}
+static void flush_partition(unsigned int lpid, bool radix)
+{
+ if (radix) {
+ radix__flush_all_lpid(lpid);
+ radix__flush_all_lpid_guest(lpid);
+ } else {
+ asm volatile("ptesync" : : : "memory");
+ asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
+ "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ /* do we need fixup here ?*/
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
+ }
+}
+
void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
- unsigned long dw1)
+ unsigned long dw1, bool flush)
{
unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
+ /*
+ * When ultravisor is enabled, the partition table is stored in secure
+ * memory and can only be accessed doing an ultravisor call. However, we
+ * maintain a copy of the partition table in normal memory to allow Nest
+ * MMU translations to occur (for normal VMs).
+ *
+ * Therefore, here we always update partition_tb, regardless of whether
+ * we are running under an ultravisor or not.
+ */
partition_tb[lpid].patb0 = cpu_to_be64(dw0);
partition_tb[lpid].patb1 = cpu_to_be64(dw1);
/*
- * Global flush of TLBs and partition table caches for this lpid.
- * The type of flush (hash or radix) depends on what the previous
- * use of this partition ID was, not the new use.
+ * If ultravisor is enabled, we do an ultravisor call to register the
+ * partition table entry (PATE), which also do a global flush of TLBs
+ * and partition table caches for the lpid. Otherwise, just do the
+ * flush. The type of flush (hash or radix) depends on what the previous
+ * use of the partition ID was, not the new use.
*/
- asm volatile("ptesync" : : : "memory");
- if (old & PATB_HR) {
- asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
- "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
- asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
- "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
- trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
- } else {
- asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
- "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
- trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
+ if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) {
+ uv_register_pate(lpid, dw0, dw1);
+ pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = 0x%lx\n",
+ dw0, dw1);
+ } else if (flush) {
+ /*
+ * Boot does not need to flush, because MMU is off and each
+ * CPU does a tlbiel_all() before switching them on, which
+ * flushes everything.
+ */
+ flush_partition(lpid, (old & PATB_HR));
}
- /* do we need fixup here ?*/
- asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
@@ -354,7 +378,6 @@ static inline void pgtable_free(void *table, int index)
}
}
-#ifdef CONFIG_SMP
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
{
unsigned long pgf = (unsigned long)table;
@@ -371,12 +394,6 @@ void __tlb_remove_table(void *_table)
return pgtable_free(table, index);
}
-#else
-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
-{
- return pgtable_free(table, index);
-}
-#endif
#ifdef CONFIG_PROC_FS
atomic_long_t direct_pages_count[MMU_PAGE_COUNT];
@@ -447,23 +464,48 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
return true;
}
-int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
-{
- unsigned long i;
+/*
+ * Does the CPU support tlbie?
+ */
+bool tlbie_capable __read_mostly = true;
+EXPORT_SYMBOL(tlbie_capable);
- if (radix_enabled())
- return radix__ioremap_range(ea, pa, size, prot, nid);
-
- for (i = 0; i < size; i += PAGE_SIZE) {
- int err = map_kernel_page(ea + i, pa + i, prot);
- if (err) {
- if (slab_is_available())
- unmap_kernel_range(ea, size);
- else
- WARN_ON_ONCE(1); /* Should clean up */
- return err;
- }
+/*
+ * Should tlbie be used for management of CPU TLBs, for kernel and process
+ * address spaces? tlbie may still be used for nMMU accelerators, and for KVM
+ * guest address spaces.
+ */
+bool tlbie_enabled __read_mostly = true;
+
+static int __init setup_disable_tlbie(char *str)
+{
+ if (!radix_enabled()) {
+ pr_err("disable_tlbie: Unable to disable TLBIE with Hash MMU.\n");
+ return 1;
}
+ tlbie_capable = false;
+ tlbie_enabled = false;
+
+ return 1;
+}
+__setup("disable_tlbie", setup_disable_tlbie);
+
+static int __init pgtable_debugfs_setup(void)
+{
+ if (!tlbie_capable)
+ return 0;
+
+ /*
+ * There is no locking vs tlb flushing when changing this value.
+ * The tlb flushers will see one value or another, and use either
+ * tlbie or tlbiel with IPIs. In both cases the TLBs will be
+ * invalidated as expected.
+ */
+ debugfs_create_bool("tlbie_enabled", 0600,
+ powerpc_debugfs_root,
+ &tlbie_enabled);
+
return 0;
}
+arch_initcall(pgtable_debugfs_setup);
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index ae7fca40e5b3..59e0ebbd8036 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -307,16 +307,6 @@ void thread_pkey_regs_init(struct thread_struct *thread)
write_iamr(pkey_iamr_mask);
}
-static inline bool pkey_allows_readwrite(int pkey)
-{
- int pkey_shift = pkeyshift(pkey);
-
- if (!is_pkey_enabled(pkey))
- return true;
-
- return !(read_amr() & ((AMR_RD_BIT|AMR_WR_BIT) << pkey_shift));
-}
-
int __execute_only_pkey(struct mm_struct *mm)
{
return mm->context.execute_only_pkey;
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index b4ca9e95e678..dd1bea45325c 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -13,6 +13,7 @@
#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/mm.h>
+#include <linux/hugetlb.h>
#include <linux/string_helpers.h>
#include <linux/stop_machine.h>
@@ -27,25 +28,13 @@
#include <asm/sections.h>
#include <asm/trace.h>
#include <asm/uaccess.h>
+#include <asm/ultravisor.h>
#include <trace/events/thp.h>
unsigned int mmu_pid_bits;
unsigned int mmu_base_pid;
-static int native_register_process_table(unsigned long base, unsigned long pg_sz,
- unsigned long table_size)
-{
- unsigned long patb0, patb1;
-
- patb0 = be64_to_cpu(partition_tb[0].patb0);
- patb1 = base | table_size | PATB_GR;
-
- mmu_partition_table_set_entry(0, patb0, patb1);
-
- return 0;
-}
-
static __ref void *early_alloc_pgtable(unsigned long size, int nid,
unsigned long region_start, unsigned long region_end)
{
@@ -348,7 +337,11 @@ static void __init radix_init_pgtable(void)
}
/* Find out how many PID bits are supported */
- if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
+ if (!mmu_pid_bits)
+ mmu_pid_bits = 20;
+ mmu_base_pid = 1;
+ } else if (cpu_has_feature(CPU_FTR_HVMODE)) {
if (!mmu_pid_bits)
mmu_pid_bits = 20;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -380,18 +373,6 @@ static void __init radix_init_pgtable(void)
*/
rts_field = radix__get_tree_size();
process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);
- /*
- * Fill in the partition table. We are suppose to use effective address
- * of process table here. But our linear mapping also enable us to use
- * physical address here.
- */
- register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
- pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd);
- asm volatile("ptesync" : : : "memory");
- asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
- "r" (TLBIEL_INVAL_SET_LPID), "r" (0));
- asm volatile("eieio; tlbsync; ptesync" : : : "memory");
- trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
/*
* The init_mm context is given the first available (non-zero) PID,
@@ -412,20 +393,15 @@ static void __init radix_init_pgtable(void)
static void __init radix_init_partition_table(void)
{
- unsigned long rts_field, dw0;
+ unsigned long rts_field, dw0, dw1;
mmu_partition_table_init();
rts_field = radix__get_tree_size();
dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
- mmu_partition_table_set_entry(0, dw0, 0);
+ dw1 = __pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR;
+ mmu_partition_table_set_entry(0, dw0, dw1, false);
pr_info("Initializing Radix MMU\n");
- pr_info("Partition table %p\n", partition_tb);
-}
-
-void __init radix_init_native(void)
-{
- register_process_table = native_register_process_table;
}
static int __init get_idx_from_shift(unsigned int shift)
@@ -621,8 +597,9 @@ void __init radix__early_init_mmu(void)
__pmd_frag_nr = RADIX_PMD_FRAG_NR;
__pmd_frag_size_shift = RADIX_PMD_FRAG_SIZE_SHIFT;
+ radix_init_pgtable();
+
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
- radix_init_native();
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
radix_init_partition_table();
@@ -633,11 +610,9 @@ void __init radix__early_init_mmu(void)
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
- radix_init_pgtable();
/* Switch to the guard PID before turning on MMU */
radix__switch_mmu_context(NULL, &init_mm);
- if (cpu_has_feature(CPU_FTR_HVMODE))
- tlbiel_all();
+ tlbiel_all();
}
void radix__early_init_mmu_secondary(void)
@@ -650,14 +625,14 @@ void radix__early_init_mmu_secondary(void)
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
- mtspr(SPRN_PTCR,
- __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+ set_ptcr_when_no_uv(__pa(partition_tb) |
+ (PATB_SIZE_SHIFT - 12));
+
radix_init_amor();
}
radix__switch_mmu_context(NULL, &init_mm);
- if (cpu_has_feature(CPU_FTR_HVMODE))
- tlbiel_all();
+ tlbiel_all();
}
void radix__mmu_cleanup_all(void)
@@ -667,7 +642,7 @@ void radix__mmu_cleanup_all(void)
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
- mtspr(SPRN_PTCR, 0);
+ set_ptcr_when_no_uv(0);
powernv_set_nmmu_ptcr(0);
radix__flush_tlb_all();
}
@@ -737,8 +712,8 @@ static int __meminit stop_machine_change_mapping(void *data)
spin_unlock(&init_mm.page_table_lock);
pte_clear(&init_mm, params->aligned_start, params->pte);
- create_physical_mapping(params->aligned_start, params->start, -1);
- create_physical_mapping(params->end, params->aligned_end, -1);
+ create_physical_mapping(__pa(params->aligned_start), __pa(params->start), -1);
+ create_physical_mapping(__pa(params->end), __pa(params->aligned_end), -1);
spin_lock(&init_mm.page_table_lock);
return 0;
}
@@ -902,7 +877,7 @@ int __meminit radix__create_section_mapping(unsigned long start, unsigned long e
return -1;
}
- return create_physical_mapping(start, end, nid);
+ return create_physical_mapping(__pa(start), __pa(end), nid);
}
int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
@@ -1057,13 +1032,6 @@ pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
return old_pmd;
}
-int radix__has_transparent_hugepage(void)
-{
- /* For radix 2M at PMD level means thp */
- if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT)
- return 1;
- return 0;
-}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
@@ -1218,26 +1186,6 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
return 1;
}
-int radix__ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size,
- pgprot_t prot, int nid)
-{
- if (likely(slab_is_available())) {
- int err = ioremap_page_range(ea, ea + size, pa, prot);
- if (err)
- unmap_kernel_range(ea, size);
- return err;
- } else {
- unsigned long i;
-
- for (i = 0; i < size; i += PAGE_SIZE) {
- int err = map_kernel_page(ea + i, pa + i, prot);
- if (WARN_ON_ONCE(err)) /* Should clean up */
- return err;
- }
- return 0;
- }
-}
-
int __init arch_ioremap_p4d_supported(void)
{
return 0;
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 71f7fede2fa4..03f43c924e00 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -51,11 +51,15 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
* and partition table entries. Then flush the remaining sets of the
* TLB.
*/
- tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
- for (set = 1; set < num_sets; set++)
- tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
- /* Do the same for process scoped entries. */
+ if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
+ /* MSR[HV] should flush partition scope translations first. */
+ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
+ for (set = 1; set < num_sets; set++)
+ tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
+ }
+
+ /* Flush process scoped entries. */
tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
for (set = 1; set < num_sets; set++)
tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
@@ -116,22 +120,6 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
-static __always_inline void __tlbiel_lpid(unsigned long lpid, int set,
- unsigned long ric)
-{
- unsigned long rb,rs,prs,r;
-
- rb = PPC_BIT(52); /* IS = 2 */
- rb |= set << PPC_BITLSHIFT(51);
- rs = 0; /* LPID comes from LPIDR */
- prs = 0; /* partition scoped */
- r = 1; /* radix format */
-
- asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
- : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
- trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
-}
-
static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -146,23 +134,20 @@ static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
-static __always_inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
- unsigned long ric)
+static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
unsigned long rb,rs,prs,r;
rb = PPC_BIT(52); /* IS = 2 */
- rb |= set << PPC_BITLSHIFT(51);
- rs = 0; /* LPID comes from LPIDR */
+ rs = lpid;
prs = 1; /* process scoped */
r = 1; /* radix format */
- asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
- trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
+ trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
-
static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
unsigned long ap, unsigned long ric)
{
@@ -211,22 +196,83 @@ static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid
trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
-static inline void fixup_tlbie(void)
+
+static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long ap)
{
- unsigned long pid = 0;
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
+ }
+}
+
+static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
+ unsigned long ap)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_pid(0, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
+ }
+}
+
+static inline void fixup_tlbie_pid(unsigned long pid)
+{
+ /*
+ * We can use any address for the invalidation, pick one which is
+ * probably unused as an optimisation.
+ */
unsigned long va = ((1UL << 52) - 1);
- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_pid(0, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
}
}
+
+static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
+ unsigned long ap)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
+ }
+}
+
static inline void fixup_tlbie_lpid(unsigned long lpid)
{
+ /*
+ * We can use any address for the invalidation, pick one which is
+ * probably unused as an optimisation.
+ */
unsigned long va = ((1UL << 52) - 1);
- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_lpid(0, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
}
@@ -273,6 +319,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
switch (ric) {
case RIC_FLUSH_TLB:
__tlbie_pid(pid, RIC_FLUSH_TLB);
+ fixup_tlbie_pid(pid);
break;
case RIC_FLUSH_PWC:
__tlbie_pid(pid, RIC_FLUSH_PWC);
@@ -280,37 +327,42 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
case RIC_FLUSH_ALL:
default:
__tlbie_pid(pid, RIC_FLUSH_ALL);
+ fixup_tlbie_pid(pid);
}
- fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric)
+struct tlbiel_pid {
+ unsigned long pid;
+ unsigned long ric;
+};
+
+static void do_tlbiel_pid(void *info)
{
- int set;
+ struct tlbiel_pid *t = info;
- VM_BUG_ON(mfspr(SPRN_LPID) != lpid);
+ if (t->ric == RIC_FLUSH_TLB)
+ _tlbiel_pid(t->pid, RIC_FLUSH_TLB);
+ else if (t->ric == RIC_FLUSH_PWC)
+ _tlbiel_pid(t->pid, RIC_FLUSH_PWC);
+ else
+ _tlbiel_pid(t->pid, RIC_FLUSH_ALL);
+}
- asm volatile("ptesync": : :"memory");
+static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
+ unsigned long pid, unsigned long ric)
+{
+ struct cpumask *cpus = mm_cpumask(mm);
+ struct tlbiel_pid t = { .pid = pid, .ric = ric };
+ on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
/*
- * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
- * also flush the entire Page Walk Cache.
+ * Always want the CPU translations to be invalidated with tlbiel in
+ * these paths, so while coprocessors must use tlbie, we can not
+ * optimise away the tlbiel component.
*/
- __tlbiel_lpid(lpid, 0, ric);
-
- /* For PWC, only one flush is needed */
- if (ric == RIC_FLUSH_PWC) {
- asm volatile("ptesync": : :"memory");
- return;
- }
-
- /* For the remaining sets, just flush the TLB */
- for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
- __tlbiel_lpid(lpid, set, RIC_FLUSH_TLB);
-
- asm volatile("ptesync": : :"memory");
- asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST "; isync" : : :"memory");
+ if (atomic_read(&mm->context.copros) > 0)
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
}
static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
@@ -325,6 +377,7 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
switch (ric) {
case RIC_FLUSH_TLB:
__tlbie_lpid(lpid, RIC_FLUSH_TLB);
+ fixup_tlbie_lpid(lpid);
break;
case RIC_FLUSH_PWC:
__tlbie_lpid(lpid, RIC_FLUSH_PWC);
@@ -332,40 +385,33 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
case RIC_FLUSH_ALL:
default:
__tlbie_lpid(lpid, RIC_FLUSH_ALL);
+ fixup_tlbie_lpid(lpid);
}
- fixup_tlbie_lpid(lpid);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-static __always_inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric)
+static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
- int set;
-
- VM_BUG_ON(mfspr(SPRN_LPID) != lpid);
-
- asm volatile("ptesync": : :"memory");
-
/*
- * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
- * also flush the entire Page Walk Cache.
+ * Workaround the fact that the "ric" argument to __tlbie_pid
+ * must be a compile-time contraint to match the "i" constraint
+ * in the asm statement.
*/
- __tlbiel_lpid_guest(lpid, 0, ric);
-
- /* For PWC, only one flush is needed */
- if (ric == RIC_FLUSH_PWC) {
- asm volatile("ptesync": : :"memory");
- return;
+ switch (ric) {
+ case RIC_FLUSH_TLB:
+ __tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
+ break;
+ case RIC_FLUSH_PWC:
+ __tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
+ break;
+ case RIC_FLUSH_ALL:
+ default:
+ __tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}
-
- /* For the remaining sets, just flush the TLB */
- for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
- __tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB);
-
- asm volatile("ptesync": : :"memory");
- asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
+ fixup_tlbie_lpid(lpid);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-
static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
unsigned long pid, unsigned long page_size,
unsigned long psize)
@@ -407,6 +453,8 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
for (addr = start; addr < end; addr += page_size)
__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+
+ fixup_tlbie_va_range(addr - page_size, pid, ap);
}
static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
@@ -416,10 +464,57 @@ static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
asm volatile("ptesync": : :"memory");
__tlbie_va(va, pid, ap, ric);
- fixup_tlbie();
+ fixup_tlbie_va(va, pid, ap);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
+struct tlbiel_va {
+ unsigned long pid;
+ unsigned long va;
+ unsigned long psize;
+ unsigned long ric;
+};
+
+static void do_tlbiel_va(void *info)
+{
+ struct tlbiel_va *t = info;
+
+ if (t->ric == RIC_FLUSH_TLB)
+ _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
+ else if (t->ric == RIC_FLUSH_PWC)
+ _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
+ else
+ _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
+}
+
+static inline void _tlbiel_va_multicast(struct mm_struct *mm,
+ unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
+{
+ struct cpumask *cpus = mm_cpumask(mm);
+ struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };
+ on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
+ if (atomic_read(&mm->context.copros) > 0)
+ _tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
+}
+
+struct tlbiel_va_range {
+ unsigned long pid;
+ unsigned long start;
+ unsigned long end;
+ unsigned long page_size;
+ unsigned long psize;
+ bool also_pwc;
+};
+
+static void do_tlbiel_va_range(void *info)
+{
+ struct tlbiel_va_range *t = info;
+
+ _tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
+ t->psize, t->also_pwc);
+}
+
static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long psize, unsigned long ric)
{
@@ -427,7 +522,7 @@ static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
asm volatile("ptesync": : :"memory");
__tlbie_lpid_va(va, lpid, ap, ric);
- fixup_tlbie_lpid(lpid);
+ fixup_tlbie_lpid_va(va, lpid, ap);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@@ -439,10 +534,24 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
if (also_pwc)
__tlbie_pid(pid, RIC_FLUSH_PWC);
__tlbie_va_range(start, end, pid, page_size, psize);
- fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
+static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ struct cpumask *cpus = mm_cpumask(mm);
+ struct tlbiel_va_range t = { .start = start, .end = end,
+ .pid = pid, .page_size = page_size,
+ .psize = psize, .also_pwc = also_pwc };
+
+ on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
+ if (atomic_read(&mm->context.copros) > 0)
+ _tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
+}
+
/*
* Base TLB flushing operations:
*
@@ -580,10 +689,14 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
goto local;
}
- if (mm_needs_flush_escalation(mm))
- _tlbie_pid(pid, RIC_FLUSH_ALL);
- else
- _tlbie_pid(pid, RIC_FLUSH_TLB);
+ if (cputlb_use_tlbie()) {
+ if (mm_needs_flush_escalation(mm))
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
+ else
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ } else {
+ _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
+ }
} else {
local:
_tlbiel_pid(pid, RIC_FLUSH_TLB);
@@ -609,25 +722,23 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
goto local;
}
}
- _tlbie_pid(pid, RIC_FLUSH_ALL);
+ if (cputlb_use_tlbie())
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
+ else
+ _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
} else {
local:
_tlbiel_pid(pid, RIC_FLUSH_ALL);
}
preempt_enable();
}
+
void radix__flush_all_mm(struct mm_struct *mm)
{
__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);
-void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
-{
- tlb->need_flush_all = 1;
-}
-EXPORT_SYMBOL(radix__flush_tlb_pwc);
-
void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
int psize)
{
@@ -644,7 +755,10 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
exit_flush_lazy_tlbs(mm);
goto local;
}
- _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+ if (cputlb_use_tlbie())
+ _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+ else
+ _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
} else {
local:
_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
@@ -666,6 +780,24 @@ EXPORT_SYMBOL(radix__flush_tlb_page);
#define radix__flush_all_mm radix__local_flush_all_mm
#endif /* CONFIG_SMP */
+static void do_tlbiel_kernel(void *info)
+{
+ _tlbiel_pid(0, RIC_FLUSH_ALL);
+}
+
+static inline void _tlbiel_kernel_broadcast(void)
+{
+ on_each_cpu(do_tlbiel_kernel, NULL, 1);
+ if (tlbie_capable) {
+ /*
+ * Coherent accelerators don't refcount kernel memory mappings,
+ * so have to always issue a tlbie for them. This is quite a
+ * slow path anyway.
+ */
+ _tlbie_pid(0, RIC_FLUSH_ALL);
+ }
+}
+
/*
* If kernel TLBIs ever become local rather than global, then
* drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
@@ -673,7 +805,10 @@ EXPORT_SYMBOL(radix__flush_tlb_page);
*/
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- _tlbie_pid(0, RIC_FLUSH_ALL);
+ if (cputlb_use_tlbie())
+ _tlbie_pid(0, RIC_FLUSH_ALL);
+ else
+ _tlbiel_kernel_broadcast();
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
@@ -692,8 +827,7 @@ static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
static inline void __radix__flush_tlb_range(struct mm_struct *mm,
- unsigned long start, unsigned long end,
- bool flush_all_sizes)
+ unsigned long start, unsigned long end)
{
unsigned long pid;
@@ -729,54 +863,48 @@ is_local:
if (local) {
_tlbiel_pid(pid, RIC_FLUSH_TLB);
} else {
- if (mm_needs_flush_escalation(mm))
- _tlbie_pid(pid, RIC_FLUSH_ALL);
- else
- _tlbie_pid(pid, RIC_FLUSH_TLB);
+ if (cputlb_use_tlbie()) {
+ if (mm_needs_flush_escalation(mm))
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
+ else
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ } else {
+ _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
+ }
}
} else {
- bool hflush = flush_all_sizes;
- bool gflush = flush_all_sizes;
+ bool hflush = false;
unsigned long hstart, hend;
- unsigned long gstart, gend;
-
- if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
- hflush = true;
- if (hflush) {
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
hstart = (start + PMD_SIZE - 1) & PMD_MASK;
hend = end & PMD_MASK;
if (hstart == hend)
hflush = false;
+ else
+ hflush = true;
}
- if (gflush) {
- gstart = (start + PUD_SIZE - 1) & PUD_MASK;
- gend = end & PUD_MASK;
- if (gstart == gend)
- gflush = false;
- }
-
- asm volatile("ptesync": : :"memory");
if (local) {
+ asm volatile("ptesync": : :"memory");
__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
if (hflush)
__tlbiel_va_range(hstart, hend, pid,
PMD_SIZE, MMU_PAGE_2M);
- if (gflush)
- __tlbiel_va_range(gstart, gend, pid,
- PUD_SIZE, MMU_PAGE_1G);
asm volatile("ptesync": : :"memory");
- } else {
+ } else if (cputlb_use_tlbie()) {
+ asm volatile("ptesync": : :"memory");
__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
if (hflush)
__tlbie_va_range(hstart, hend, pid,
PMD_SIZE, MMU_PAGE_2M);
- if (gflush)
- __tlbie_va_range(gstart, gend, pid,
- PUD_SIZE, MMU_PAGE_1G);
- fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ } else {
+ _tlbiel_va_range_multicast(mm,
+ start, end, pid, page_size, mmu_virtual_psize, false);
+ if (hflush)
+ _tlbiel_va_range_multicast(mm,
+ hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
}
}
preempt_enable();
@@ -791,7 +919,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif
- __radix__flush_tlb_range(vma->vm_mm, start, end, false);
+ __radix__flush_tlb_range(vma->vm_mm, start, end);
}
EXPORT_SYMBOL(radix__flush_tlb_range);
@@ -835,32 +963,19 @@ EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
/*
* Flush partition scoped translations from LPID (=LPIDR)
*/
-void radix__flush_tlb_lpid(unsigned int lpid)
+void radix__flush_all_lpid(unsigned int lpid)
{
_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
-EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid);
-
-/*
- * Flush partition scoped translations from LPID (=LPIDR)
- */
-void radix__local_flush_tlb_lpid(unsigned int lpid)
-{
- _tlbiel_lpid(lpid, RIC_FLUSH_ALL);
-}
-EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid);
+EXPORT_SYMBOL_GPL(radix__flush_all_lpid);
/*
- * Flush process scoped translations from LPID (=LPIDR).
- * Important difference, the guest normally manages its own translations,
- * but some cases e.g., vCPU CPU migration require KVM to flush.
+ * Flush process scoped translations from LPID (=LPIDR)
*/
-void radix__local_flush_tlb_lpid_guest(unsigned int lpid)
+void radix__flush_all_lpid_guest(unsigned int lpid)
{
- _tlbiel_lpid_guest(lpid, RIC_FLUSH_ALL);
+ _tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}
-EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid_guest);
-
static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
unsigned long end, int psize);
@@ -880,53 +995,19 @@ void radix__tlb_flush(struct mmu_gather *tlb)
* that flushes the process table entry cache upon process teardown.
* See the comment for radix in arch_exit_mmap().
*/
- if (tlb->fullmm) {
+ if (tlb->fullmm || tlb->need_flush_all) {
__flush_all_mm(mm, true);
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
- } else if (mm_tlb_flush_nested(mm)) {
- /*
- * If there is a concurrent invalidation that is clearing ptes,
- * then it's possible this invalidation will miss one of those
- * cleared ptes and miss flushing the TLB. If this invalidate
- * returns before the other one flushes TLBs, that can result
- * in it returning while there are still valid TLBs inside the
- * range to be invalidated.
- *
- * See mm/memory.c:tlb_finish_mmu() for more details.
- *
- * The solution to this is ensure the entire range is always
- * flushed here. The problem for powerpc is that the flushes
- * are page size specific, so this "forced flush" would not
- * do the right thing if there are a mix of page sizes in
- * the range to be invalidated. So use __flush_tlb_range
- * which invalidates all possible page sizes in the range.
- *
- * PWC flush probably is not be required because the core code
- * shouldn't free page tables in this path, but accounting
- * for the possibility makes us a bit more robust.
- *
- * need_flush_all is an uncommon case because page table
- * teardown should be done with exclusive locks held (but
- * after locks are dropped another invalidate could come
- * in), it could be optimized further if necessary.
- */
- if (!tlb->need_flush_all)
- __radix__flush_tlb_range(mm, start, end, true);
- else
- radix__flush_all_mm(mm);
-#endif
} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
- if (!tlb->need_flush_all)
+ if (!tlb->freed_tables)
radix__flush_tlb_mm(mm);
else
radix__flush_all_mm(mm);
} else {
- if (!tlb->need_flush_all)
+ if (!tlb->freed_tables)
radix__flush_tlb_range_psize(mm, start, end, psize);
else
radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
}
- tlb->need_flush_all = 0;
}
static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
@@ -966,16 +1047,26 @@ is_local:
if (local) {
_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
} else {
- if (mm_needs_flush_escalation(mm))
- also_pwc = true;
+ if (cputlb_use_tlbie()) {
+ if (mm_needs_flush_escalation(mm))
+ also_pwc = true;
+
+ _tlbie_pid(pid,
+ also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+ } else {
+ _tlbiel_pid_multicast(mm, pid,
+ also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+ }
- _tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
}
} else {
if (local)
_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
- else
+ else if (cputlb_use_tlbie())
_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
+ else
+ _tlbiel_va_range_multicast(mm,
+ start, end, pid, page_size, psize, also_pwc);
}
preempt_enable();
}
@@ -1017,7 +1108,11 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
exit_flush_lazy_tlbs(mm);
goto local;
}
- _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+ if (cputlb_use_tlbie())
+ _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+ else
+ _tlbiel_va_range_multicast(mm,
+ addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
} else {
local:
_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
@@ -1066,6 +1161,9 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
if (unlikely(pid == MMU_NO_CONTEXT))
return;
+ if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ return;
+
/*
* If this context hasn't run on that CPU before and KVM is
* around, there's a slim chance that the guest on another
diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c
index 9ba07e55c489..2ef24a53f4c9 100644
--- a/arch/powerpc/mm/book3s64/subpage_prot.c
+++ b/arch/powerpc/mm/book3s64/subpage_prot.c
@@ -7,7 +7,7 @@
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/types.h>
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
#include <linux/hugetlb.h>
#include <linux/syscalls.h>
@@ -139,14 +139,14 @@ static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
return 0;
}
+static const struct mm_walk_ops subpage_walk_ops = {
+ .pmd_entry = subpage_walk_pmd_entry,
+};
+
static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
unsigned long len)
{
struct vm_area_struct *vma;
- struct mm_walk subpage_proto_walk = {
- .mm = mm,
- .pmd_entry = subpage_walk_pmd_entry,
- };
/*
* We don't try too hard, we just mark all the vma in that range
@@ -163,7 +163,7 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
if (vma->vm_start >= (addr + len))
break;
vma->vm_flags |= VM_NOHUGEPAGE;
- walk_page_vma(vma, &subpage_proto_walk);
+ walk_page_vma(vma, &subpage_walk_ops, NULL);
vma = vma->vm_next;
}
}
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index c617282d5b2a..5ab4f868e919 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -4,310 +4,18 @@
* Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
*
* Copyright (C) 2000 Russell King
- *
- * Consistent memory allocators. Used for DMA devices that want to
- * share uncached memory with the processor core. The function return
- * is the virtual address and 'dma_handle' is the physical address.
- * Mostly stolen from the ARM port, with some changes for PowerPC.
- * -- Dan
- *
- * Reorganized to get rid of the arch-specific consistent_* functions
- * and provide non-coherent implementations for the DMA API. -Matt
- *
- * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
- * implementation. This is pulled straight from ARM and barely
- * modified. -Matt
*/
-#include <linux/sched.h>
-#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/errno.h>
-#include <linux/string.h>
#include <linux/types.h>
#include <linux/highmem.h>
#include <linux/dma-direct.h>
#include <linux/dma-noncoherent.h>
-#include <linux/export.h>
#include <asm/tlbflush.h>
#include <asm/dma.h>
-#include <mm/mmu_decl.h>
-
-/*
- * This address range defaults to a value that is safe for all
- * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It
- * can be further configured for specific applications under
- * the "Advanced Setup" menu. -Matt
- */
-#define CONSISTENT_BASE (IOREMAP_TOP)
-#define CONSISTENT_END (CONSISTENT_BASE + CONFIG_CONSISTENT_SIZE)
-#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
-
-/*
- * This is the page table (2MB) covering uncached, DMA consistent allocations
- */
-static DEFINE_SPINLOCK(consistent_lock);
-
-/*
- * VM region handling support.
- *
- * This should become something generic, handling VM region allocations for
- * vmalloc and similar (ioremap, module space, etc).
- *
- * I envisage vmalloc()'s supporting vm_struct becoming:
- *
- * struct vm_struct {
- * struct vm_region region;
- * unsigned long flags;
- * struct page **pages;
- * unsigned int nr_pages;
- * unsigned long phys_addr;
- * };
- *
- * get_vm_area() would then call vm_region_alloc with an appropriate
- * struct vm_region head (eg):
- *
- * struct vm_region vmalloc_head = {
- * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list),
- * .vm_start = VMALLOC_START,
- * .vm_end = VMALLOC_END,
- * };
- *
- * However, vmalloc_head.vm_start is variable (typically, it is dependent on
- * the amount of RAM found at boot time.) I would imagine that get_vm_area()
- * would have to initialise this each time prior to calling vm_region_alloc().
- */
-struct ppc_vm_region {
- struct list_head vm_list;
- unsigned long vm_start;
- unsigned long vm_end;
-};
-
-static struct ppc_vm_region consistent_head = {
- .vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
- .vm_start = CONSISTENT_BASE,
- .vm_end = CONSISTENT_END,
-};
-
-static struct ppc_vm_region *
-ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp)
-{
- unsigned long addr = head->vm_start, end = head->vm_end - size;
- unsigned long flags;
- struct ppc_vm_region *c, *new;
-
- new = kmalloc(sizeof(struct ppc_vm_region), gfp);
- if (!new)
- goto out;
-
- spin_lock_irqsave(&consistent_lock, flags);
-
- list_for_each_entry(c, &head->vm_list, vm_list) {
- if ((addr + size) < addr)
- goto nospc;
- if ((addr + size) <= c->vm_start)
- goto found;
- addr = c->vm_end;
- if (addr > end)
- goto nospc;
- }
-
- found:
- /*
- * Insert this entry _before_ the one we found.
- */
- list_add_tail(&new->vm_list, &c->vm_list);
- new->vm_start = addr;
- new->vm_end = addr + size;
-
- spin_unlock_irqrestore(&consistent_lock, flags);
- return new;
-
- nospc:
- spin_unlock_irqrestore(&consistent_lock, flags);
- kfree(new);
- out:
- return NULL;
-}
-
-static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr)
-{
- struct ppc_vm_region *c;
-
- list_for_each_entry(c, &head->vm_list, vm_list) {
- if (c->vm_start == addr)
- goto out;
- }
- c = NULL;
- out:
- return c;
-}
-
-/*
- * Allocate DMA-coherent memory space and return both the kernel remapped
- * virtual and bus address for that space.
- */
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp, unsigned long attrs)
-{
- struct page *page;
- struct ppc_vm_region *c;
- unsigned long order;
- u64 mask = ISA_DMA_THRESHOLD, limit;
-
- if (dev) {
- mask = dev->coherent_dma_mask;
-
- /*
- * Sanity check the DMA mask - it must be non-zero, and
- * must be able to be satisfied by a DMA allocation.
- */
- if (mask == 0) {
- dev_warn(dev, "coherent DMA mask is unset\n");
- goto no_page;
- }
-
- if ((~mask) & ISA_DMA_THRESHOLD) {
- dev_warn(dev, "coherent DMA mask %#llx is smaller "
- "than system GFP_DMA mask %#llx\n",
- mask, (unsigned long long)ISA_DMA_THRESHOLD);
- goto no_page;
- }
- }
-
-
- size = PAGE_ALIGN(size);
- limit = (mask + 1) & ~mask;
- if ((limit && size >= limit) ||
- size >= (CONSISTENT_END - CONSISTENT_BASE)) {
- printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n",
- size, mask);
- return NULL;
- }
-
- order = get_order(size);
-
- /* Might be useful if we ever have a real legacy DMA zone... */
- if (mask != 0xffffffff)
- gfp |= GFP_DMA;
-
- page = alloc_pages(gfp, order);
- if (!page)
- goto no_page;
-
- /*
- * Invalidate any data that might be lurking in the
- * kernel direct-mapped region for device DMA.
- */
- {
- unsigned long kaddr = (unsigned long)page_address(page);
- memset(page_address(page), 0, size);
- flush_dcache_range(kaddr, kaddr + size);
- }
-
- /*
- * Allocate a virtual address in the consistent mapping region.
- */
- c = ppc_vm_region_alloc(&consistent_head, size,
- gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
- if (c) {
- unsigned long vaddr = c->vm_start;
- struct page *end = page + (1 << order);
-
- split_page(page, order);
-
- /*
- * Set the "dma handle"
- */
- *dma_handle = phys_to_dma(dev, page_to_phys(page));
-
- do {
- SetPageReserved(page);
- map_kernel_page(vaddr, page_to_phys(page),
- pgprot_noncached(PAGE_KERNEL));
- page++;
- vaddr += PAGE_SIZE;
- } while (size -= PAGE_SIZE);
-
- /*
- * Free the otherwise unused pages.
- */
- while (page < end) {
- __free_page(page);
- page++;
- }
-
- return (void *)c->vm_start;
- }
-
- if (page)
- __free_pages(page, order);
- no_page:
- return NULL;
-}
-
-/*
- * free a page as defined by the above mapping.
- */
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle, unsigned long attrs)
-{
- struct ppc_vm_region *c;
- unsigned long flags, addr;
-
- size = PAGE_ALIGN(size);
-
- spin_lock_irqsave(&consistent_lock, flags);
-
- c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr);
- if (!c)
- goto no_area;
-
- if ((c->vm_end - c->vm_start) != size) {
- printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
- __func__, c->vm_end - c->vm_start, size);
- dump_stack();
- size = c->vm_end - c->vm_start;
- }
-
- addr = c->vm_start;
- do {
- pte_t *ptep;
- unsigned long pfn;
-
- ptep = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(addr),
- addr),
- addr),
- addr);
- if (!pte_none(*ptep) && pte_present(*ptep)) {
- pfn = pte_pfn(*ptep);
- pte_clear(&init_mm, addr, ptep);
- if (pfn_valid(pfn)) {
- struct page *page = pfn_to_page(pfn);
- __free_reserved_page(page);
- }
- }
- addr += PAGE_SIZE;
- } while (size -= PAGE_SIZE);
-
- flush_tlb_kernel_range(c->vm_start, c->vm_end);
-
- list_del(&c->vm_list);
-
- spin_unlock_irqrestore(&consistent_lock, flags);
-
- kfree(c);
- return;
-
- no_area:
- spin_unlock_irqrestore(&consistent_lock, flags);
- printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
- __func__, vaddr);
- dump_stack();
-}
-
/*
* make an area consistent.
*/
@@ -396,35 +104,21 @@ static void __dma_sync_page(phys_addr_t paddr, size_t size, int dir)
#endif
}
-void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
- size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+ enum dma_data_direction dir)
{
__dma_sync_page(paddr, size, dir);
}
-void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
- size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+ enum dma_data_direction dir)
{
__dma_sync_page(paddr, size, dir);
}
-/*
- * Return the PFN for a given cpu virtual address returned by arch_dma_alloc.
- */
-long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr,
- dma_addr_t dma_addr)
+void arch_dma_prep_coherent(struct page *page, size_t size)
{
- /* This should always be populated, so we don't test every
- * level. If that fails, we'll have a nice crash which
- * will be as good as a BUG_ON()
- */
- unsigned long cpu_addr = (unsigned long)vaddr;
- pgd_t *pgd = pgd_offset_k(cpu_addr);
- pud_t *pud = pud_offset(pgd, cpu_addr);
- pmd_t *pmd = pmd_offset(pud, cpu_addr);
- pte_t *ptep = pte_offset_kernel(pmd, cpu_addr);
+ unsigned long kaddr = (unsigned long)page_address(page);
- if (pte_none(*ptep) || !pte_present(*ptep))
- return 0;
- return pte_pfn(*ptep);
+ flush_dcache_range(kaddr, kaddr + size);
}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 8432c281de92..8db0507619e2 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -233,7 +233,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
// Read/write fault in a valid region (the exception table search passed
// above), but blocked by KUAP is bad, it can never succeed.
- if (bad_kuap_fault(regs, is_write))
+ if (bad_kuap_fault(regs, address, is_write))
return true;
// What's left? Kernel fault on user in well defined regions (extable
@@ -279,12 +279,8 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) &&
access_ok(nip, sizeof(*nip))) {
unsigned int inst;
- int res;
- pagefault_disable();
- res = __get_user_inatomic(inst, nip);
- pagefault_enable();
- if (!res)
+ if (!probe_user_read(&inst, nip, sizeof(inst)))
return !store_updates_sp(inst);
*must_retry = true;
}
@@ -354,6 +350,9 @@ static void sanity_check_fault(bool is_write, bool is_user,
* Userspace trying to access kernel address, we get PROTFAULT for that.
*/
if (is_user && address >= TASK_SIZE) {
+ if ((long)address == -1)
+ return;
+
pr_crit_ratelimited("%s[%d]: User access of kernel address (%lx) - exploit attempt? (uid: %d)\n",
current->comm, current->pid, address,
from_kuid(&init_user_ns, current_uid()));
@@ -645,6 +644,7 @@ NOKPROBE_SYMBOL(do_page_fault);
void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
{
const struct exception_table_entry *entry;
+ int is_write = page_fault_is_write(regs->dsisr);
/* Are we prepared to handle this fault? */
if ((entry = search_exception_tables(regs->nip)) != NULL) {
@@ -658,9 +658,10 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
case 0x300:
case 0x380:
case 0xe00:
- pr_alert("BUG: %s at 0x%08lx\n",
+ pr_alert("BUG: %s on %s at 0x%08lx\n",
regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" :
- "Unable to handle kernel data access", regs->dar);
+ "Unable to handle kernel data access",
+ is_write ? "write" : "read", regs->dar);
break;
case 0x400:
case 0x480:
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a8953f108808..73d4873fc7f8 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -667,7 +667,7 @@ void flush_dcache_icache_hugepage(struct page *page)
BUG_ON(!PageCompound(page));
- for (i = 0; i < (1UL << compound_order(page)); i++) {
+ for (i = 0; i < compound_nr(page); i++) {
if (!PageHighMem(page)) {
__flush_dcache_icache(page_address(page+i));
} else {
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index a84da92920f7..42ef7a6e6098 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -21,6 +21,13 @@
#include <asm/pgtable.h>
#include <asm/kup.h>
+phys_addr_t memstart_addr __ro_after_init = (phys_addr_t)~0ull;
+EXPORT_SYMBOL_GPL(memstart_addr);
+phys_addr_t kernstart_addr __ro_after_init;
+EXPORT_SYMBOL_GPL(kernstart_addr);
+unsigned long kernstart_virt_addr __ro_after_init = KERNELBASE;
+EXPORT_SYMBOL_GPL(kernstart_virt_addr);
+
static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index b04896a88d79..872df48ae41b 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -56,11 +56,6 @@
phys_addr_t total_memory;
phys_addr_t total_lowmem;
-phys_addr_t memstart_addr = (phys_addr_t)~0ull;
-EXPORT_SYMBOL(memstart_addr);
-phys_addr_t kernstart_addr;
-EXPORT_SYMBOL(kernstart_addr);
-
#ifdef CONFIG_RELOCATABLE
/* Used in __va()/__pa() */
long long virt_phys_offset;
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a44f6281ca3a..4002ced3596f 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -63,38 +63,48 @@
#include <mm/mmu_decl.h>
-phys_addr_t memstart_addr = ~0;
-EXPORT_SYMBOL_GPL(memstart_addr);
-phys_addr_t kernstart_addr;
-EXPORT_SYMBOL_GPL(kernstart_addr);
-
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
- * Given an address within the vmemmap, determine the pfn of the page that
- * represents the start of the section it is within. Note that we have to
+ * Given an address within the vmemmap, determine the page that
+ * represents the start of the subsection it is within. Note that we have to
* do this by hand as the proffered address may not be correctly aligned.
* Subtraction of non-aligned pointers produces undefined results.
*/
-static unsigned long __meminit vmemmap_section_start(unsigned long page)
+static struct page * __meminit vmemmap_subsection_start(unsigned long vmemmap_addr)
{
- unsigned long offset = page - ((unsigned long)(vmemmap));
+ unsigned long start_pfn;
+ unsigned long offset = vmemmap_addr - ((unsigned long)(vmemmap));
/* Return the pfn of the start of the section. */
- return (offset / sizeof(struct page)) & PAGE_SECTION_MASK;
+ start_pfn = (offset / sizeof(struct page)) & PAGE_SUBSECTION_MASK;
+ return pfn_to_page(start_pfn);
}
/*
- * Check if this vmemmap page is already initialised. If any section
- * which overlaps this vmemmap page is initialised then this page is
- * initialised already.
+ * Since memory is added in sub-section chunks, before creating a new vmemmap
+ * mapping, the kernel should check whether there is an existing memmap mapping
+ * covering the new subsection added. This is needed because kernel can map
+ * vmemmap area using 16MB pages which will cover a memory range of 16G. Such
+ * a range covers multiple subsections (2M)
+ *
+ * If any subsection in the 16G range mapped by vmemmap is valid we consider the
+ * vmemmap populated (There is a page table entry already present). We can't do
+ * a page table lookup here because with the hash translation we don't keep
+ * vmemmap details in linux page table.
*/
-static int __meminit vmemmap_populated(unsigned long start, int page_size)
+static int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size)
{
- unsigned long end = start + page_size;
- start = (unsigned long)(pfn_to_page(vmemmap_section_start(start)));
+ struct page *start;
+ unsigned long vmemmap_end = vmemmap_addr + vmemmap_map_size;
+ start = vmemmap_subsection_start(vmemmap_addr);
- for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page)))
- if (pfn_valid(page_to_pfn((struct page *)start)))
+ for (; (unsigned long)start < vmemmap_end; start += PAGES_PER_SUBSECTION)
+ /*
+ * pfn valid check here is intended to really check
+ * whether we have any subsection already initialized
+ * in this range.
+ */
+ if (pfn_valid(page_to_pfn(start)))
return 1;
return 0;
@@ -172,6 +182,21 @@ static __meminit void vmemmap_list_populate(unsigned long phys,
vmemmap_list = vmem_back;
}
+static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
+ unsigned long page_size)
+{
+ unsigned long nr_pfn = page_size / sizeof(struct page);
+ unsigned long start_pfn = page_to_pfn((struct page *)start);
+
+ if ((start_pfn + nr_pfn) > altmap->end_pfn)
+ return true;
+
+ if (start_pfn < altmap->base_pfn)
+ return true;
+
+ return false;
+}
+
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
@@ -186,6 +211,12 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
void *p = NULL;
int rc;
+ /*
+ * This vmemmap range is backing different subsections. If any
+ * of that subsection is marked valid, that means we already
+ * have initialized a page table covering this range and hence
+ * the vmemmap range is populated.
+ */
if (vmemmap_populated(start, page_size))
continue;
@@ -194,7 +225,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
* fail due to alignment issues when using 16MB hugepages, so
* fall back to system memory if the altmap allocation fail.
*/
- if (altmap) {
+ if (altmap && !altmap_cross_boundary(altmap, start, page_size)) {
p = altmap_alloc_block_buf(page_size, altmap);
if (!p)
pr_debug("altmap block allocation failed, falling back to system memory");
@@ -275,9 +306,10 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
struct page *page;
/*
- * the section has already be marked as invalid, so
- * vmemmap_populated() true means some other sections still
- * in this page, so skip it.
+ * We have already marked the subsection we are trying to remove
+ * invalid. So if we want to remove the vmemmap range, we
+ * need to make sure there is no subsection marked valid
+ * in this range.
*/
if (vmemmap_populated(start, page_size))
continue;
diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c
new file mode 100644
index 000000000000..fc669643ce6a
--- /dev/null
+++ b/arch/powerpc/mm/ioremap.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/io-workarounds.h>
+
+unsigned long ioremap_bot;
+EXPORT_SYMBOL(ioremap_bot);
+
+void __iomem *ioremap(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
+ void *caller = __builtin_return_address(0);
+
+ if (iowa_is_active())
+ return iowa_ioremap(addr, size, prot, caller);
+ return __ioremap_caller(addr, size, prot, caller);
+}
+EXPORT_SYMBOL(ioremap);
+
+void __iomem *ioremap_wc(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL);
+ void *caller = __builtin_return_address(0);
+
+ if (iowa_is_active())
+ return iowa_ioremap(addr, size, prot, caller);
+ return __ioremap_caller(addr, size, prot, caller);
+}
+EXPORT_SYMBOL(ioremap_wc);
+
+void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_cached(PAGE_KERNEL);
+ void *caller = __builtin_return_address(0);
+
+ if (iowa_is_active())
+ return iowa_ioremap(addr, size, prot, caller);
+ return __ioremap_caller(addr, size, prot, caller);
+}
+
+void __iomem *ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
+{
+ pte_t pte = __pte(flags);
+ void *caller = __builtin_return_address(0);
+
+ /* writeable implies dirty for kernel addresses */
+ if (pte_write(pte))
+ pte = pte_mkdirty(pte);
+
+ /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
+ pte = pte_exprotect(pte);
+ pte = pte_mkprivileged(pte);
+
+ if (iowa_is_active())
+ return iowa_ioremap(addr, size, pte_pgprot(pte), caller);
+ return __ioremap_caller(addr, size, pte_pgprot(pte), caller);
+}
+EXPORT_SYMBOL(ioremap_prot);
+
+int early_ioremap_range(unsigned long ea, phys_addr_t pa,
+ unsigned long size, pgprot_t prot)
+{
+ unsigned long i;
+
+ for (i = 0; i < size; i += PAGE_SIZE) {
+ int err = map_kernel_page(ea + i, pa + i, prot);
+
+ if (WARN_ON_ONCE(err)) /* Should clean up */
+ return err;
+ }
+
+ return 0;
+}
+
+void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
+ pgprot_t prot, void *caller)
+{
+ struct vm_struct *area;
+ int ret;
+ unsigned long va;
+
+ area = __get_vm_area_caller(size, VM_IOREMAP, IOREMAP_START, IOREMAP_END, caller);
+ if (area == NULL)
+ return NULL;
+
+ area->phys_addr = pa;
+ va = (unsigned long)area->addr;
+
+ ret = ioremap_page_range(va, va + size, pa, prot);
+ if (!ret)
+ return (void __iomem *)area->addr + offset;
+
+ unmap_kernel_range(va, size);
+ free_vm_area(area);
+
+ return NULL;
+}
diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c
new file mode 100644
index 000000000000..743e11384dea
--- /dev/null
+++ b/arch/powerpc/mm/ioremap_32.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include <mm/mmu_decl.h>
+
+void __iomem *ioremap_wt(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_cached_wthru(PAGE_KERNEL);
+
+ return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wt);
+
+void __iomem *
+__ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller)
+{
+ unsigned long v;
+ phys_addr_t p, offset;
+ int err;
+
+ /*
+ * Choose an address to map it to.
+ * Once the vmalloc system is running, we use it.
+ * Before then, we use space going down from IOREMAP_TOP
+ * (ioremap_bot records where we're up to).
+ */
+ p = addr & PAGE_MASK;
+ offset = addr & ~PAGE_MASK;
+ size = PAGE_ALIGN(addr + size) - p;
+
+ /*
+ * If the address lies within the first 16 MB, assume it's in ISA
+ * memory space
+ */
+ if (p < 16 * 1024 * 1024)
+ p += _ISA_MEM_BASE;
+
+#ifndef CONFIG_CRASH_DUMP
+ /*
+ * Don't allow anybody to remap normal RAM that we're using.
+ * mem_init() sets high_memory so only do the check after that.
+ */
+ if (slab_is_available() && p <= virt_to_phys(high_memory - 1) &&
+ page_is_ram(__phys_to_pfn(p))) {
+ pr_warn("%s(): phys addr 0x%llx is RAM lr %ps\n", __func__,
+ (unsigned long long)p, __builtin_return_address(0));
+ return NULL;
+ }
+#endif
+
+ if (size == 0)
+ return NULL;
+
+ /*
+ * Is it already mapped? Perhaps overlapped by a previous
+ * mapping.
+ */
+ v = p_block_mapped(p);
+ if (v)
+ return (void __iomem *)v + offset;
+
+ if (slab_is_available())
+ return do_ioremap(p, offset, size, prot, caller);
+
+ /*
+ * Should check if it is a candidate for a BAT mapping
+ */
+ pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller);
+
+ err = early_ioremap_range(ioremap_bot - size, p, size, prot);
+ if (err)
+ return NULL;
+ ioremap_bot -= size;
+
+ return (void __iomem *)ioremap_bot + offset;
+}
+
+void iounmap(volatile void __iomem *addr)
+{
+ /*
+ * If mapped by BATs then there is nothing to do.
+ * Calling vfree() generates a benign warning.
+ */
+ if (v_block_mapped((unsigned long)addr))
+ return;
+
+ if (addr > high_memory && (unsigned long)addr < ioremap_bot)
+ vunmap((void *)(PAGE_MASK & (unsigned long)addr));
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c
new file mode 100644
index 000000000000..50a99d9684f7
--- /dev/null
+++ b/arch/powerpc/mm/ioremap_64.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+/**
+ * Low level function to establish the page tables for an IO mapping
+ */
+void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
+{
+ int ret;
+ unsigned long va = (unsigned long)ea;
+
+ /* We don't support the 4K PFN hack with ioremap */
+ if (pgprot_val(prot) & H_PAGE_4K_PFN)
+ return NULL;
+
+ if ((ea + size) >= (void *)IOREMAP_END) {
+ pr_warn("Outside the supported range\n");
+ return NULL;
+ }
+
+ WARN_ON(pa & ~PAGE_MASK);
+ WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
+ WARN_ON(size & ~PAGE_MASK);
+
+ if (slab_is_available()) {
+ ret = ioremap_page_range(va, va + size, pa, prot);
+ if (ret)
+ unmap_kernel_range(va, size);
+ } else {
+ ret = early_ioremap_range(va, pa, size, prot);
+ }
+
+ if (ret)
+ return NULL;
+
+ return (void __iomem *)ea;
+}
+EXPORT_SYMBOL(__ioremap_at);
+
+/**
+ * Low level function to tear down the page tables for an IO mapping. This is
+ * used for mappings that are manipulated manually, like partial unmapping of
+ * PCI IOs or ISA space.
+ */
+void __iounmap_at(void *ea, unsigned long size)
+{
+ WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
+ WARN_ON(size & ~PAGE_MASK);
+
+ unmap_kernel_range((unsigned long)ea, size);
+}
+EXPORT_SYMBOL(__iounmap_at);
+
+void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size,
+ pgprot_t prot, void *caller)
+{
+ phys_addr_t paligned, offset;
+ void __iomem *ret;
+ int err;
+
+ /* We don't support the 4K PFN hack with ioremap */
+ if (pgprot_val(prot) & H_PAGE_4K_PFN)
+ return NULL;
+
+ /*
+ * Choose an address to map it to. Once the vmalloc system is running,
+ * we use it. Before that, we map using addresses going up from
+ * ioremap_bot. vmalloc will use the addresses from IOREMAP_BASE
+ * through ioremap_bot.
+ */
+ paligned = addr & PAGE_MASK;
+ offset = addr & ~PAGE_MASK;
+ size = PAGE_ALIGN(addr + size) - paligned;
+
+ if (size == 0 || paligned == 0)
+ return NULL;
+
+ if (slab_is_available())
+ return do_ioremap(paligned, offset, size, prot, caller);
+
+ pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller);
+
+ err = early_ioremap_range(ioremap_bot, paligned, size, prot);
+ if (err)
+ return NULL;
+
+ ret = (void __iomem *)ioremap_bot + offset;
+ ioremap_bot += size;
+
+ return ret;
+}
+
+/*
+ * Unmap an IO region and remove it from vmalloc'd list.
+ * Access to IO memory should be serialized by driver.
+ */
+void iounmap(volatile void __iomem *token)
+{
+ void *addr;
+
+ if (!slab_is_available())
+ return;
+
+ addr = (void *)((unsigned long __force)PCI_FIX_ADDR(token) & PAGE_MASK);
+
+ if ((unsigned long)addr < ioremap_bot) {
+ pr_warn("Attempt to iounmap early bolted mapping at 0x%p\n", addr);
+ return;
+ }
+ vunmap(addr);
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index 74f4555a62ba..16dd95bd0749 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -5,13 +5,22 @@
#include <linux/kasan.h>
#include <linux/printk.h>
#include <linux/memblock.h>
+#include <linux/moduleloader.h>
#include <linux/sched/task.h>
#include <linux/vmalloc.h>
#include <asm/pgalloc.h>
#include <asm/code-patching.h>
#include <mm/mmu_decl.h>
-static void kasan_populate_pte(pte_t *ptep, pgprot_t prot)
+static pgprot_t __init kasan_prot_ro(void)
+{
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return PAGE_READONLY;
+
+ return PAGE_KERNEL_RO;
+}
+
+static void __init kasan_populate_pte(pte_t *ptep, pgprot_t prot)
{
unsigned long va = (unsigned long)kasan_early_shadow_page;
phys_addr_t pa = __pa(kasan_early_shadow_page);
@@ -21,62 +30,56 @@ static void kasan_populate_pte(pte_t *ptep, pgprot_t prot)
__set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0);
}
-static int __ref kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end)
+static int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end)
{
pmd_t *pmd;
unsigned long k_cur, k_next;
+ pte_t *new = NULL;
pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start);
for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) {
- pte_t *new;
-
k_next = pgd_addr_end(k_cur, k_end);
if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte)
continue;
- if (slab_is_available())
- new = pte_alloc_one_kernel(&init_mm);
- else
+ if (!new)
new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
if (!new)
return -ENOMEM;
- if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
- kasan_populate_pte(new, PAGE_READONLY);
- else
- kasan_populate_pte(new, PAGE_KERNEL_RO);
- pmd_populate_kernel(&init_mm, pmd, new);
- }
- return 0;
-}
+ kasan_populate_pte(new, PAGE_KERNEL);
-static void __ref *kasan_get_one_page(void)
-{
- if (slab_is_available())
- return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+ smp_wmb(); /* See comment in __pte_alloc */
- return memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ spin_lock(&init_mm.page_table_lock);
+ /* Has another populated it ? */
+ if (likely((void *)pmd_page_vaddr(*pmd) == kasan_early_shadow_pte)) {
+ pmd_populate_kernel(&init_mm, pmd, new);
+ new = NULL;
+ }
+ spin_unlock(&init_mm.page_table_lock);
+ }
+ return 0;
}
-static int __ref kasan_init_region(void *start, size_t size)
+static int __init kasan_init_region(void *start, size_t size)
{
unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
unsigned long k_cur;
int ret;
- void *block = NULL;
+ void *block;
ret = kasan_init_shadow_page_tables(k_start, k_end);
if (ret)
return ret;
- if (!slab_is_available())
- block = memblock_alloc(k_end - k_start, PAGE_SIZE);
+ block = memblock_alloc(k_end - k_start, PAGE_SIZE);
- for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE) {
+ for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
- void *va = block ? block + k_cur - k_start : kasan_get_one_page();
+ void *va = block + k_cur - k_start;
pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
if (!va)
@@ -90,14 +93,51 @@ static int __ref kasan_init_region(void *start, size_t size)
static void __init kasan_remap_early_shadow_ro(void)
{
- if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
- kasan_populate_pte(kasan_early_shadow_pte, PAGE_READONLY);
- else
- kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL_RO);
+ pgprot_t prot = kasan_prot_ro();
+ unsigned long k_start = KASAN_SHADOW_START;
+ unsigned long k_end = KASAN_SHADOW_END;
+ unsigned long k_cur;
+ phys_addr_t pa = __pa(kasan_early_shadow_page);
+
+ kasan_populate_pte(kasan_early_shadow_pte, prot);
+ for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
+ pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
+ pte_t *ptep = pte_offset_kernel(pmd, k_cur);
+
+ if ((pte_val(*ptep) & PTE_RPN_MASK) != pa)
+ continue;
+
+ __set_pte_at(&init_mm, k_cur, ptep, pfn_pte(PHYS_PFN(pa), prot), 0);
+ }
flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END);
}
+static void __init kasan_unmap_early_shadow_vmalloc(void)
+{
+ unsigned long k_start = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_START);
+ unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END);
+ unsigned long k_cur;
+ phys_addr_t pa = __pa(kasan_early_shadow_page);
+
+ if (!early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
+ int ret = kasan_init_shadow_page_tables(k_start, k_end);
+
+ if (ret)
+ panic("kasan: kasan_init_shadow_page_tables() failed");
+ }
+ for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
+ pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
+ pte_t *ptep = pte_offset_kernel(pmd, k_cur);
+
+ if ((pte_val(*ptep) & PTE_RPN_MASK) != pa)
+ continue;
+
+ __set_pte_at(&init_mm, k_cur, ptep, __pte(0), 0);
+ }
+ flush_tlb_kernel_range(k_start, k_end);
+}
+
void __init kasan_mmu_init(void)
{
int ret;
@@ -134,30 +174,22 @@ void __init kasan_init(void)
pr_info("KASAN init done\n");
}
-#ifdef CONFIG_MODULES
-void *module_alloc(unsigned long size)
+void __init kasan_late_init(void)
{
- void *base = vmalloc_exec(size);
-
- if (!base)
- return NULL;
-
- if (!kasan_init_region(base, size))
- return base;
-
- vfree(base);
-
- return NULL;
+ if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
+ kasan_unmap_early_shadow_vmalloc();
}
-#endif
#ifdef CONFIG_PPC_BOOK3S_32
u8 __initdata early_hash[256 << 10] __aligned(256 << 10) = {0};
static void __init kasan_early_hash_table(void)
{
- modify_instruction_site(&patch__hash_page_A0, 0xffff, __pa(early_hash) >> 16);
- modify_instruction_site(&patch__flush_hash_A0, 0xffff, __pa(early_hash) >> 16);
+ unsigned int hash = IS_ENABLED(CONFIG_VMAP_STACK) ? (unsigned int)early_hash :
+ __pa(early_hash);
+
+ modify_instruction_site(&patch__hash_page_A0, 0xffff, hash >> 16);
+ modify_instruction_site(&patch__flush_hash_A0, 0xffff, hash >> 16);
Hash = (struct hash_pte *)early_hash;
}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 9191a66b3bc5..ef7b1119b2e2 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -31,6 +31,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/memremap.h>
+#include <linux/dma-direct.h>
#include <asm/pgalloc.h>
#include <asm/prom.h>
@@ -48,6 +49,7 @@
#include <asm/fixmap.h>
#include <asm/swiotlb.h>
#include <asm/rtas.h>
+#include <asm/kasan.h>
#include <mm/mmu_decl.h>
@@ -104,6 +106,27 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
return -ENODEV;
}
+#define FLUSH_CHUNK_SIZE SZ_1G
+/**
+ * flush_dcache_range_chunked(): Write any modified data cache blocks out to
+ * memory and invalidate them, in chunks of up to FLUSH_CHUNK_SIZE
+ * Does not invalidate the corresponding instruction cache blocks.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ * @chunk: the max size of the chunks
+ */
+static void flush_dcache_range_chunked(unsigned long start, unsigned long stop,
+ unsigned long chunk)
+{
+ unsigned long i;
+
+ for (i = start; i < stop; i += chunk) {
+ flush_dcache_range(i, min(stop, i + chunk));
+ cond_resched();
+ }
+}
+
int __ref arch_add_memory(int nid, u64 start, u64 size,
struct mhp_restrictions *restrictions)
{
@@ -120,7 +143,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
start, start + size, rc);
return -EFAULT;
}
- flush_dcache_range(start, start + size);
return __add_pages(nid, start_pfn, nr_pages, restrictions);
}
@@ -130,14 +152,14 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
int ret;
- __remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
+ __remove_pages(start_pfn, nr_pages, altmap);
/* Remove htab bolted mappings for this section of memory */
start = (unsigned long)__va(start);
- flush_dcache_range(start, start + size);
+ flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE);
+
ret = remove_section_mapping(start, start + size);
WARN_ON_ONCE(ret);
@@ -201,10 +223,10 @@ static int __init mark_nonram_nosave(void)
* everything else. GFP_DMA32 page allocations automatically fall back to
* ZONE_DMA.
*
- * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to
- * inform the generic DMA mapping code. 32-bit only devices (if not handled
- * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get
- * otherwise served by ZONE_DMA.
+ * By using 31-bit unconditionally, we can exploit zone_dma_bits to inform the
+ * generic DMA mapping code. 32-bit only devices (if not handled by an IOMMU
+ * anyway) will take a first dip into ZONE_NORMAL and get otherwise served by
+ * ZONE_DMA.
*/
static unsigned long max_zone_pfns[MAX_NR_ZONES];
@@ -216,15 +238,13 @@ void __init paging_init(void)
unsigned long long total_ram = memblock_phys_mem_size();
phys_addr_t top_of_ram = memblock_end_of_DRAM();
-#ifdef CONFIG_PPC32
- unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1);
- unsigned long end = __fix_to_virt(FIX_HOLE);
+#ifdef CONFIG_HIGHMEM
+ unsigned long v = __fix_to_virt(FIX_KMAP_END);
+ unsigned long end = __fix_to_virt(FIX_KMAP_BEGIN);
for (; v < end; v += PAGE_SIZE)
map_kernel_page(v, 0, __pgprot(0)); /* XXX gross */
-#endif
-#ifdef CONFIG_HIGHMEM
map_kernel_page(PKMAP_BASE, 0, __pgprot(0)); /* XXX gross */
pkmap_page_table = virt_to_kpte(PKMAP_BASE);
@@ -237,9 +257,18 @@ void __init paging_init(void)
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
(long int)((top_of_ram - total_ram) >> 20));
+ /*
+ * Allow 30-bit DMA for very limited Broadcom wifi chips on many
+ * powerbooks.
+ */
+ if (IS_ENABLED(CONFIG_PPC32))
+ zone_dma_bits = 30;
+ else
+ zone_dma_bits = 31;
+
#ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
- 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT));
+ 1UL << (zone_dma_bits - PAGE_SHIFT));
#endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
@@ -260,11 +289,22 @@ void __init mem_init(void)
BUILD_BUG_ON(MMU_PAGE_COUNT > 16);
#ifdef CONFIG_SWIOTLB
+ /*
+ * Some platforms (e.g. 85xx) limit DMA-able memory way below
+ * 4G. We force memblock to bottom-up mode to ensure that the
+ * memory allocated in swiotlb_init() is DMA-able.
+ * As it's the last memblock allocation, no need to reset it
+ * back to to-down.
+ */
+ memblock_set_bottom_up(true);
swiotlb_init(0);
#endif
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
set_max_mapnr(max_pfn);
+
+ kasan_late_init();
+
memblock_free_all();
#ifdef CONFIG_HIGHMEM
@@ -302,12 +342,9 @@ void __init mem_init(void)
pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n",
PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP));
#endif /* CONFIG_HIGHMEM */
-#ifdef CONFIG_NOT_COHERENT_CACHE
- pr_info(" * 0x%08lx..0x%08lx : consistent mem\n",
- IOREMAP_TOP, IOREMAP_TOP + CONFIG_CONSISTENT_SIZE);
-#endif /* CONFIG_NOT_COHERENT_CACHE */
- pr_info(" * 0x%08lx..0x%08lx : early ioremap\n",
- ioremap_bot, IOREMAP_TOP);
+ if (ioremap_bot != IOREMAP_TOP)
+ pr_info(" * 0x%08lx..0x%08lx : early ioremap\n",
+ ioremap_bot, IOREMAP_TOP);
pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n",
VMALLOC_START, VMALLOC_END);
#endif /* CONFIG_PPC32 */
@@ -321,6 +358,120 @@ void free_initmem(void)
free_initmem_default(POISON_FREE_INITMEM);
}
+/**
+ * flush_coherent_icache() - if a CPU has a coherent icache, flush it
+ * @addr: The base address to use (can be any valid address, the whole cache will be flushed)
+ * Return true if the cache was flushed, false otherwise
+ */
+static inline bool flush_coherent_icache(unsigned long addr)
+{
+ /*
+ * For a snooping icache, we still need a dummy icbi to purge all the
+ * prefetched instructions from the ifetch buffers. We also need a sync
+ * before the icbi to order the the actual stores to memory that might
+ * have modified instructions with the icbi.
+ */
+ if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
+ mb(); /* sync */
+ icbi((void *)addr);
+ mb(); /* sync */
+ isync();
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * invalidate_icache_range() - Flush the icache by issuing icbi across an address range
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+static void invalidate_icache_range(unsigned long start, unsigned long stop)
+{
+ unsigned long shift = l1_icache_shift();
+ unsigned long bytes = l1_icache_bytes();
+ char *addr = (char *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+ unsigned long i;
+
+ for (i = 0; i < size >> shift; i++, addr += bytes)
+ icbi(addr);
+
+ mb(); /* sync */
+ isync();
+}
+
+/**
+ * flush_icache_range: Write any modified data cache blocks out to memory
+ * and invalidate the corresponding blocks in the instruction cache
+ *
+ * Generic code will call this after writing memory, before executing from it.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+void flush_icache_range(unsigned long start, unsigned long stop)
+{
+ if (flush_coherent_icache(start))
+ return;
+
+ clean_dcache_range(start, stop);
+
+ if (IS_ENABLED(CONFIG_44x)) {
+ /*
+ * Flash invalidate on 44x because we are passed kmapped
+ * addresses and this doesn't work for userspace pages due to
+ * the virtually tagged icache.
+ */
+ iccci((void *)start);
+ mb(); /* sync */
+ isync();
+ } else
+ invalidate_icache_range(start, stop);
+}
+EXPORT_SYMBOL(flush_icache_range);
+
+#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
+/**
+ * flush_dcache_icache_phys() - Flush a page by it's physical address
+ * @physaddr: the physical address of the page
+ */
+static void flush_dcache_icache_phys(unsigned long physaddr)
+{
+ unsigned long bytes = l1_dcache_bytes();
+ unsigned long nb = PAGE_SIZE / bytes;
+ unsigned long addr = physaddr & PAGE_MASK;
+ unsigned long msr, msr0;
+ unsigned long loop1 = addr, loop2 = addr;
+
+ msr0 = mfmsr();
+ msr = msr0 & ~MSR_DR;
+ /*
+ * This must remain as ASM to prevent potential memory accesses
+ * while the data MMU is disabled
+ */
+ asm volatile(
+ " mtctr %2;\n"
+ " mtmsr %3;\n"
+ " isync;\n"
+ "0: dcbst 0, %0;\n"
+ " addi %0, %0, %4;\n"
+ " bdnz 0b;\n"
+ " sync;\n"
+ " mtctr %2;\n"
+ "1: icbi 0, %1;\n"
+ " addi %1, %1, %4;\n"
+ " bdnz 1b;\n"
+ " sync;\n"
+ " mtmsr %5;\n"
+ " isync;\n"
+ : "+&r" (loop1), "+&r" (loop2)
+ : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0)
+ : "ctr", "memory");
+}
+#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
+
/*
* This is called when a page has been modified by the kernel.
* It just marks the page as not i-cache clean. We do the i-cache
@@ -353,12 +504,46 @@ void flush_dcache_icache_page(struct page *page)
__flush_dcache_icache(start);
kunmap_atomic(start);
} else {
- __flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT);
+ unsigned long addr = page_to_pfn(page) << PAGE_SHIFT;
+
+ if (flush_coherent_icache(addr))
+ return;
+ flush_dcache_icache_phys(addr);
}
#endif
}
EXPORT_SYMBOL(flush_dcache_icache_page);
+/**
+ * __flush_dcache_icache(): Flush a particular page from the data cache to RAM.
+ * Note: this is necessary because the instruction cache does *not*
+ * snoop from the data cache.
+ *
+ * @page: the address of the page to flush
+ */
+void __flush_dcache_icache(void *p)
+{
+ unsigned long addr = (unsigned long)p;
+
+ if (flush_coherent_icache(addr))
+ return;
+
+ clean_dcache_range(addr, addr + PAGE_SIZE);
+
+ /*
+ * We don't flush the icache on 44x. Those have a virtual icache and we
+ * don't have access to the virtual address here (it's not the page
+ * vaddr but where it's mapped in user space). The flushing of the
+ * icache on these is handled elsewhere, when a change in the address
+ * space occurs, before returning to user space.
+ */
+
+ if (cpu_has_feature(MMU_FTR_TYPE_44x))
+ return;
+
+ invalidate_icache_range(addr, addr + PAGE_SIZE);
+}
+
void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
{
clear_page(page);
@@ -407,63 +592,6 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
EXPORT_SYMBOL(flush_icache_user_range);
/*
- * This is called at the end of handling a user page fault, when the
- * fault has been handled by updating a PTE in the linux page tables.
- * We use it to preload an HPTE into the hash table corresponding to
- * the updated linux PTE.
- *
- * This must always be called with the pte lock held.
- */
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
- pte_t *ptep)
-{
-#ifdef CONFIG_PPC_BOOK3S
- /*
- * We don't need to worry about _PAGE_PRESENT here because we are
- * called with either mm->page_table_lock held or ptl lock held
- */
- unsigned long trap;
- bool is_exec;
-
- if (radix_enabled()) {
- prefetch((void *)address);
- return;
- }
-
- /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
- if (!pte_young(*ptep) || address >= TASK_SIZE)
- return;
-
- /* We try to figure out if we are coming from an instruction
- * access fault and pass that down to __hash_page so we avoid
- * double-faulting on execution of fresh text. We have to test
- * for regs NULL since init will get here first thing at boot
- *
- * We also avoid filling the hash if not coming from a fault
- */
-
- trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL;
- switch (trap) {
- case 0x300:
- is_exec = false;
- break;
- case 0x400:
- is_exec = true;
- break;
- default:
- return;
- }
-
- hash_preload(vma->vm_mm, address, is_exec, trap);
-#endif /* CONFIG_PPC_BOOK3S */
-#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \
- && defined(CONFIG_HUGETLB_PAGE)
- if (is_vm_hugetlb_page(vma))
- book3e_hugetlb_preload(vma, address, *ptep);
-#endif
-}
-
-/*
* System memory should not be in /proc/iomem but various tools expect it
* (eg kdump).
*/
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 32c1a191c28a..7097e07a209a 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -82,10 +82,6 @@ static inline void print_system_hash_info(void) {}
#else /* CONFIG_PPC_MMU_NOHASH */
-extern void hash_preload(struct mm_struct *mm, unsigned long ea,
- bool is_exec, unsigned long trap);
-
-
extern void _tlbie(unsigned long address);
extern void _tlbia(void);
@@ -95,6 +91,8 @@ void print_system_hash_info(void);
#ifdef CONFIG_PPC32
+void hash_preload(struct mm_struct *mm, unsigned long ea);
+
extern void mapin_ram(void);
extern void setbat(int index, unsigned long virt, phys_addr_t phys,
unsigned int size, pgprot_t prot);
@@ -108,7 +106,6 @@ extern u8 early_hash[];
#endif /* CONFIG_PPC32 */
-extern unsigned long ioremap_bot;
extern unsigned long __max_low_memory;
extern phys_addr_t __initial_memory_limit_addr;
extern phys_addr_t total_memory;
@@ -142,10 +139,21 @@ extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
extern void adjust_total_lowmem(void);
extern int switch_to_as1(void);
extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
+void create_kaslr_tlb_entry(int entry, unsigned long virt, phys_addr_t phys);
+void reloc_kernel_entry(void *fdt, int addr);
+extern int is_second_reloc;
#endif
extern void loadcam_entry(unsigned int index);
extern void loadcam_multi(int first_idx, int num, int tmp_idx);
+#ifdef CONFIG_RANDOMIZE_BASE
+void kaslr_early_init(void *dt_ptr, phys_addr_t size);
+void kaslr_late_init(void);
+#else
+static inline void kaslr_early_init(void *dt_ptr, phys_addr_t size) {}
+static inline void kaslr_late_init(void) {}
+#endif
+
struct tlbcam {
u32 MAS0;
u32 MAS1;
@@ -173,3 +181,9 @@ void mmu_mark_rodata_ro(void);
static inline void mmu_mark_initmem_nx(void) { }
static inline void mmu_mark_rodata_ro(void) { }
#endif
+
+#ifdef CONFIG_PPC_DEBUG_WX
+void ptdump_check_wx(void);
+#else
+static inline void ptdump_check_wx(void) { }
+#endif
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 4a06cb342da2..3189308dece4 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -21,33 +21,34 @@ extern int __map_without_ltlbs;
static unsigned long block_mapped_ram;
/*
- * Return PA for this VA if it is in an area mapped with LTLBs.
+ * Return PA for this VA if it is in an area mapped with LTLBs or fixmap.
* Otherwise, returns 0
*/
phys_addr_t v_block_mapped(unsigned long va)
{
unsigned long p = PHYS_IMMR_BASE;
- if (__map_without_ltlbs)
- return 0;
if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE)
return p + va - VIRT_IMMR_BASE;
+ if (__map_without_ltlbs)
+ return 0;
if (va >= PAGE_OFFSET && va < PAGE_OFFSET + block_mapped_ram)
return __pa(va);
return 0;
}
/*
- * Return VA for a given PA mapped with LTLBs or 0 if not mapped
+ * Return VA for a given PA mapped with LTLBs or fixmap
+ * Return 0 if not mapped
*/
unsigned long p_block_mapped(phys_addr_t pa)
{
unsigned long p = PHYS_IMMR_BASE;
- if (__map_without_ltlbs)
- return 0;
if (pa >= p && pa < p + IMMR_SIZE)
return VIRT_IMMR_BASE + pa - p;
+ if (__map_without_ltlbs)
+ return 0;
if (pa < block_mapped_ram)
return (unsigned long)__va(pa);
return 0;
@@ -103,6 +104,19 @@ static void mmu_patch_addis(s32 *site, long simm)
patch_instruction_site(site, instr);
}
+static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot)
+{
+ unsigned long s = offset;
+ unsigned long v = PAGE_OFFSET + s;
+ phys_addr_t p = memstart_addr + s;
+
+ for (; s < top; s += PAGE_SIZE) {
+ map_kernel_page(v, p, prot);
+ v += PAGE_SIZE;
+ p += PAGE_SIZE;
+ }
+}
+
unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
{
unsigned long mapped;
@@ -115,10 +129,20 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0);
} else {
+ unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
+
mapped = top & ~(LARGE_PAGE_SIZE_8M - 1);
if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
- mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top,
- _ALIGN(__pa(_einittext), 8 << 20));
+ mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, einittext8);
+
+ /*
+ * Populate page tables to:
+ * - have them appear in /sys/kernel/debug/kernel_page_tables
+ * - allow the BDI to find the pages when they are not PINNED
+ */
+ mmu_mapin_ram_chunk(0, einittext8, PAGE_KERNEL_X);
+ mmu_mapin_ram_chunk(einittext8, mapped, PAGE_KERNEL);
+ mmu_mapin_immr();
}
mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped);
@@ -144,18 +168,41 @@ void mmu_mark_initmem_nx(void)
if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23)
mmu_patch_addis(&patch__itlbmiss_linmem_top8,
-((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1)));
- if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
+ if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) {
+ unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
+ unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M);
+ unsigned long etext = __pa(_etext);
+
mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, __pa(_etext));
+
+ /* Update page tables for PTDUMP and BDI */
+ mmu_mapin_ram_chunk(0, einittext8, __pgprot(0));
+ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
+ mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_TEXT);
+ mmu_mapin_ram_chunk(etext, einittext8, PAGE_KERNEL);
+ } else {
+ mmu_mapin_ram_chunk(0, etext8, PAGE_KERNEL_TEXT);
+ mmu_mapin_ram_chunk(etext8, einittext8, PAGE_KERNEL);
+ }
+ }
}
#ifdef CONFIG_STRICT_KERNEL_RWX
void mmu_mark_rodata_ro(void)
{
+ unsigned long sinittext = __pa(_sinittext);
+ unsigned long etext = __pa(_etext);
+
if (CONFIG_DATA_SHIFT < 23)
mmu_patch_addis(&patch__dtlbmiss_romem_top8,
-__pa(((unsigned long)_sinittext) &
~(LARGE_PAGE_SIZE_8M - 1)));
mmu_patch_addis(&patch__dtlbmiss_romem_top, -__pa(_sinittext));
+
+ /* Update page tables for PTDUMP and BDI */
+ mmu_mapin_ram_chunk(0, sinittext, __pgprot(0));
+ mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_ROX);
+ mmu_mapin_ram_chunk(etext, sinittext, PAGE_KERNEL_RO);
}
#endif
diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile
index 33b6f6f29d3f..0424f6ce5bd8 100644
--- a/arch/powerpc/mm/nohash/Makefile
+++ b/arch/powerpc/mm/nohash/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_40x) += 40x.o
obj-$(CONFIG_44x) += 44x.o
obj-$(CONFIG_PPC_8xx) += 8xx.o
obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke.o
+obj-$(CONFIG_RANDOMIZE_BASE) += kaslr_booke.o
ifdef CONFIG_HUGETLB_PAGE
obj-$(CONFIG_PPC_FSL_BOOK3E) += book3e_hugetlbpage.o
endif
diff --git a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
index 61915f4d3c7f..8b88be91b622 100644
--- a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
+++ b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
@@ -122,8 +122,8 @@ static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid)
return found;
}
-void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
- pte_t pte)
+static void
+book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte)
{
unsigned long mas1, mas2;
u64 mas7_3;
@@ -183,6 +183,18 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
local_irq_restore(flags);
}
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ *
+ * This must always be called with the pte lock held.
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+{
+ if (is_vm_hugetlb_page(vma))
+ book3e_hugetlb_preload(vma, address, *ptep);
+}
+
void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
struct hstate *hstate = hstate_file(vma->vm_file);
diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c
index 556e3cd52a35..b4eb06ceb189 100644
--- a/arch/powerpc/mm/nohash/fsl_booke.c
+++ b/arch/powerpc/mm/nohash/fsl_booke.c
@@ -263,11 +263,13 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
int __initdata is_second_reloc;
notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
{
- unsigned long base = KERNELBASE;
+ unsigned long base = kernstart_virt_addr;
+ phys_addr_t size;
kernstart_addr = start;
if (is_second_reloc) {
virt_phys_offset = PAGE_OFFSET - memstart_addr;
+ kaslr_late_init();
return;
}
@@ -291,7 +293,7 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
start &= ~0x3ffffff;
base &= ~0x3ffffff;
virt_phys_offset = base - start;
- early_get_first_memblock_info(__va(dt_ptr), NULL);
+ early_get_first_memblock_info(__va(dt_ptr), &size);
/*
* We now get the memstart_addr, then we should check if this
* address is the same as what the PAGE_OFFSET map to now. If
@@ -316,6 +318,8 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
/* We should never reach here */
panic("Relocation error");
}
+
+ kaslr_early_init(__va(dt_ptr), size);
}
#endif
#endif
diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
new file mode 100644
index 000000000000..4a75f2d9bf0e
--- /dev/null
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (C) 2019 Jason Yan <yanaijie@huawei.com>
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/memblock.h>
+#include <linux/libfdt.h>
+#include <linux/crash_core.h>
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/kdump.h>
+#include <mm/mmu_decl.h>
+#include <generated/compile.h>
+#include <generated/utsrelease.h>
+
+struct regions {
+ unsigned long pa_start;
+ unsigned long pa_end;
+ unsigned long kernel_size;
+ unsigned long dtb_start;
+ unsigned long dtb_end;
+ unsigned long initrd_start;
+ unsigned long initrd_end;
+ unsigned long crash_start;
+ unsigned long crash_end;
+ int reserved_mem;
+ int reserved_mem_addr_cells;
+ int reserved_mem_size_cells;
+};
+
+/* Simplified build-specific string for starting entropy. */
+static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+ LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
+
+struct regions __initdata regions;
+
+static __init void kaslr_get_cmdline(void *fdt)
+{
+ int node = fdt_path_offset(fdt, "/chosen");
+
+ early_init_dt_scan_chosen(node, "chosen", 1, boot_command_line);
+}
+
+static unsigned long __init rotate_xor(unsigned long hash, const void *area,
+ size_t size)
+{
+ size_t i;
+ const unsigned long *ptr = area;
+
+ for (i = 0; i < size / sizeof(hash); i++) {
+ /* Rotate by odd number of bits and XOR. */
+ hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
+ hash ^= ptr[i];
+ }
+
+ return hash;
+}
+
+/* Attempt to create a simple starting entropy. This can make it defferent for
+ * every build but it is still not enough. Stronger entropy should
+ * be added to make it change for every boot.
+ */
+static unsigned long __init get_boot_seed(void *fdt)
+{
+ unsigned long hash = 0;
+
+ hash = rotate_xor(hash, build_str, sizeof(build_str));
+ hash = rotate_xor(hash, fdt, fdt_totalsize(fdt));
+
+ return hash;
+}
+
+static __init u64 get_kaslr_seed(void *fdt)
+{
+ int node, len;
+ fdt64_t *prop;
+ u64 ret;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ return 0;
+
+ prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+ if (!prop || len != sizeof(u64))
+ return 0;
+
+ ret = fdt64_to_cpu(*prop);
+ *prop = 0;
+ return ret;
+}
+
+static __init bool regions_overlap(u32 s1, u32 e1, u32 s2, u32 e2)
+{
+ return e1 >= s2 && e2 >= s1;
+}
+
+static __init bool overlaps_reserved_region(const void *fdt, u32 start,
+ u32 end)
+{
+ int subnode, len, i;
+ u64 base, size;
+
+ /* check for overlap with /memreserve/ entries */
+ for (i = 0; i < fdt_num_mem_rsv(fdt); i++) {
+ if (fdt_get_mem_rsv(fdt, i, &base, &size) < 0)
+ continue;
+ if (regions_overlap(start, end, base, base + size))
+ return true;
+ }
+
+ if (regions.reserved_mem < 0)
+ return false;
+
+ /* check for overlap with static reservations in /reserved-memory */
+ for (subnode = fdt_first_subnode(fdt, regions.reserved_mem);
+ subnode >= 0;
+ subnode = fdt_next_subnode(fdt, subnode)) {
+ const fdt32_t *reg;
+ u64 rsv_end;
+
+ len = 0;
+ reg = fdt_getprop(fdt, subnode, "reg", &len);
+ while (len >= (regions.reserved_mem_addr_cells +
+ regions.reserved_mem_size_cells)) {
+ base = fdt32_to_cpu(reg[0]);
+ if (regions.reserved_mem_addr_cells == 2)
+ base = (base << 32) | fdt32_to_cpu(reg[1]);
+
+ reg += regions.reserved_mem_addr_cells;
+ len -= 4 * regions.reserved_mem_addr_cells;
+
+ size = fdt32_to_cpu(reg[0]);
+ if (regions.reserved_mem_size_cells == 2)
+ size = (size << 32) | fdt32_to_cpu(reg[1]);
+
+ reg += regions.reserved_mem_size_cells;
+ len -= 4 * regions.reserved_mem_size_cells;
+
+ if (base >= regions.pa_end)
+ continue;
+
+ rsv_end = min(base + size, (u64)U32_MAX);
+
+ if (regions_overlap(start, end, base, rsv_end))
+ return true;
+ }
+ }
+ return false;
+}
+
+static __init bool overlaps_region(const void *fdt, u32 start,
+ u32 end)
+{
+ if (regions_overlap(start, end, __pa(_stext), __pa(_end)))
+ return true;
+
+ if (regions_overlap(start, end, regions.dtb_start,
+ regions.dtb_end))
+ return true;
+
+ if (regions_overlap(start, end, regions.initrd_start,
+ regions.initrd_end))
+ return true;
+
+ if (regions_overlap(start, end, regions.crash_start,
+ regions.crash_end))
+ return true;
+
+ return overlaps_reserved_region(fdt, start, end);
+}
+
+static void __init get_crash_kernel(void *fdt, unsigned long size)
+{
+#ifdef CONFIG_CRASH_CORE
+ unsigned long long crash_size, crash_base;
+ int ret;
+
+ ret = parse_crashkernel(boot_command_line, size, &crash_size,
+ &crash_base);
+ if (ret != 0 || crash_size == 0)
+ return;
+ if (crash_base == 0)
+ crash_base = KDUMP_KERNELBASE;
+
+ regions.crash_start = (unsigned long)crash_base;
+ regions.crash_end = (unsigned long)(crash_base + crash_size);
+
+ pr_debug("crash_base=0x%llx crash_size=0x%llx\n", crash_base, crash_size);
+#endif
+}
+
+static void __init get_initrd_range(void *fdt)
+{
+ u64 start, end;
+ int node, len;
+ const __be32 *prop;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ return;
+
+ prop = fdt_getprop(fdt, node, "linux,initrd-start", &len);
+ if (!prop)
+ return;
+ start = of_read_number(prop, len / 4);
+
+ prop = fdt_getprop(fdt, node, "linux,initrd-end", &len);
+ if (!prop)
+ return;
+ end = of_read_number(prop, len / 4);
+
+ regions.initrd_start = (unsigned long)start;
+ regions.initrd_end = (unsigned long)end;
+
+ pr_debug("initrd_start=0x%llx initrd_end=0x%llx\n", start, end);
+}
+
+static __init unsigned long get_usable_address(const void *fdt,
+ unsigned long start,
+ unsigned long offset)
+{
+ unsigned long pa;
+ unsigned long pa_end;
+
+ for (pa = offset; (long)pa > (long)start; pa -= SZ_16K) {
+ pa_end = pa + regions.kernel_size;
+ if (overlaps_region(fdt, pa, pa_end))
+ continue;
+
+ return pa;
+ }
+ return 0;
+}
+
+static __init void get_cell_sizes(const void *fdt, int node, int *addr_cells,
+ int *size_cells)
+{
+ const int *prop;
+ int len;
+
+ /*
+ * Retrieve the #address-cells and #size-cells properties
+ * from the 'node', or use the default if not provided.
+ */
+ *addr_cells = *size_cells = 1;
+
+ prop = fdt_getprop(fdt, node, "#address-cells", &len);
+ if (len == 4)
+ *addr_cells = fdt32_to_cpu(*prop);
+ prop = fdt_getprop(fdt, node, "#size-cells", &len);
+ if (len == 4)
+ *size_cells = fdt32_to_cpu(*prop);
+}
+
+static unsigned long __init kaslr_legal_offset(void *dt_ptr, unsigned long index,
+ unsigned long offset)
+{
+ unsigned long koffset = 0;
+ unsigned long start;
+
+ while ((long)index >= 0) {
+ offset = memstart_addr + index * SZ_64M + offset;
+ start = memstart_addr + index * SZ_64M;
+ koffset = get_usable_address(dt_ptr, start, offset);
+ if (koffset)
+ break;
+ index--;
+ }
+
+ if (koffset != 0)
+ koffset -= memstart_addr;
+
+ return koffset;
+}
+
+static inline __init bool kaslr_disabled(void)
+{
+ return strstr(boot_command_line, "nokaslr") != NULL;
+}
+
+static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size,
+ unsigned long kernel_sz)
+{
+ unsigned long offset, random;
+ unsigned long ram, linear_sz;
+ u64 seed;
+ unsigned long index;
+
+ kaslr_get_cmdline(dt_ptr);
+ if (kaslr_disabled())
+ return 0;
+
+ random = get_boot_seed(dt_ptr);
+
+ seed = get_tb() << 32;
+ seed ^= get_tb();
+ random = rotate_xor(random, &seed, sizeof(seed));
+
+ /*
+ * Retrieve (and wipe) the seed from the FDT
+ */
+ seed = get_kaslr_seed(dt_ptr);
+ if (seed)
+ random = rotate_xor(random, &seed, sizeof(seed));
+ else
+ pr_warn("KASLR: No safe seed for randomizing the kernel base.\n");
+
+ ram = min_t(phys_addr_t, __max_low_memory, size);
+ ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true);
+ linear_sz = min_t(unsigned long, ram, SZ_512M);
+
+ /* If the linear size is smaller than 64M, do not randmize */
+ if (linear_sz < SZ_64M)
+ return 0;
+
+ /* check for a reserved-memory node and record its cell sizes */
+ regions.reserved_mem = fdt_path_offset(dt_ptr, "/reserved-memory");
+ if (regions.reserved_mem >= 0)
+ get_cell_sizes(dt_ptr, regions.reserved_mem,
+ &regions.reserved_mem_addr_cells,
+ &regions.reserved_mem_size_cells);
+
+ regions.pa_start = memstart_addr;
+ regions.pa_end = memstart_addr + linear_sz;
+ regions.dtb_start = __pa(dt_ptr);
+ regions.dtb_end = __pa(dt_ptr) + fdt_totalsize(dt_ptr);
+ regions.kernel_size = kernel_sz;
+
+ get_initrd_range(dt_ptr);
+ get_crash_kernel(dt_ptr, ram);
+
+ /*
+ * Decide which 64M we want to start
+ * Only use the low 8 bits of the random seed
+ */
+ index = random & 0xFF;
+ index %= linear_sz / SZ_64M;
+
+ /* Decide offset inside 64M */
+ offset = random % (SZ_64M - kernel_sz);
+ offset = round_down(offset, SZ_16K);
+
+ return kaslr_legal_offset(dt_ptr, index, offset);
+}
+
+/*
+ * To see if we need to relocate the kernel to a random offset
+ * void *dt_ptr - address of the device tree
+ * phys_addr_t size - size of the first memory block
+ */
+notrace void __init kaslr_early_init(void *dt_ptr, phys_addr_t size)
+{
+ unsigned long tlb_virt;
+ phys_addr_t tlb_phys;
+ unsigned long offset;
+ unsigned long kernel_sz;
+
+ kernel_sz = (unsigned long)_end - (unsigned long)_stext;
+
+ offset = kaslr_choose_location(dt_ptr, size, kernel_sz);
+ if (offset == 0)
+ return;
+
+ kernstart_virt_addr += offset;
+ kernstart_addr += offset;
+
+ is_second_reloc = 1;
+
+ if (offset >= SZ_64M) {
+ tlb_virt = round_down(kernstart_virt_addr, SZ_64M);
+ tlb_phys = round_down(kernstart_addr, SZ_64M);
+
+ /* Create kernel map to relocate in */
+ create_kaslr_tlb_entry(1, tlb_virt, tlb_phys);
+ }
+
+ /* Copy the kernel to it's new location and run */
+ memcpy((void *)kernstart_virt_addr, (void *)_stext, kernel_sz);
+ flush_icache_range(kernstart_virt_addr, kernstart_virt_addr + kernel_sz);
+
+ reloc_kernel_entry(dt_ptr, kernstart_virt_addr);
+}
+
+void __init kaslr_late_init(void)
+{
+ /* If randomized, clear the original kernel */
+ if (kernstart_virt_addr != KERNELBASE) {
+ unsigned long kernel_sz;
+
+ kernel_sz = (unsigned long)_end - kernstart_virt_addr;
+ memzero_explicit((void *)KERNELBASE, kernel_sz);
+ }
+}
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index d4acf6fa0596..696f568253a0 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -630,7 +630,6 @@ static void early_init_this_mmu(void)
#ifdef CONFIG_PPC_FSL_BOOK3E
if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
unsigned int num_cams;
- int __maybe_unused cpu = smp_processor_id();
bool map = true;
/* use a quarter of the TLBCAM for bolted linear map */
@@ -704,6 +703,8 @@ static void __init early_init_mmu_global(void)
* for use by the TLB miss code
*/
linear_map_top = memblock_end_of_DRAM();
+
+ ioremap_bot = IOREMAP_BASE;
}
static void __init early_mmu_set_memory_limit(void)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 50d68d21ddcc..3c7dec70cda0 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1616,11 +1616,11 @@ static ssize_t topology_write(struct file *file, const char __user *buf,
return count;
}
-static const struct file_operations topology_ops = {
- .read = seq_read,
- .write = topology_write,
- .open = topology_open,
- .release = single_release
+static const struct proc_ops topology_proc_ops = {
+ .proc_read = seq_read,
+ .proc_write = topology_write,
+ .proc_open = topology_open,
+ .proc_release = single_release,
};
static int topology_update_init(void)
@@ -1630,7 +1630,7 @@ static int topology_update_init(void)
if (vphn_enabled)
topology_schedule_update();
- if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
+ if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_proc_ops))
return -ENOMEM;
topology_inited = 1;
diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
index a7b05214760c..ee4bd6d38602 100644
--- a/arch/powerpc/mm/pgtable-frag.c
+++ b/arch/powerpc/mm/pgtable-frag.c
@@ -25,7 +25,7 @@ void pte_frag_destroy(void *pte_frag)
count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
- pgtable_page_dtor(page);
+ pgtable_pte_page_dtor(page);
__free_page(page);
}
}
@@ -61,7 +61,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
if (!page)
return NULL;
- if (!pgtable_page_ctor(page)) {
+ if (!pgtable_pte_page_ctor(page)) {
__free_page(page);
return NULL;
}
@@ -113,7 +113,7 @@ void pte_fragment_free(unsigned long *table, int kernel)
BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
if (atomic_dec_and_test(&page->pt_frag_refcount)) {
if (!kernel)
- pgtable_page_dtor(page);
+ pgtable_pte_page_dtor(page);
__free_page(page);
}
}
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 35cb96cfc258..5fb90edd865e 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -27,166 +27,13 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
-#include <asm/io.h>
#include <asm/setup.h>
#include <asm/sections.h>
#include <mm/mmu_decl.h>
-unsigned long ioremap_bot;
-EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
-
extern char etext[], _stext[], _sinittext[], _einittext[];
-void __iomem *
-ioremap(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
-
- return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap);
-
-void __iomem *
-ioremap_wc(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL);
-
- return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap_wc);
-
-void __iomem *
-ioremap_wt(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_cached_wthru(PAGE_KERNEL);
-
- return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap_wt);
-
-void __iomem *
-ioremap_coherent(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_cached(PAGE_KERNEL);
-
- return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap_coherent);
-
-void __iomem *
-ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
-{
- pte_t pte = __pte(flags);
-
- /* writeable implies dirty for kernel addresses */
- if (pte_write(pte))
- pte = pte_mkdirty(pte);
-
- /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
- pte = pte_exprotect(pte);
- pte = pte_mkprivileged(pte);
-
- return __ioremap_caller(addr, size, pte_pgprot(pte), __builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap_prot);
-
-void __iomem *
-__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
-{
- return __ioremap_caller(addr, size, __pgprot(flags), __builtin_return_address(0));
-}
-
-void __iomem *
-__ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller)
-{
- unsigned long v, i;
- phys_addr_t p;
- int err;
-
- /*
- * Choose an address to map it to.
- * Once the vmalloc system is running, we use it.
- * Before then, we use space going down from IOREMAP_TOP
- * (ioremap_bot records where we're up to).
- */
- p = addr & PAGE_MASK;
- size = PAGE_ALIGN(addr + size) - p;
-
- /*
- * If the address lies within the first 16 MB, assume it's in ISA
- * memory space
- */
- if (p < 16*1024*1024)
- p += _ISA_MEM_BASE;
-
-#ifndef CONFIG_CRASH_DUMP
- /*
- * Don't allow anybody to remap normal RAM that we're using.
- * mem_init() sets high_memory so only do the check after that.
- */
- if (slab_is_available() && p <= virt_to_phys(high_memory - 1) &&
- page_is_ram(__phys_to_pfn(p))) {
- printk("__ioremap(): phys addr 0x%llx is RAM lr %ps\n",
- (unsigned long long)p, __builtin_return_address(0));
- return NULL;
- }
-#endif
-
- if (size == 0)
- return NULL;
-
- /*
- * Is it already mapped? Perhaps overlapped by a previous
- * mapping.
- */
- v = p_block_mapped(p);
- if (v)
- goto out;
-
- if (slab_is_available()) {
- struct vm_struct *area;
- area = get_vm_area_caller(size, VM_IOREMAP, caller);
- if (area == 0)
- return NULL;
- area->phys_addr = p;
- v = (unsigned long) area->addr;
- } else {
- v = (ioremap_bot -= size);
- }
-
- /*
- * Should check if it is a candidate for a BAT mapping
- */
-
- err = 0;
- for (i = 0; i < size && err == 0; i += PAGE_SIZE)
- err = map_kernel_page(v + i, p + i, prot);
- if (err) {
- if (slab_is_available())
- vunmap((void *)v);
- return NULL;
- }
-
-out:
- return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK));
-}
-EXPORT_SYMBOL(__ioremap);
-
-void iounmap(volatile void __iomem *addr)
-{
- /*
- * If mapped by BATs then there is nothing to do.
- * Calling vfree() generates a benign warning.
- */
- if (v_block_mapped((unsigned long)addr))
- return;
-
- if (addr > high_memory && (unsigned long) addr < ioremap_bot)
- vunmap((void *) (PAGE_MASK & (unsigned long)addr));
-}
-EXPORT_SYMBOL(iounmap);
-
static void __init *early_alloc_pgtable(unsigned long size)
{
void *ptr = memblock_alloc(size, size);
@@ -252,7 +99,7 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
map_kernel_page(v, p, ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL);
#ifdef CONFIG_PPC_BOOK3S_32
if (ktext)
- hash_preload(&init_mm, v, false, 0x300);
+ hash_preload(&init_mm, v);
#endif
v += PAGE_SIZE;
p += PAGE_SIZE;
@@ -270,10 +117,7 @@ void __init mapin_ram(void)
if (base >= top)
continue;
base = mmu_mapin_ram(base, top);
- if (IS_ENABLED(CONFIG_BDI_SWITCH))
- __mapin_ram_chunk(reg->base, top);
- else
- __mapin_ram_chunk(base, top);
+ __mapin_ram_chunk(base, top);
}
}
@@ -374,6 +218,7 @@ void mark_rodata_ro(void)
if (v_block_mapped((unsigned long)_sinittext)) {
mmu_mark_rodata_ro();
+ ptdump_check_wx();
return;
}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 9ad59b733984..e78832dce7bb 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * This file contains ioremap and related functions for 64-bit machines.
+ * This file contains pgtable related functions for 64-bit machines.
*
* Derived from arch/ppc64/mm/init.c
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -34,7 +34,6 @@
#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/prom.h>
-#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
@@ -98,208 +97,8 @@ unsigned long __pte_frag_nr;
EXPORT_SYMBOL(__pte_frag_nr);
unsigned long __pte_frag_size_shift;
EXPORT_SYMBOL(__pte_frag_size_shift);
-unsigned long ioremap_bot;
-#else /* !CONFIG_PPC_BOOK3S_64 */
-unsigned long ioremap_bot = IOREMAP_BASE;
#endif
-int __weak ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)
-{
- unsigned long i;
-
- for (i = 0; i < size; i += PAGE_SIZE) {
- int err = map_kernel_page(ea + i, pa + i, prot);
- if (err) {
- if (slab_is_available())
- unmap_kernel_range(ea, size);
- else
- WARN_ON_ONCE(1); /* Should clean up */
- return err;
- }
- }
-
- return 0;
-}
-
-/**
- * __ioremap_at - Low level function to establish the page tables
- * for an IO mapping
- */
-void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
-{
- /* We don't support the 4K PFN hack with ioremap */
- if (pgprot_val(prot) & H_PAGE_4K_PFN)
- return NULL;
-
- if ((ea + size) >= (void *)IOREMAP_END) {
- pr_warn("Outside the supported range\n");
- return NULL;
- }
-
- WARN_ON(pa & ~PAGE_MASK);
- WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
- WARN_ON(size & ~PAGE_MASK);
-
- if (ioremap_range((unsigned long)ea, pa, size, prot, NUMA_NO_NODE))
- return NULL;
-
- return (void __iomem *)ea;
-}
-
-/**
- * __iounmap_from - Low level function to tear down the page tables
- * for an IO mapping. This is used for mappings that
- * are manipulated manually, like partial unmapping of
- * PCI IOs or ISA space.
- */
-void __iounmap_at(void *ea, unsigned long size)
-{
- WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
- WARN_ON(size & ~PAGE_MASK);
-
- unmap_kernel_range((unsigned long)ea, size);
-}
-
-void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
- pgprot_t prot, void *caller)
-{
- phys_addr_t paligned;
- void __iomem *ret;
-
- /*
- * Choose an address to map it to.
- * Once the imalloc system is running, we use it.
- * Before that, we map using addresses going
- * up from ioremap_bot. imalloc will use
- * the addresses from ioremap_bot through
- * IMALLOC_END
- *
- */
- paligned = addr & PAGE_MASK;
- size = PAGE_ALIGN(addr + size) - paligned;
-
- if ((size == 0) || (paligned == 0))
- return NULL;
-
- if (slab_is_available()) {
- struct vm_struct *area;
-
- area = __get_vm_area_caller(size, VM_IOREMAP,
- ioremap_bot, IOREMAP_END,
- caller);
- if (area == NULL)
- return NULL;
-
- area->phys_addr = paligned;
- ret = __ioremap_at(paligned, area->addr, size, prot);
- } else {
- ret = __ioremap_at(paligned, (void *)ioremap_bot, size, prot);
- if (ret)
- ioremap_bot += size;
- }
-
- if (ret)
- ret += addr & ~PAGE_MASK;
- return ret;
-}
-
-void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
- unsigned long flags)
-{
- return __ioremap_caller(addr, size, __pgprot(flags), __builtin_return_address(0));
-}
-
-void __iomem * ioremap(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
- void *caller = __builtin_return_address(0);
-
- if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, prot, caller);
- return __ioremap_caller(addr, size, prot, caller);
-}
-
-void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL);
- void *caller = __builtin_return_address(0);
-
- if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, prot, caller);
- return __ioremap_caller(addr, size, prot, caller);
-}
-
-void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_cached(PAGE_KERNEL);
- void *caller = __builtin_return_address(0);
-
- if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, prot, caller);
- return __ioremap_caller(addr, size, prot, caller);
-}
-
-void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
- unsigned long flags)
-{
- pte_t pte = __pte(flags);
- void *caller = __builtin_return_address(0);
-
- /* writeable implies dirty for kernel addresses */
- if (pte_write(pte))
- pte = pte_mkdirty(pte);
-
- /* we don't want to let _PAGE_EXEC leak out */
- pte = pte_exprotect(pte);
- /*
- * Force kernel mapping.
- */
- pte = pte_mkprivileged(pte);
-
- if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, pte_pgprot(pte), caller);
- return __ioremap_caller(addr, size, pte_pgprot(pte), caller);
-}
-
-
-/*
- * Unmap an IO region and remove it from imalloc'd list.
- * Access to IO memory should be serialized by driver.
- */
-void __iounmap(volatile void __iomem *token)
-{
- void *addr;
-
- if (!slab_is_available())
- return;
-
- addr = (void *) ((unsigned long __force)
- PCI_FIX_ADDR(token) & PAGE_MASK);
- if ((unsigned long)addr < ioremap_bot) {
- printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
- " at 0x%p\n", addr);
- return;
- }
- vunmap(addr);
-}
-
-void iounmap(volatile void __iomem *token)
-{
- if (ppc_md.iounmap)
- ppc_md.iounmap(token);
- else
- __iounmap(token);
-}
-
-EXPORT_SYMBOL(ioremap);
-EXPORT_SYMBOL(ioremap_wc);
-EXPORT_SYMBOL(ioremap_prot);
-EXPORT_SYMBOL(__ioremap);
-EXPORT_SYMBOL(__ioremap_at);
-EXPORT_SYMBOL(iounmap);
-EXPORT_SYMBOL(__iounmap);
-EXPORT_SYMBOL(__iounmap_at);
-
#ifndef __PAGETABLE_PUD_FOLDED
/* 4 level page table */
struct page *pgd_page(pgd_t pgd)
diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c
index a0d23e96e841..4154feac1da3 100644
--- a/arch/powerpc/mm/ptdump/bats.c
+++ b/arch/powerpc/mm/ptdump/bats.c
@@ -149,7 +149,7 @@ static int bats_show_603(struct seq_file *m, void *v)
static int bats_open(struct inode *inode, struct file *file)
{
- if (cpu_has_feature(CPU_FTR_601))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
return single_open(file, bats_show_601, NULL);
return single_open(file, bats_show_603, NULL);
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index 72f0e4a3d839..a07278027c6f 100644
--- a/arch/powerpc/mm/ptdump/hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -237,7 +237,6 @@ static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64
return -1;
}
-#ifdef CONFIG_PPC_PSERIES
static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r)
{
struct hash_pte ptes[4];
@@ -274,7 +273,6 @@ static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *
}
return -1;
}
-#endif
static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps,
unsigned long *lp_bits)
@@ -316,10 +314,9 @@ static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps,
static int base_hpte_find(unsigned long ea, int psize, bool primary, u64 *v,
u64 *r)
{
-#ifdef CONFIG_PPC_PSERIES
- if (firmware_has_feature(FW_FEATURE_LPAR))
+ if (IS_ENABLED(CONFIG_PPC_PSERIES) && firmware_has_feature(FW_FEATURE_LPAR))
return pseries_find(ea, psize, primary, v, r);
-#endif
+
return native_find(ea, psize, primary, v, r);
}
@@ -386,12 +383,13 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
psize = mmu_vmalloc_psize;
else
psize = mmu_io_psize;
-#ifdef CONFIG_PPC_64K_PAGES
+
/* check for secret 4K mappings */
- if (((pteval & H_PAGE_COMBO) == H_PAGE_COMBO) ||
- ((pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN))
+ if (IS_ENABLED(CONFIG_PPC_64K_PAGES) &&
+ ((pteval & H_PAGE_COMBO) == H_PAGE_COMBO ||
+ (pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN))
psize = mmu_io_psize;
-#endif
+
/* check for hashpte */
status = hpte_find(st, addr, psize);
@@ -469,9 +467,10 @@ static void walk_linearmapping(struct pg_state *st)
static void walk_vmemmap(struct pg_state *st)
{
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
struct vmemmap_backing *ptr = vmemmap_list;
+ if (!IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))
+ return;
/*
* Traverse the vmemmaped memory and dump pages that are in the hash
* pagetable.
@@ -481,7 +480,6 @@ static void walk_vmemmap(struct pg_state *st)
ptr = ptr->list;
}
seq_puts(st->seq, "---[ vmemmap end ]---\n");
-#endif
}
static void populate_markers(void)
@@ -495,11 +493,7 @@ static void populate_markers(void)
address_markers[6].start_address = PHB_IO_END;
address_markers[7].start_address = IOREMAP_BASE;
address_markers[8].start_address = IOREMAP_END;
-#ifdef CONFIG_PPC_BOOK3S_64
address_markers[9].start_address = H_VMEMMAP_START;
-#else
- address_markers[9].start_address = VMEMMAP_BASE;
-#endif
}
static int ptdump_show(struct seq_file *m, void *v)
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 6a88a9f585d4..206156255247 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -24,11 +24,9 @@
#include <asm/page.h>
#include <asm/pgalloc.h>
-#include "ptdump.h"
+#include <mm/mmu_decl.h>
-#ifdef CONFIG_PPC32
-#define KERN_VIRT_START PAGE_OFFSET
-#endif
+#include "ptdump.h"
/*
* To visualise what is happening,
@@ -88,10 +86,6 @@ static struct addr_marker address_markers[] = {
#else
{ 0, "Early I/O remap start" },
{ 0, "Early I/O remap end" },
-#ifdef CONFIG_NOT_COHERENT_CACHE
- { 0, "Consistent mem start" },
- { 0, "Consistent mem end" },
-#endif
#ifdef CONFIG_HIGHMEM
{ 0, "Highmem PTEs start" },
{ 0, "Highmem PTEs end" },
@@ -181,10 +175,12 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
static void note_prot_wx(struct pg_state *st, unsigned long addr)
{
- if (!st->check_wx)
+ pte_t pte = __pte(st->current_flags);
+
+ if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx)
return;
- if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == pgprot_val(PAGE_KERNEL_X)))
+ if (!pte_write(pte) || !pte_exec(pte))
return;
WARN_ONCE(1, "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n",
@@ -299,17 +295,15 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
static void walk_pagetables(struct pg_state *st)
{
- pgd_t *pgd = pgd_offset_k(0UL);
unsigned int i;
- unsigned long addr;
-
- addr = st->start_address;
+ unsigned long addr = st->start_address & PGDIR_MASK;
+ pgd_t *pgd = pgd_offset_k(addr);
/*
* Traverse the linux pagetable structure and dump pages that are in
* the hash pagetable.
*/
- for (i = 0; i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
+ for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
if (!pgd_none(*pgd) && !pgd_is_leaf(*pgd))
/* pgd exists */
walk_pud(st, pgd, addr);
@@ -341,11 +335,6 @@ static void populate_markers(void)
#else /* !CONFIG_PPC64 */
address_markers[i++].start_address = ioremap_bot;
address_markers[i++].start_address = IOREMAP_TOP;
-#ifdef CONFIG_NOT_COHERENT_CACHE
- address_markers[i++].start_address = IOREMAP_TOP;
- address_markers[i++].start_address = IOREMAP_TOP +
- CONFIG_CONSISTENT_SIZE;
-#endif
#ifdef CONFIG_HIGHMEM
address_markers[i++].start_address = PKMAP_BASE;
address_markers[i++].start_address = PKMAP_ADDR(LAST_PKMAP);
@@ -364,12 +353,13 @@ static int ptdump_show(struct seq_file *m, void *v)
struct pg_state st = {
.seq = m,
.marker = address_markers,
+ .start_address = PAGE_OFFSET,
};
- if (radix_enabled())
- st.start_address = PAGE_OFFSET;
- else
+#ifdef CONFIG_PPC64
+ if (!radix_enabled())
st.start_address = KERN_VIRT_START;
+#endif
/* Traverse kernel page tables */
walk_pagetables(&st);
@@ -407,12 +397,13 @@ void ptdump_check_wx(void)
.seq = NULL,
.marker = address_markers,
.check_wx = true,
+ .start_address = PAGE_OFFSET,
};
- if (radix_enabled())
- st.start_address = PAGE_OFFSET;
- else
+#ifdef CONFIG_PPC64
+ if (!radix_enabled())
st.start_address = KERN_VIRT_START;
+#endif
walk_pagetables(&st);
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 42bbcd47cc85..dffe1a45b6ed 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -50,7 +50,7 @@ static void slice_print_mask(const char *label, const struct slice_mask *mask) {
#endif
-static inline bool slice_addr_is_low(unsigned long addr)
+static inline notrace bool slice_addr_is_low(unsigned long addr)
{
u64 tmp = (u64)addr;
@@ -659,7 +659,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
mm_ctx_user_psize(&current->mm->context), 1);
}
-unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
+unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr)
{
unsigned char *psizes;
int index, mask_index;
diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h
index 6e5a2a4faeab..4ec2a9f14f84 100644
--- a/arch/powerpc/net/bpf_jit32.h
+++ b/arch/powerpc/net/bpf_jit32.h
@@ -97,12 +97,12 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
#ifdef CONFIG_SMP
#ifdef CONFIG_PPC64
#define PPC_BPF_LOAD_CPU(r) \
- do { BUILD_BUG_ON(FIELD_SIZEOF(struct paca_struct, paca_index) != 2); \
+ do { BUILD_BUG_ON(sizeof_field(struct paca_struct, paca_index) != 2); \
PPC_LHZ_OFFS(r, 13, offsetof(struct paca_struct, paca_index)); \
} while (0)
#else
#define PPC_BPF_LOAD_CPU(r) \
- do { BUILD_BUG_ON(FIELD_SIZEOF(struct task_struct, cpu) != 4); \
+ do { BUILD_BUG_ON(sizeof_field(struct task_struct, cpu) != 4); \
PPC_LHZ_OFFS(r, 2, offsetof(struct task_struct, cpu)); \
} while(0)
#endif
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index d57b46e0dd60..0acc9d5fb19e 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -321,7 +321,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
break;
case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, len) != 4);
PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
break;
case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */
@@ -333,16 +333,16 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
/*** Ancillary info loads ***/
case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
+ BUILD_BUG_ON(sizeof_field(struct sk_buff,
protocol) != 2);
PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff,
protocol));
break;
case BPF_ANC | SKF_AD_IFINDEX:
case BPF_ANC | SKF_AD_HATYPE:
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
+ BUILD_BUG_ON(sizeof_field(struct net_device,
ifindex) != 4);
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
+ BUILD_BUG_ON(sizeof_field(struct net_device,
type) != 2);
PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
dev));
@@ -365,17 +365,17 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
break;
case BPF_ANC | SKF_AD_MARK:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4);
PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
mark));
break;
case BPF_ANC | SKF_AD_RXHASH:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4);
PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
hash));
break;
case BPF_ANC | SKF_AD_VLAN_TAG:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2);
PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
vlan_tci));
@@ -388,7 +388,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
PPC_ANDI(r_A, r_A, 1);
break;
case BPF_ANC | SKF_AD_QUEUE:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
+ BUILD_BUG_ON(sizeof_field(struct sk_buff,
queue_mapping) != 2);
PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
queue_mapping));
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 02a59946a78a..be3517ef0574 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -1142,6 +1142,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
}
/*
+ * If we have seen a tail call, we need a second pass.
+ * This is because bpf_jit_emit_common_epilogue() is called
+ * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
+ */
+ if (cgctx.seen & SEEN_TAILCALL) {
+ cgctx.idx = 0;
+ if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
+ fp = org_fp;
+ goto out_addrs;
+ }
+ }
+
+ /*
* Pretend to build prologue, given the features we've seen. This will
* update ctgtx.idx as it pretends to output instructions, then we can
* calculate total size from idx.
diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c
index 43245f4a9bcb..6f347fa29f41 100644
--- a/arch/powerpc/oprofile/backtrace.c
+++ b/arch/powerpc/oprofile/backtrace.c
@@ -9,7 +9,7 @@
#include <linux/sched.h>
#include <asm/processor.h>
#include <linux/uaccess.h>
-#include <asm/compat.h>
+#include <linux/compat.h>
#include <asm/oprofile_impl.h>
#define STACK_SP(STACK) *(STACK)
@@ -28,15 +28,12 @@ static unsigned int user_getsp32(unsigned int sp, int is_first)
unsigned int stack_frame[2];
void __user *p = compat_ptr(sp);
- if (!access_ok(p, sizeof(stack_frame)))
- return 0;
-
/*
* The most likely reason for this is that we returned -EFAULT,
* which means that we've done all that we can do from
* interrupt context.
*/
- if (__copy_from_user_inatomic(stack_frame, p, sizeof(stack_frame)))
+ if (probe_user_read(stack_frame, (void __user *)p, sizeof(stack_frame)))
return 0;
if (!is_first)
@@ -54,11 +51,7 @@ static unsigned long user_getsp64(unsigned long sp, int is_first)
{
unsigned long stack_frame[3];
- if (!access_ok((void __user *)sp, sizeof(stack_frame)))
- return 0;
-
- if (__copy_from_user_inatomic(stack_frame, (void __user *)sp,
- sizeof(stack_frame)))
+ if (probe_user_read(stack_frame, (void __user *)sp, sizeof(stack_frame)))
return 0;
if (!is_first)
@@ -103,7 +96,6 @@ void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth)
first_frame = 0;
}
} else {
- pagefault_disable();
#ifdef CONFIG_PPC64
if (!is_32bit_task()) {
while (depth--) {
@@ -112,7 +104,6 @@ void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth)
break;
first_frame = 0;
}
- pagefault_enable();
return;
}
#endif
@@ -123,6 +114,5 @@ void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth)
break;
first_frame = 0;
}
- pagefault_enable();
}
}
diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c
index 19124b0b171a..1ad03c55c88c 100644
--- a/arch/powerpc/perf/8xx-pmu.c
+++ b/arch/powerpc/perf/8xx-pmu.c
@@ -157,10 +157,6 @@ static void mpc8xx_pmu_read(struct perf_event *event)
static void mpc8xx_pmu_del(struct perf_event *event, int flags)
{
- /* mfspr r10, SPRN_SPRG_SCRATCH0 */
- unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) |
- __PPC_SPR(SPRN_SPRG_SCRATCH0);
-
mpc8xx_pmu_read(event);
/* If it was the last user, stop counting to avoid useles overhead */
@@ -173,6 +169,10 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags)
break;
case PERF_8xx_ID_ITLB_LOAD_MISS:
if (atomic_dec_return(&itlb_miss_ref) == 0) {
+ /* mfspr r10, SPRN_SPRG_SCRATCH0 */
+ unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) |
+ __PPC_SPR(SPRN_SPRG_SCRATCH0);
+
patch_instruction_site(&patch__itlbmiss_exit_1, insn);
#ifndef CONFIG_PIN_TLB_TEXT
patch_instruction_site(&patch__itlbmiss_exit_2, insn);
@@ -181,6 +181,10 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags)
break;
case PERF_8xx_ID_DTLB_LOAD_MISS:
if (atomic_dec_return(&dtlb_miss_ref) == 0) {
+ /* mfspr r10, SPRN_DAR */
+ unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) |
+ __PPC_SPR(SPRN_DAR);
+
patch_instruction_site(&patch__dtlbmiss_exit_1, insn);
patch_instruction_site(&patch__dtlbmiss_exit_2, insn);
patch_instruction_site(&patch__dtlbmiss_exit_3, insn);
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index c84bbd4298a0..cbc251981209 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -155,12 +155,8 @@ static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
((unsigned long)ptr & 7))
return -EFAULT;
- pagefault_disable();
- if (!__get_user_inatomic(*ret, ptr)) {
- pagefault_enable();
+ if (!probe_user_read(ret, ptr, sizeof(*ret)))
return 0;
- }
- pagefault_enable();
return read_user_stack_slow(ptr, ret, 8);
}
@@ -171,12 +167,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
((unsigned long)ptr & 3))
return -EFAULT;
- pagefault_disable();
- if (!__get_user_inatomic(*ret, ptr)) {
- pagefault_enable();
+ if (!probe_user_read(ret, ptr, sizeof(*ret)))
return 0;
- }
- pagefault_enable();
return read_user_stack_slow(ptr, ret, 4);
}
@@ -284,16 +276,6 @@ static void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
}
}
-static inline int current_is_64bit(void)
-{
- /*
- * We can't use test_thread_flag() here because we may be on an
- * interrupt stack, and the thread flags don't get copied over
- * from the thread_info on the main stack to the interrupt stack.
- */
- return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT);
-}
-
#else /* CONFIG_PPC64 */
/*
* On 32-bit we just access the address and let hash_page create a
@@ -303,17 +285,11 @@ static inline int current_is_64bit(void)
*/
static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
{
- int rc;
-
if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
((unsigned long)ptr & 3))
return -EFAULT;
- pagefault_disable();
- rc = __get_user_inatomic(*ret, ptr);
- pagefault_enable();
-
- return rc;
+ return probe_user_read(ret, ptr, sizeof(*ret));
}
static inline void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
@@ -321,11 +297,6 @@ static inline void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry
{
}
-static inline int current_is_64bit(void)
-{
- return 0;
-}
-
static inline int valid_user_sp(unsigned long sp, int is_64)
{
if (!sp || (sp & 7) || sp > TASK_SIZE - 32)
@@ -486,7 +457,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
void
perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
- if (current_is_64bit())
+ if (!is_32bit_task())
perf_callchain_user_64(entry, regs);
else
perf_callchain_user_32(entry, regs);
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index ca92e01d0bd1..3086055bf681 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -96,7 +96,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
{
return 0;
}
-static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
+static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { }
static inline u32 perf_get_misc_flags(struct pt_regs *regs)
{
return 0;
@@ -127,7 +127,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
-static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
+static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {}
static void pmao_restore_workaround(bool ebb) { }
#endif /* CONFIG_PPC32 */
@@ -179,7 +179,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
* pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC, the
* [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA, or the SDAR_VALID bit in SIER.
*/
-static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
+static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp)
{
unsigned long mmcra = regs->dsisr;
bool sdar_valid;
@@ -204,8 +204,7 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
*addrp = mfspr(SPRN_SDAR);
- if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
- is_kernel_addr(mfspr(SPRN_SDAR)))
+ if (is_kernel_addr(mfspr(SPRN_SDAR)) && perf_allow_kernel(&event->attr) != 0)
*addrp = 0;
}
@@ -416,7 +415,6 @@ static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
static __u64 power_pmu_bhrb_to(u64 addr)
{
unsigned int instr;
- int ret;
__u64 target;
if (is_kernel_addr(addr)) {
@@ -427,13 +425,8 @@ static __u64 power_pmu_bhrb_to(u64 addr)
}
/* Userspace: need copy instruction here then translate it */
- pagefault_disable();
- ret = __get_user_inatomic(instr, (unsigned int __user *)addr);
- if (ret) {
- pagefault_enable();
+ if (probe_user_read(&instr, (unsigned int __user *)addr, sizeof(instr)))
return 0;
- }
- pagefault_enable();
target = branch_target(&instr);
if ((!target) || (instr & BRANCH_ABSOLUTE))
@@ -444,7 +437,7 @@ static __u64 power_pmu_bhrb_to(u64 addr)
}
/* Processing BHRB entries */
-static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
+static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw)
{
u64 val;
u64 addr;
@@ -472,8 +465,7 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
* exporting it to userspace (avoid exposure of regions
* where we could have speculative execution)
*/
- if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
- is_kernel_addr(addr))
+ if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0)
continue;
/* Branches are read most recent first (ie. mfbhrb 0 is
@@ -2087,12 +2079,12 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (event->attr.sample_type &
(PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
- perf_get_data_addr(regs, &data.addr);
+ perf_get_data_addr(event, regs, &data.addr);
if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
struct cpu_hw_events *cpuhw;
cpuhw = this_cpu_ptr(&cpu_hw_events);
- power_pmu_bhrb_read(cpuhw);
+ power_pmu_bhrb_read(event, cpuhw);
data.br_stack = &cpuhw->bhrb_stack;
}
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index dea243185ea4..cb50a9e1fd2d 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -577,6 +577,7 @@ static int core_imc_mem_init(int cpu, int size)
{
int nid, rc = 0, core_id = (cpu / threads_per_core);
struct imc_mem_info *mem_info;
+ struct page *page;
/*
* alloc_pages_node() will allocate memory for core in the
@@ -587,11 +588,12 @@ static int core_imc_mem_init(int cpu, int size)
mem_info->id = core_id;
/* We need only vbase for core counters */
- mem_info->vbase = page_address(alloc_pages_node(nid,
- GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
- __GFP_NOWARN, get_order(size)));
- if (!mem_info->vbase)
+ page = alloc_pages_node(nid,
+ GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+ __GFP_NOWARN, get_order(size));
+ if (!page)
return -ENOMEM;
+ mem_info->vbase = page_address(page);
/* Init the mutex */
core_imc_refc[core_id].id = core_id;
@@ -849,15 +851,17 @@ static int thread_imc_mem_alloc(int cpu_id, int size)
int nid = cpu_to_node(cpu_id);
if (!local_mem) {
+ struct page *page;
/*
* This case could happen only once at start, since we dont
* free the memory in cpu offline path.
*/
- local_mem = page_address(alloc_pages_node(nid,
+ page = alloc_pages_node(nid,
GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
- __GFP_NOWARN, get_order(size)));
- if (!local_mem)
+ __GFP_NOWARN, get_order(size));
+ if (!page)
return -ENOMEM;
+ local_mem = page_address(page);
per_cpu(thread_imc_mem, cpu_id) = local_mem;
}
@@ -1095,11 +1099,14 @@ static int trace_imc_mem_alloc(int cpu_id, int size)
int core_id = (cpu_id / threads_per_core);
if (!local_mem) {
- local_mem = page_address(alloc_pages_node(phys_id,
- GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
- __GFP_NOWARN, get_order(size)));
- if (!local_mem)
+ struct page *page;
+
+ page = alloc_pages_node(phys_id,
+ GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+ __GFP_NOWARN, get_order(size));
+ if (!page)
return -ENOMEM;
+ local_mem = page_address(page);
per_cpu(trace_imc_mem, cpu_id) = local_mem;
/* Initialise the counters for trace mode */
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index b369ed4e3675..25ebe634a661 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -272,14 +272,6 @@ config PPC4xx_GPIO
help
Enable gpiolib support for ppc440 based boards
-config PPC4xx_OCM
- bool "PPC4xx On Chip Memory (OCM) support"
- depends on 4xx
- select PPC_LIB_RHEAP
- help
- Enable OCM support for PowerPC 4xx platforms with on chip memory,
- OCM provides the fast place for memory access to improve performance.
-
# 44x specific CPU modules, selected based on the board above.
config 440EP
bool
diff --git a/arch/powerpc/platforms/4xx/Makefile b/arch/powerpc/platforms/4xx/Makefile
index f5ae27ca131b..d009d2e0b9e8 100644
--- a/arch/powerpc/platforms/4xx/Makefile
+++ b/arch/powerpc/platforms/4xx/Makefile
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-y += uic.o machine_check.o
-obj-$(CONFIG_PPC4xx_OCM) += ocm.o
obj-$(CONFIG_4xx_SOC) += soc.o
obj-$(CONFIG_PCI) += pci.o
obj-$(CONFIG_PPC4xx_HSTA_MSI) += hsta_msi.o
diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c
deleted file mode 100644
index ba3257406ced..000000000000
--- a/arch/powerpc/platforms/4xx/ocm.c
+++ /dev/null
@@ -1,390 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * PowerPC 4xx OCM memory allocation support
- *
- * (C) Copyright 2009, Applied Micro Circuits Corporation
- * Victor Gallardo (vgallardo@amcc.com)
- *
- * See file CREDITS for list of people who contributed to this
- * project.
- */
-
-#include <linux/kernel.h>
-#include <linux/dma-mapping.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <asm/rheap.h>
-#include <asm/ppc4xx_ocm.h>
-#include <linux/slab.h>
-#include <linux/debugfs.h>
-
-#define OCM_DISABLED 0
-#define OCM_ENABLED 1
-
-struct ocm_block {
- struct list_head list;
- void __iomem *addr;
- int size;
- const char *owner;
-};
-
-/* non-cached or cached region */
-struct ocm_region {
- phys_addr_t phys;
- void __iomem *virt;
-
- int memtotal;
- int memfree;
-
- rh_info_t *rh;
- struct list_head list;
-};
-
-struct ocm_info {
- int index;
- int status;
- int ready;
-
- phys_addr_t phys;
-
- int alignment;
- int memtotal;
- int cache_size;
-
- struct ocm_region nc; /* non-cached region */
- struct ocm_region c; /* cached region */
-};
-
-static struct ocm_info *ocm_nodes;
-static int ocm_count;
-
-static struct ocm_info *ocm_get_node(unsigned int index)
-{
- if (index >= ocm_count) {
- printk(KERN_ERR "PPC4XX OCM: invalid index");
- return NULL;
- }
-
- return &ocm_nodes[index];
-}
-
-static int ocm_free_region(struct ocm_region *ocm_reg, const void *addr)
-{
- struct ocm_block *blk, *tmp;
- unsigned long offset;
-
- if (!ocm_reg->virt)
- return 0;
-
- list_for_each_entry_safe(blk, tmp, &ocm_reg->list, list) {
- if (blk->addr == addr) {
- offset = addr - ocm_reg->virt;
- ocm_reg->memfree += blk->size;
- rh_free(ocm_reg->rh, offset);
- list_del(&blk->list);
- kfree(blk);
- return 1;
- }
- }
-
- return 0;
-}
-
-static void __init ocm_init_node(int count, struct device_node *node)
-{
- struct ocm_info *ocm;
-
- const unsigned int *cell_index;
- const unsigned int *cache_size;
- int len;
-
- struct resource rsrc;
-
- ocm = ocm_get_node(count);
-
- cell_index = of_get_property(node, "cell-index", &len);
- if (!cell_index) {
- printk(KERN_ERR "PPC4XX OCM: missing cell-index property");
- return;
- }
- ocm->index = *cell_index;
-
- if (of_device_is_available(node))
- ocm->status = OCM_ENABLED;
-
- cache_size = of_get_property(node, "cached-region-size", &len);
- if (cache_size)
- ocm->cache_size = *cache_size;
-
- if (of_address_to_resource(node, 0, &rsrc)) {
- printk(KERN_ERR "PPC4XX OCM%d: could not get resource address\n",
- ocm->index);
- return;
- }
-
- ocm->phys = rsrc.start;
- ocm->memtotal = (rsrc.end - rsrc.start + 1);
-
- printk(KERN_INFO "PPC4XX OCM%d: %d Bytes (%s)\n",
- ocm->index, ocm->memtotal,
- (ocm->status == OCM_DISABLED) ? "disabled" : "enabled");
-
- if (ocm->status == OCM_DISABLED)
- return;
-
- /* request region */
-
- if (!request_mem_region(ocm->phys, ocm->memtotal, "ppc4xx_ocm")) {
- printk(KERN_ERR "PPC4XX OCM%d: could not request region\n",
- ocm->index);
- return;
- }
-
- /* Configure non-cached and cached regions */
-
- ocm->nc.phys = ocm->phys;
- ocm->nc.memtotal = ocm->memtotal - ocm->cache_size;
- ocm->nc.memfree = ocm->nc.memtotal;
-
- ocm->c.phys = ocm->phys + ocm->nc.memtotal;
- ocm->c.memtotal = ocm->cache_size;
- ocm->c.memfree = ocm->c.memtotal;
-
- if (ocm->nc.memtotal == 0)
- ocm->nc.phys = 0;
-
- if (ocm->c.memtotal == 0)
- ocm->c.phys = 0;
-
- printk(KERN_INFO "PPC4XX OCM%d: %d Bytes (non-cached)\n",
- ocm->index, ocm->nc.memtotal);
-
- printk(KERN_INFO "PPC4XX OCM%d: %d Bytes (cached)\n",
- ocm->index, ocm->c.memtotal);
-
- /* ioremap the non-cached region */
- if (ocm->nc.memtotal) {
- ocm->nc.virt = __ioremap(ocm->nc.phys, ocm->nc.memtotal,
- _PAGE_EXEC | pgprot_val(PAGE_KERNEL_NCG));
-
- if (!ocm->nc.virt) {
- printk(KERN_ERR
- "PPC4XX OCM%d: failed to ioremap non-cached memory\n",
- ocm->index);
- ocm->nc.memfree = 0;
- return;
- }
- }
-
- /* ioremap the cached region */
-
- if (ocm->c.memtotal) {
- ocm->c.virt = __ioremap(ocm->c.phys, ocm->c.memtotal,
- _PAGE_EXEC | pgprot_val(PAGE_KERNEL));
-
- if (!ocm->c.virt) {
- printk(KERN_ERR
- "PPC4XX OCM%d: failed to ioremap cached memory\n",
- ocm->index);
- ocm->c.memfree = 0;
- return;
- }
- }
-
- /* Create Remote Heaps */
-
- ocm->alignment = 4; /* default 4 byte alignment */
-
- if (ocm->nc.virt) {
- ocm->nc.rh = rh_create(ocm->alignment);
- rh_attach_region(ocm->nc.rh, 0, ocm->nc.memtotal);
- }
-
- if (ocm->c.virt) {
- ocm->c.rh = rh_create(ocm->alignment);
- rh_attach_region(ocm->c.rh, 0, ocm->c.memtotal);
- }
-
- INIT_LIST_HEAD(&ocm->nc.list);
- INIT_LIST_HEAD(&ocm->c.list);
-
- ocm->ready = 1;
-}
-
-static int ocm_debugfs_show(struct seq_file *m, void *v)
-{
- struct ocm_block *blk, *tmp;
- unsigned int i;
-
- for (i = 0; i < ocm_count; i++) {
- struct ocm_info *ocm = ocm_get_node(i);
-
- if (!ocm || !ocm->ready)
- continue;
-
- seq_printf(m, "PPC4XX OCM : %d\n", ocm->index);
- seq_printf(m, "PhysAddr : %pa\n", &(ocm->phys));
- seq_printf(m, "MemTotal : %d Bytes\n", ocm->memtotal);
- seq_printf(m, "MemTotal(NC) : %d Bytes\n", ocm->nc.memtotal);
- seq_printf(m, "MemTotal(C) : %d Bytes\n\n", ocm->c.memtotal);
-
- seq_printf(m, "NC.PhysAddr : %pa\n", &(ocm->nc.phys));
- seq_printf(m, "NC.VirtAddr : 0x%p\n", ocm->nc.virt);
- seq_printf(m, "NC.MemTotal : %d Bytes\n", ocm->nc.memtotal);
- seq_printf(m, "NC.MemFree : %d Bytes\n", ocm->nc.memfree);
-
- list_for_each_entry_safe(blk, tmp, &ocm->nc.list, list) {
- seq_printf(m, "NC.MemUsed : %d Bytes (%s)\n",
- blk->size, blk->owner);
- }
-
- seq_printf(m, "\nC.PhysAddr : %pa\n", &(ocm->c.phys));
- seq_printf(m, "C.VirtAddr : 0x%p\n", ocm->c.virt);
- seq_printf(m, "C.MemTotal : %d Bytes\n", ocm->c.memtotal);
- seq_printf(m, "C.MemFree : %d Bytes\n", ocm->c.memfree);
-
- list_for_each_entry_safe(blk, tmp, &ocm->c.list, list) {
- seq_printf(m, "C.MemUsed : %d Bytes (%s)\n",
- blk->size, blk->owner);
- }
-
- seq_putc(m, '\n');
- }
-
- return 0;
-}
-
-static int ocm_debugfs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ocm_debugfs_show, NULL);
-}
-
-static const struct file_operations ocm_debugfs_fops = {
- .open = ocm_debugfs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static int ocm_debugfs_init(void)
-{
- struct dentry *junk;
-
- junk = debugfs_create_dir("ppc4xx_ocm", 0);
- if (!junk) {
- printk(KERN_ALERT "debugfs ppc4xx ocm: failed to create dir\n");
- return -1;
- }
-
- if (debugfs_create_file("info", 0644, junk, NULL, &ocm_debugfs_fops)) {
- printk(KERN_ALERT "debugfs ppc4xx ocm: failed to create file\n");
- return -1;
- }
-
- return 0;
-}
-
-void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align,
- int flags, const char *owner)
-{
- void __iomem *addr = NULL;
- unsigned long offset;
- struct ocm_info *ocm;
- struct ocm_region *ocm_reg;
- struct ocm_block *ocm_blk;
- int i;
-
- for (i = 0; i < ocm_count; i++) {
- ocm = ocm_get_node(i);
-
- if (!ocm || !ocm->ready)
- continue;
-
- if (flags == PPC4XX_OCM_NON_CACHED)
- ocm_reg = &ocm->nc;
- else
- ocm_reg = &ocm->c;
-
- if (!ocm_reg->virt)
- continue;
-
- if (align < ocm->alignment)
- align = ocm->alignment;
-
- offset = rh_alloc_align(ocm_reg->rh, size, align, NULL);
-
- if (IS_ERR_VALUE(offset))
- continue;
-
- ocm_blk = kzalloc(sizeof(*ocm_blk), GFP_KERNEL);
- if (!ocm_blk) {
- rh_free(ocm_reg->rh, offset);
- break;
- }
-
- *phys = ocm_reg->phys + offset;
- addr = ocm_reg->virt + offset;
- size = ALIGN(size, align);
-
- ocm_blk->addr = addr;
- ocm_blk->size = size;
- ocm_blk->owner = owner;
- list_add_tail(&ocm_blk->list, &ocm_reg->list);
-
- ocm_reg->memfree -= size;
-
- break;
- }
-
- return addr;
-}
-
-void ppc4xx_ocm_free(const void *addr)
-{
- int i;
-
- if (!addr)
- return;
-
- for (i = 0; i < ocm_count; i++) {
- struct ocm_info *ocm = ocm_get_node(i);
-
- if (!ocm || !ocm->ready)
- continue;
-
- if (ocm_free_region(&ocm->nc, addr) ||
- ocm_free_region(&ocm->c, addr))
- return;
- }
-}
-
-static int __init ppc4xx_ocm_init(void)
-{
- struct device_node *np;
- int count;
-
- count = 0;
- for_each_compatible_node(np, NULL, "ibm,ocm")
- count++;
-
- if (!count)
- return 0;
-
- ocm_nodes = kzalloc((count * sizeof(struct ocm_info)), GFP_KERNEL);
- if (!ocm_nodes)
- return -ENOMEM;
-
- ocm_count = count;
- count = 0;
-
- for_each_compatible_node(np, NULL, "ibm,ocm") {
- ocm_init_node(count, np);
- count++;
- }
-
- ocm_debugfs_init();
-
- return 0;
-}
-
-arch_initcall(ppc4xx_ocm_init);
diff --git a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
index 13631f35cd14..04bf6ecf7d55 100644
--- a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
+++ b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
@@ -434,9 +434,9 @@ static int mpc512x_lpbfifo_probe(struct platform_device *pdev)
memset(&lpbfifo, 0, sizeof(struct lpbfifo_data));
spin_lock_init(&lpbfifo.lock);
- lpbfifo.chan = dma_request_slave_channel(&pdev->dev, "rx-tx");
- if (lpbfifo.chan == NULL)
- return -EPROBE_DEFER;
+ lpbfifo.chan = dma_request_chan(&pdev->dev, "rx-tx");
+ if (IS_ERR(lpbfifo.chan))
+ return PTR_ERR(lpbfifo.chan);
if (of_address_to_resource(pdev->dev.of_node, 0, &r) != 0) {
dev_err(&pdev->dev, "bad 'reg' in 'sclpc' device tree node\n");
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index ba12dc14a3d1..8c0d324f657e 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -650,6 +650,7 @@ static const struct file_operations mpc52xx_wdt_fops = {
.llseek = no_llseek,
.write = mpc52xx_wdt_write,
.unlocked_ioctl = mpc52xx_wdt_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.open = mpc52xx_wdt_open,
.release = mpc52xx_wdt_release,
};
diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c
index 273145aed90a..ada42f03915a 100644
--- a/arch/powerpc/platforms/83xx/km83xx.c
+++ b/arch/powerpc/platforms/83xx/km83xx.c
@@ -34,7 +34,6 @@
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
#include <soc/fsl/qe/qe.h>
-#include <soc/fsl/qe/qe_ic.h>
#include "mpc83xx.h"
@@ -64,7 +63,7 @@ static void quirk_mpc8360e_qe_enet10(void)
return;
}
- base = ioremap(res.start, res.end - res.start + 1);
+ base = ioremap(res.start, resource_size(&res));
/*
* set output delay adjustments to default values according
@@ -178,7 +177,7 @@ define_machine(mpc83xx_km) {
.name = "mpc83xx-km-platform",
.probe = mpc83xx_km_probe,
.setup_arch = mpc83xx_km_setup_arch,
- .init_IRQ = mpc83xx_ipic_and_qe_init_IRQ,
+ .init_IRQ = mpc83xx_ipic_init_IRQ,
.get_irq = ipic_get_irq,
.restart = mpc83xx_restart,
.time_init = mpc83xx_time_init,
diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c
index f46d7bf3b140..a952e91db3ee 100644
--- a/arch/powerpc/platforms/83xx/misc.c
+++ b/arch/powerpc/platforms/83xx/misc.c
@@ -14,10 +14,11 @@
#include <asm/io.h>
#include <asm/hw_irq.h>
#include <asm/ipic.h>
-#include <soc/fsl/qe/qe_ic.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
+#include <mm/mmu_decl.h>
+
#include "mpc83xx.h"
static __be32 __iomem *restart_reg_base;
@@ -89,28 +90,6 @@ void __init mpc83xx_ipic_init_IRQ(void)
ipic_set_default_priority();
}
-#ifdef CONFIG_QUICC_ENGINE
-void __init mpc83xx_qe_init_IRQ(void)
-{
- struct device_node *np;
-
- np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
- if (!np) {
- np = of_find_node_by_type(NULL, "qeic");
- if (!np)
- return;
- }
- qe_ic_init(np, 0, qe_ic_cascade_low_ipic, qe_ic_cascade_high_ipic);
- of_node_put(np);
-}
-
-void __init mpc83xx_ipic_and_qe_init_IRQ(void)
-{
- mpc83xx_ipic_init_IRQ();
- mpc83xx_qe_init_IRQ();
-}
-#endif /* CONFIG_QUICC_ENGINE */
-
static const struct of_device_id of_bus_ids[] __initconst = {
{ .type = "soc", },
{ .compatible = "soc", },
@@ -145,6 +124,15 @@ void __init mpc83xx_setup_arch(void)
if (ppc_md.progress)
ppc_md.progress("mpc83xx_setup_arch()", 0);
+ if (!__map_without_bats) {
+ phys_addr_t immrbase = get_immrbase();
+ int immrsize = IS_ALIGNED(immrbase, SZ_2M) ? SZ_2M : SZ_1M;
+ unsigned long va = fix_to_virt(FIX_IMMR_BASE);
+
+ setbat(-1, va, immrbase, immrsize, PAGE_KERNEL_NCG);
+ update_bats();
+ }
+
mpc83xx_setup_pci();
}
diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c
index b428835e5919..6fa5402ebf20 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c
@@ -33,7 +33,6 @@
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
#include <soc/fsl/qe/qe.h>
-#include <soc/fsl/qe/qe_ic.h>
#include "mpc83xx.h"
@@ -102,7 +101,7 @@ define_machine(mpc832x_mds) {
.name = "MPC832x MDS",
.probe = mpc832x_sys_probe,
.setup_arch = mpc832x_sys_setup_arch,
- .init_IRQ = mpc83xx_ipic_and_qe_init_IRQ,
+ .init_IRQ = mpc83xx_ipic_init_IRQ,
.get_irq = ipic_get_irq,
.restart = mpc83xx_restart,
.time_init = mpc83xx_time_init,
diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
index 4588ce632484..622c625d5ce4 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -22,7 +22,6 @@
#include <asm/ipic.h>
#include <asm/udbg.h>
#include <soc/fsl/qe/qe.h>
-#include <soc/fsl/qe/qe_ic.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
@@ -220,7 +219,7 @@ define_machine(mpc832x_rdb) {
.name = "MPC832x RDB",
.probe = mpc832x_rdb_probe,
.setup_arch = mpc832x_rdb_setup_arch,
- .init_IRQ = mpc83xx_ipic_and_qe_init_IRQ,
+ .init_IRQ = mpc83xx_ipic_init_IRQ,
.get_irq = ipic_get_irq,
.restart = mpc83xx_restart,
.time_init = mpc83xx_time_init,
diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c
index 4a4efa906d35..90d9cbfae659 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c
@@ -39,9 +39,7 @@
#include <asm/udbg.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
-#include <sysdev/simple_gpio.h>
#include <soc/fsl/qe/qe.h>
-#include <soc/fsl/qe/qe_ic.h>
#include "mpc83xx.h"
@@ -181,12 +179,6 @@ static int __init mpc836x_usb_cfg(void)
qe_usb_clock_set(QE_CLK21, 48000000);
} else {
setbits8(&bcsr[13], BCSR13_USBMODE);
- /*
- * The BCSR GPIOs are used to control power and
- * speed of the USB transceiver. This is needed for
- * the USB Host only.
- */
- simple_gpiochip_init("fsl,mpc8360mds-bcsr-gpio");
}
of_node_put(np);
@@ -209,7 +201,7 @@ define_machine(mpc836x_mds) {
.name = "MPC836x MDS",
.probe = mpc836x_mds_probe,
.setup_arch = mpc836x_mds_setup_arch,
- .init_IRQ = mpc83xx_ipic_and_qe_init_IRQ,
+ .init_IRQ = mpc83xx_ipic_init_IRQ,
.get_irq = ipic_get_irq,
.restart = mpc83xx_restart,
.time_init = mpc83xx_time_init,
diff --git a/arch/powerpc/platforms/83xx/mpc836x_rdk.c b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
index 9923059cb111..b4aac2cde849 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_rdk.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
@@ -17,7 +17,6 @@
#include <asm/ipic.h>
#include <asm/udbg.h>
#include <soc/fsl/qe/qe.h>
-#include <soc/fsl/qe/qe_ic.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
@@ -42,7 +41,7 @@ define_machine(mpc836x_rdk) {
.name = "MPC836x RDK",
.probe = mpc836x_rdk_probe,
.setup_arch = mpc836x_rdk_setup_arch,
- .init_IRQ = mpc83xx_ipic_and_qe_init_IRQ,
+ .init_IRQ = mpc83xx_ipic_init_IRQ,
.get_irq = ipic_get_irq,
.restart = mpc83xx_restart,
.time_init = mpc83xx_time_init,
diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h
index 459145623334..f37d04332fc7 100644
--- a/arch/powerpc/platforms/83xx/mpc83xx.h
+++ b/arch/powerpc/platforms/83xx/mpc83xx.h
@@ -72,13 +72,6 @@ extern int mpc837x_usb_cfg(void);
extern int mpc834x_usb_cfg(void);
extern int mpc831x_usb_cfg(void);
extern void mpc83xx_ipic_init_IRQ(void);
-#ifdef CONFIG_QUICC_ENGINE
-extern void mpc83xx_qe_init_IRQ(void);
-extern void mpc83xx_ipic_and_qe_init_IRQ(void);
-#else
-static inline void __init mpc83xx_qe_init_IRQ(void) {}
-#define mpc83xx_ipic_and_qe_init_IRQ mpc83xx_ipic_init_IRQ
-#endif /* CONFIG_QUICC_ENGINE */
#ifdef CONFIG_PCI
extern void mpc83xx_setup_pci(void);
diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c
index fe0606439b5a..a554b6d87cf7 100644
--- a/arch/powerpc/platforms/85xx/common.c
+++ b/arch/powerpc/platforms/85xx/common.c
@@ -86,29 +86,6 @@ void __init mpc85xx_cpm2_pic_init(void)
#endif
#ifdef CONFIG_QUICC_ENGINE
-void __init mpc85xx_qe_init(void)
-{
- struct device_node *np;
-
- np = of_find_compatible_node(NULL, NULL, "fsl,qe");
- if (!np) {
- np = of_find_node_by_name(NULL, "qe");
- if (!np) {
- pr_err("%s: Could not find Quicc Engine node\n",
- __func__);
- return;
- }
- }
-
- if (!of_device_is_available(np)) {
- of_node_put(np);
- return;
- }
-
- of_node_put(np);
-
-}
-
void __init mpc85xx_qe_par_io_init(void)
{
struct device_node *np;
diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
index 7ee2c6628f64..27ac38f7e1a9 100644
--- a/arch/powerpc/platforms/85xx/corenet_generic.c
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -24,7 +24,6 @@
#include <asm/mpic.h>
#include <asm/ehv_pic.h>
#include <asm/swiotlb.h>
-#include <soc/fsl/qe/qe_ic.h>
#include <linux/of_platform.h>
#include <sysdev/fsl_soc.h>
@@ -38,8 +37,6 @@ void __init corenet_gen_pic_init(void)
unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU |
MPIC_NO_RESET;
- struct device_node *np;
-
if (ppc_md.get_irq == mpic_get_coreint_irq)
flags |= MPIC_ENABLE_COREINT;
@@ -47,13 +44,6 @@ void __init corenet_gen_pic_init(void)
BUG_ON(mpic == NULL);
mpic_init(mpic);
-
- np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
- if (np) {
- qe_ic_init(np, 0, qe_ic_cascade_low_mpic,
- qe_ic_cascade_high_mpic);
- of_node_put(np);
- }
}
/*
@@ -66,8 +56,6 @@ void __init corenet_gen_setup_arch(void)
swiotlb_detect_4g();
pr_info("%s board\n", ppc_md.name);
-
- mpc85xx_qe_init();
}
static const struct of_device_id of_device_ids[] = {
diff --git a/arch/powerpc/platforms/85xx/mpc85xx.h b/arch/powerpc/platforms/85xx/mpc85xx.h
index fa23f9b0592c..cb84c5c56c36 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx.h
+++ b/arch/powerpc/platforms/85xx/mpc85xx.h
@@ -10,10 +10,8 @@ static inline void __init mpc85xx_cpm2_pic_init(void) {}
#endif /* CONFIG_CPM2 */
#ifdef CONFIG_QUICC_ENGINE
-extern void mpc85xx_qe_init(void);
extern void mpc85xx_qe_par_io_init(void);
#else
-static inline void __init mpc85xx_qe_init(void) {}
static inline void __init mpc85xx_qe_par_io_init(void) {}
#endif
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index 5ca254256c47..7759eca7d535 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -43,9 +43,7 @@
#include <asm/udbg.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
-#include <sysdev/simple_gpio.h>
#include <soc/fsl/qe/qe.h>
-#include <soc/fsl/qe/qe_ic.h>
#include <asm/mpic.h>
#include <asm/swiotlb.h>
#include "smp.h"
@@ -238,7 +236,6 @@ static void __init mpc85xx_mds_qe_init(void)
{
struct device_node *np;
- mpc85xx_qe_init();
mpc85xx_qe_par_io_init();
mpc85xx_mds_reset_ucc_phys();
@@ -270,33 +267,8 @@ static void __init mpc85xx_mds_qe_init(void)
}
}
-static void __init mpc85xx_mds_qeic_init(void)
-{
- struct device_node *np;
-
- np = of_find_compatible_node(NULL, NULL, "fsl,qe");
- if (!of_device_is_available(np)) {
- of_node_put(np);
- return;
- }
-
- np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
- if (!np) {
- np = of_find_node_by_type(NULL, "qeic");
- if (!np)
- return;
- }
-
- if (machine_is(p1021_mds))
- qe_ic_init(np, 0, qe_ic_cascade_low_mpic,
- qe_ic_cascade_high_mpic);
- else
- qe_ic_init(np, 0, qe_ic_cascade_muxed_mpic, NULL);
- of_node_put(np);
-}
#else
static void __init mpc85xx_mds_qe_init(void) { }
-static void __init mpc85xx_mds_qeic_init(void) { }
#endif /* CONFIG_QUICC_ENGINE */
static void __init mpc85xx_mds_setup_arch(void)
@@ -351,11 +323,6 @@ machine_arch_initcall(mpc8569_mds, board_fixups);
static int __init mpc85xx_publish_devices(void)
{
- if (machine_is(mpc8568_mds))
- simple_gpiochip_init("fsl,mpc8568mds-bcsr-gpio");
- if (machine_is(mpc8569_mds))
- simple_gpiochip_init("fsl,mpc8569mds-bcsr-gpio");
-
return mpc85xx_common_publish_devices();
}
@@ -371,7 +338,6 @@ static void __init mpc85xx_mds_pic_init(void)
BUG_ON(mpic == NULL);
mpic_init(mpic);
- mpc85xx_mds_qeic_init();
}
static int __init mpc85xx_mds_probe(void)
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
index d3c540ee558f..80a80174768c 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
@@ -23,7 +23,6 @@
#include <asm/udbg.h>
#include <asm/mpic.h>
#include <soc/fsl/qe/qe.h>
-#include <soc/fsl/qe/qe_ic.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
@@ -44,10 +43,6 @@ void __init mpc85xx_rdb_pic_init(void)
{
struct mpic *mpic;
-#ifdef CONFIG_QUICC_ENGINE
- struct device_node *np;
-#endif
-
if (of_machine_is_compatible("fsl,MPC85XXRDB-CAMP")) {
mpic = mpic_alloc(NULL, 0, MPIC_NO_RESET |
MPIC_BIG_ENDIAN |
@@ -62,18 +57,6 @@ void __init mpc85xx_rdb_pic_init(void)
BUG_ON(mpic == NULL);
mpic_init(mpic);
-
-#ifdef CONFIG_QUICC_ENGINE
- np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
- if (np) {
- qe_ic_init(np, 0, qe_ic_cascade_low_mpic,
- qe_ic_cascade_high_mpic);
- of_node_put(np);
-
- } else
- pr_err("%s: Could not find qe-ic node\n", __func__);
-#endif
-
}
/*
@@ -89,7 +72,6 @@ static void __init mpc85xx_rdb_setup_arch(void)
fsl_pci_assign_primary();
#ifdef CONFIG_QUICC_ENGINE
- mpc85xx_qe_init();
mpc85xx_qe_par_io_init();
#if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE)
if (machine_is(p1025_rdb)) {
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 8c7ea2486bc0..48f7d96ae37d 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -252,6 +252,15 @@ static int smp_85xx_start_cpu(int cpu)
out_be64((u64 *)(&spin_table->addr_h),
__pa(ppc_function_entry(generic_secondary_smp_init)));
#else
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+ /*
+ * We need also to write addr_h to spin table for systems
+ * in which their physical memory start address was configured
+ * to above 4G, otherwise the secondary core can not get
+ * correct entry to start from.
+ */
+ out_be32(&spin_table->addr_h, __pa(__early_start) >> 32);
+#endif
out_be32(&spin_table->addr_l, __pa(__early_start));
#endif
flush_spin_table(spin_table);
diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c
index 720b0c0f03ba..eaec099b4077 100644
--- a/arch/powerpc/platforms/85xx/twr_p102x.c
+++ b/arch/powerpc/platforms/85xx/twr_p102x.c
@@ -19,7 +19,6 @@
#include <asm/udbg.h>
#include <asm/mpic.h>
#include <soc/fsl/qe/qe.h>
-#include <soc/fsl/qe/qe_ic.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
@@ -31,26 +30,12 @@ static void __init twr_p1025_pic_init(void)
{
struct mpic *mpic;
-#ifdef CONFIG_QUICC_ENGINE
- struct device_node *np;
-#endif
-
mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
MPIC_SINGLE_DEST_CPU,
0, 256, " OpenPIC ");
BUG_ON(mpic == NULL);
mpic_init(mpic);
-
-#ifdef CONFIG_QUICC_ENGINE
- np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
- if (np) {
- qe_ic_init(np, 0, qe_ic_cascade_low_mpic,
- qe_ic_cascade_high_mpic);
- of_node_put(np);
- } else
- pr_err("Could not find qe-ic node\n");
-#endif
}
/* ************************************************************************
@@ -60,10 +45,6 @@ static void __init twr_p1025_pic_init(void)
*/
static void __init twr_p1025_setup_arch(void)
{
-#ifdef CONFIG_QUICC_ENGINE
- struct device_node *np;
-#endif
-
if (ppc_md.progress)
ppc_md.progress("twr_p1025_setup_arch()", 0);
@@ -72,12 +53,12 @@ static void __init twr_p1025_setup_arch(void)
fsl_pci_assign_primary();
#ifdef CONFIG_QUICC_ENGINE
- mpc85xx_qe_init();
mpc85xx_qe_par_io_init();
#if IS_ENABLED(CONFIG_UCC_GETH) || IS_ENABLED(CONFIG_SERIAL_QE)
if (machine_is(twr_p1025)) {
struct ccsr_guts __iomem *guts;
+ struct device_node *np;
np = of_find_compatible_node(NULL, NULL, "fsl,p1021-guts");
if (np) {
diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
index 96b27f6fdd0f..7733d0607da2 100644
--- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
+++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
@@ -34,7 +34,6 @@
#include <linux/of_platform.h>
#include <sysdev/fsl_pci.h>
#include <sysdev/fsl_soc.h>
-#include <sysdev/simple_gpio.h>
#include "mpc86xx.h"
@@ -93,9 +92,6 @@ static const struct of_device_id mpc8610_ids[] __initconst = {
static int __init mpc8610_declare_of_platform_devices(void)
{
- /* Firstly, register PIXIS GPIOs. */
- simple_gpiochip_init("fsl,fpga-pixis-gpio-bank");
-
/* Enable wakeup on PIXIS' event IRQ. */
mpc8610_suspend_init();
diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c
index 0f65c51271db..a43ee7d1ff85 100644
--- a/arch/powerpc/platforms/8xx/cpm1.c
+++ b/arch/powerpc/platforms/8xx/cpm1.c
@@ -51,7 +51,7 @@
#define CPM_MAP_SIZE (0x4000)
cpm8xx_t __iomem *cpmp; /* Pointer to comm processor space */
-immap_t __iomem *mpc8xx_immr;
+immap_t __iomem *mpc8xx_immr = (void __iomem *)VIRT_IMMR_BASE;
static cpic8xx_t __iomem *cpic_reg;
static struct irq_domain *cpm_pic_host;
@@ -130,7 +130,7 @@ static const struct irq_domain_ops cpm_pic_host_ops = {
.map = cpm_pic_host_map,
};
-unsigned int cpm_pic_init(void)
+unsigned int __init cpm_pic_init(void)
{
struct device_node *np = NULL;
struct resource res;
@@ -201,12 +201,6 @@ void __init cpm_reset(void)
{
sysconf8xx_t __iomem *siu_conf;
- mpc8xx_immr = ioremap(get_immrbase(), 0x4000);
- if (!mpc8xx_immr) {
- printk(KERN_CRIT "Could not map IMMR\n");
- return;
- }
-
cpmp = &mpc8xx_immr->im_cpm;
#ifndef CONFIG_PPC_EARLY_DEBUG_CPM
@@ -306,7 +300,7 @@ struct cpm_ioport32e {
__be32 dir, par, sor, odr, dat;
};
-static void cpm1_set_pin32(int port, int pin, int flags)
+static void __init cpm1_set_pin32(int port, int pin, int flags)
{
struct cpm_ioport32e __iomem *iop;
pin = 1 << (31 - pin);
@@ -348,7 +342,7 @@ static void cpm1_set_pin32(int port, int pin, int flags)
}
}
-static void cpm1_set_pin16(int port, int pin, int flags)
+static void __init cpm1_set_pin16(int port, int pin, int flags)
{
struct cpm_ioport16 __iomem *iop =
(struct cpm_ioport16 __iomem *)&mpc8xx_immr->im_ioport;
@@ -386,7 +380,7 @@ static void cpm1_set_pin16(int port, int pin, int flags)
}
}
-void cpm1_set_pin(enum cpm_port port, int pin, int flags)
+void __init cpm1_set_pin(enum cpm_port port, int pin, int flags)
{
if (port == CPM_PORTB || port == CPM_PORTE)
cpm1_set_pin32(port, pin, flags);
@@ -394,7 +388,7 @@ void cpm1_set_pin(enum cpm_port port, int pin, int flags)
cpm1_set_pin16(port, pin, flags);
}
-int cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode)
+int __init cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode)
{
int shift;
int i, bits = 0;
diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c
index e9617d35fd1f..f2ba837249d6 100644
--- a/arch/powerpc/platforms/8xx/pic.c
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -125,7 +125,7 @@ static const struct irq_domain_ops mpc8xx_pic_host_ops = {
.xlate = mpc8xx_pic_host_xlate,
};
-int mpc8xx_pic_init(void)
+int __init mpc8xx_pic_init(void)
{
struct resource res;
struct device_node *np;
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index f3fb79fccc72..1f8025383caa 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -177,6 +177,10 @@ config PPC_970_NAP
config PPC_P7_NAP
bool
+config PPC_BOOK3S_IDLE
+ def_bool y
+ depends on (PPC_970_NAP || PPC_P7_NAP)
+
config PPC_INDIRECT_PIO
bool
select GENERIC_IOMAP
@@ -197,7 +201,8 @@ endmenu
config PPC601_SYNC_FIX
bool "Workarounds for PPC601 bugs"
- depends on PPC_BOOK3S_32 && PPC_PMAC
+ depends on PPC_BOOK3S_601 && PPC_PMAC
+ default y
help
Some versions of the PPC601 (the first PowerPC chip) have bugs which
mean that extra synchronization instructions are required near
@@ -302,16 +307,6 @@ config GEN_RTC
replacing their get_rtc_time/set_rtc_time callbacks with
a proper RTC device driver.
-config SIMPLE_GPIO
- bool "Support for simple, memory-mapped GPIO controllers"
- depends on PPC
- select GPIOLIB
- help
- Say Y here to support simple, memory-mapped GPIO controllers.
- These are usually BCSRs used to control board's switches, LEDs,
- chip-selects, Ethernet/USB PHY's power and various other small
- on-board peripherals.
-
config MCU_MPC8349EMITX
bool "MPC8349E-mITX MCU driver"
depends on I2C=y && PPC_83xx
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 56a7c814160d..6caedc88474f 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -1,4 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
+config PPC32
+ bool
+ default y if !PPC64
+ select KASAN_VMALLOC if KASAN && MODULES
+
config PPC64
bool "64-bit kernel"
select ZLIB_DEFLATE
@@ -6,6 +11,9 @@ config PPC64
This option selects whether a 32-bit or a 64-bit kernel
will be built.
+config PPC_BOOK3S_32
+ bool
+
menu "Processor support"
choice
prompt "Processor Type"
@@ -21,12 +29,21 @@ choice
If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx.
-config PPC_BOOK3S_32
- bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx"
+config PPC_BOOK3S_6xx
+ bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx except 601"
+ select PPC_BOOK3S_32
select PPC_FPU
select PPC_HAVE_PMU_SUPPORT
select PPC_HAVE_KUEP
select PPC_HAVE_KUAP
+ select HAVE_ARCH_VMAP_STACK
+
+config PPC_BOOK3S_601
+ bool "PowerPC 601"
+ select PPC_BOOK3S_32
+ select PPC_FPU
+ select PPC_HAVE_KUAP
+ select HAVE_ARCH_VMAP_STACK
config PPC_85xx
bool "Freescale 85xx"
@@ -39,6 +56,7 @@ config PPC_8xx
select PPC_HAVE_KUEP
select PPC_HAVE_KUAP
select PPC_MM_SLICES if HUGETLB_PAGE
+ select HAVE_ARCH_VMAP_STACK
config 40x
bool "AMCC 40x"
@@ -405,13 +423,13 @@ config PPC_MM_SLICES
bool
config PPC_HAVE_PMU_SUPPORT
- bool
+ bool
config PPC_PERF_CTRS
- def_bool y
- depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT
- help
- This enables the powerpc-specific perf_event back-end.
+ def_bool y
+ depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT
+ help
+ This enables the powerpc-specific perf_event back-end.
config FORCE_SMP
# Allow platforms to force SMP=y by selecting this
@@ -449,9 +467,10 @@ config NOT_COHERENT_CACHE
bool
depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || \
GAMECUBE_COMMON || AMIGAONE
- select ARCH_HAS_DMA_COHERENT_TO_PFN
+ select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_HAS_SYNC_DMA_FOR_CPU
+ select DMA_DIRECT_REMAP
default n if PPC_47x
default y
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 16dfee29aa41..ca9ffc1c8685 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -486,7 +486,7 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
window->table.it_size = size >> window->table.it_page_shift;
window->table.it_ops = &cell_iommu_ops;
- iommu_init_table(&window->table, iommu->nid);
+ iommu_init_table(&window->table, iommu->nid, 0, 0);
pr_debug("\tioid %d\n", window->ioid);
pr_debug("\tblocksize %ld\n", window->table.it_blocksize);
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index 9680d766f20e..855eedb8d7d7 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -240,9 +240,6 @@ static void __init cell_setup_arch(void)
init_pci_config_tokens();
cbe_pervasive_init();
-#ifdef CONFIG_DUMMY_CONSOLE
- conswitchp = &dummy_con;
-#endif
mmio_nvram_init();
}
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 065ff14b76e1..25390569e24c 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -10,6 +10,8 @@
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include <linux/fsnotify.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
@@ -20,7 +22,6 @@
#include <linux/pagemap.h>
#include <linux/poll.h>
#include <linux/slab.h>
-#include <linux/parser.h>
#include <asm/prom.h>
#include <asm/spu.h>
@@ -30,7 +31,7 @@
#include "spufs.h"
struct spufs_sb_info {
- int debug;
+ bool debug;
};
static struct kmem_cache *spufs_inode_cache;
@@ -197,14 +198,12 @@ static int spufs_fill_dir(struct dentry *dir,
static int spufs_dir_close(struct inode *inode, struct file *file)
{
- struct spu_context *ctx;
struct inode *parent;
struct dentry *dir;
int ret;
dir = file->f_path.dentry;
parent = d_inode(dir->d_parent);
- ctx = SPUFS_I(d_inode(dir))->i_ctx;
inode_lock_nested(parent, I_MUTEX_PARENT);
ret = spufs_rmdir(parent, dir);
@@ -574,16 +573,22 @@ long spufs_create(struct path *path, struct dentry *dentry,
}
/* File system initialization */
+struct spufs_fs_context {
+ kuid_t uid;
+ kgid_t gid;
+ umode_t mode;
+};
+
enum {
- Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err,
+ Opt_uid, Opt_gid, Opt_mode, Opt_debug,
};
-static const match_table_t spufs_tokens = {
- { Opt_uid, "uid=%d" },
- { Opt_gid, "gid=%d" },
- { Opt_mode, "mode=%o" },
- { Opt_debug, "debug" },
- { Opt_err, NULL },
+static const struct fs_parameter_spec spufs_fs_parameters[] = {
+ fsparam_u32 ("gid", Opt_gid),
+ fsparam_u32oct ("mode", Opt_mode),
+ fsparam_u32 ("uid", Opt_uid),
+ fsparam_flag ("debug", Opt_debug),
+ {}
};
static int spufs_show_options(struct seq_file *m, struct dentry *root)
@@ -604,47 +609,41 @@ static int spufs_show_options(struct seq_file *m, struct dentry *root)
return 0;
}
-static int
-spufs_parse_options(struct super_block *sb, char *options, struct inode *root)
-{
- char *p;
- substring_t args[MAX_OPT_ARGS];
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token, option;
-
- if (!*p)
- continue;
-
- token = match_token(p, spufs_tokens, args);
- switch (token) {
- case Opt_uid:
- if (match_int(&args[0], &option))
- return 0;
- root->i_uid = make_kuid(current_user_ns(), option);
- if (!uid_valid(root->i_uid))
- return 0;
- break;
- case Opt_gid:
- if (match_int(&args[0], &option))
- return 0;
- root->i_gid = make_kgid(current_user_ns(), option);
- if (!gid_valid(root->i_gid))
- return 0;
- break;
- case Opt_mode:
- if (match_octal(&args[0], &option))
- return 0;
- root->i_mode = option | S_IFDIR;
- break;
- case Opt_debug:
- spufs_get_sb_info(sb)->debug = 1;
- break;
- default:
- return 0;
- }
+static int spufs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct spufs_fs_context *ctx = fc->fs_private;
+ struct spufs_sb_info *sbi = fc->s_fs_info;
+ struct fs_parse_result result;
+ kuid_t uid;
+ kgid_t gid;
+ int opt;
+
+ opt = fs_parse(fc, spufs_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_uid:
+ uid = make_kuid(current_user_ns(), result.uint_32);
+ if (!uid_valid(uid))
+ return invalf(fc, "Unknown uid");
+ ctx->uid = uid;
+ break;
+ case Opt_gid:
+ gid = make_kgid(current_user_ns(), result.uint_32);
+ if (!gid_valid(gid))
+ return invalf(fc, "Unknown gid");
+ ctx->gid = gid;
+ break;
+ case Opt_mode:
+ ctx->mode = result.uint_32 & S_IALLUGO;
+ break;
+ case Opt_debug:
+ sbi->debug = true;
+ break;
}
- return 1;
+
+ return 0;
}
static void spufs_exit_isolated_loader(void)
@@ -678,79 +677,99 @@ spufs_init_isolated_loader(void)
printk(KERN_INFO "spufs: SPU isolation mode enabled\n");
}
-static int
-spufs_create_root(struct super_block *sb, void *data)
+static int spufs_create_root(struct super_block *sb, struct fs_context *fc)
{
+ struct spufs_fs_context *ctx = fc->fs_private;
struct inode *inode;
- int ret;
- ret = -ENODEV;
if (!spu_management_ops)
- goto out;
+ return -ENODEV;
- ret = -ENOMEM;
- inode = spufs_new_inode(sb, S_IFDIR | 0775);
+ inode = spufs_new_inode(sb, S_IFDIR | ctx->mode);
if (!inode)
- goto out;
+ return -ENOMEM;
+ inode->i_uid = ctx->uid;
+ inode->i_gid = ctx->gid;
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
SPUFS_I(inode)->i_ctx = NULL;
inc_nlink(inode);
- ret = -EINVAL;
- if (!spufs_parse_options(sb, data, inode))
- goto out_iput;
-
- ret = -ENOMEM;
sb->s_root = d_make_root(inode);
if (!sb->s_root)
- goto out;
-
+ return -ENOMEM;
return 0;
-out_iput:
- iput(inode);
-out:
- return ret;
}
-static int
-spufs_fill_super(struct super_block *sb, void *data, int silent)
-{
- struct spufs_sb_info *info;
- static const struct super_operations s_ops = {
- .alloc_inode = spufs_alloc_inode,
- .free_inode = spufs_free_inode,
- .statfs = simple_statfs,
- .evict_inode = spufs_evict_inode,
- .show_options = spufs_show_options,
- };
-
- info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (!info)
- return -ENOMEM;
+static const struct super_operations spufs_ops = {
+ .alloc_inode = spufs_alloc_inode,
+ .free_inode = spufs_free_inode,
+ .statfs = simple_statfs,
+ .evict_inode = spufs_evict_inode,
+ .show_options = spufs_show_options,
+};
+static int spufs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = SPUFS_MAGIC;
- sb->s_op = &s_ops;
- sb->s_fs_info = info;
+ sb->s_op = &spufs_ops;
- return spufs_create_root(sb, data);
+ return spufs_create_root(sb, fc);
+}
+
+static int spufs_get_tree(struct fs_context *fc)
+{
+ return get_tree_single(fc, spufs_fill_super);
}
-static struct dentry *
-spufs_mount(struct file_system_type *fstype, int flags,
- const char *name, void *data)
+static void spufs_free_fc(struct fs_context *fc)
{
- return mount_single(fstype, flags, data, spufs_fill_super);
+ kfree(fc->s_fs_info);
+}
+
+static const struct fs_context_operations spufs_context_ops = {
+ .free = spufs_free_fc,
+ .parse_param = spufs_parse_param,
+ .get_tree = spufs_get_tree,
+};
+
+static int spufs_init_fs_context(struct fs_context *fc)
+{
+ struct spufs_fs_context *ctx;
+ struct spufs_sb_info *sbi;
+
+ ctx = kzalloc(sizeof(struct spufs_fs_context), GFP_KERNEL);
+ if (!ctx)
+ goto nomem;
+
+ sbi = kzalloc(sizeof(struct spufs_sb_info), GFP_KERNEL);
+ if (!sbi)
+ goto nomem_ctx;
+
+ ctx->uid = current_uid();
+ ctx->gid = current_gid();
+ ctx->mode = 0755;
+
+ fc->fs_private = ctx;
+ fc->s_fs_info = sbi;
+ fc->ops = &spufs_context_ops;
+ return 0;
+
+nomem_ctx:
+ kfree(ctx);
+nomem:
+ return -ENOMEM;
}
static struct file_system_type spufs_type = {
.owner = THIS_MODULE,
.name = "spufs",
- .mount = spufs_mount,
+ .init_fs_context = spufs_init_fs_context,
+ .parameters = spufs_fs_parameters,
.kill_sb = kill_litter_super,
};
MODULE_ALIAS_FS("spufs");
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index 9cd6f3e1000b..6f019df37916 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -183,9 +183,6 @@ static void __init maple_setup_arch(void)
/* Lookup PCI hosts */
maple_pci_init();
-#ifdef CONFIG_DUMMY_CONSOLE
- conswitchp = &dummy_con;
-#endif
maple_use_rtas_reboot_and_halt_if_present();
printk(KERN_DEBUG "Using native/NAP idle loop\n");
@@ -232,7 +229,7 @@ static void __init maple_init_IRQ(void)
root = of_find_node_by_path("/");
naddr = of_n_addr_cells(root);
opprop = of_get_property(root, "platform-open-pic", &opplen);
- if (opprop != 0) {
+ if (opprop) {
openpic_addr = of_read_number(opprop, naddr);
has_isus = (opplen > naddr);
printk(KERN_DEBUG "OpenPIC addr: %lx, has ISUs: %d\n",
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index 77fee09104f8..b500a6e47e6b 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -146,7 +146,7 @@ static void iommu_table_iobmap_setup(void)
*/
iommu_table_iobmap.it_blocksize = 4;
iommu_table_iobmap.it_ops = &iommu_table_iobmap_ops;
- iommu_init_table(&iommu_table_iobmap, 0);
+ iommu_init_table(&iommu_table_iobmap, 0, 0, 0);
pr_debug(" <- %s\n", __func__);
}
diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
index 05a52f10c2f0..b612474f8f8e 100644
--- a/arch/powerpc/platforms/pasemi/setup.c
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -147,10 +147,6 @@ static void __init pas_setup_arch(void)
/* Lookup PCI hosts */
pas_pci_init();
-#ifdef CONFIG_DUMMY_CONSOLE
- conswitchp = &dummy_con;
-#endif
-
/* Remap SDC register for doing reset */
/* XXXOJN This should maybe come out of the device tree */
reset_reg = ioremap(0xfc101100, 4);
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 850eee860cf2..938803eab0ad 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -12,7 +12,6 @@ config PPC_POWERNV
select EPAPR_BOOT
select PPC_INDIRECT_PIO
select PPC_UDBG_16550
- select PPC_SCOM
select ARCH_RANDOM
select CPU_FREQ
select PPC_DOORBELL
@@ -47,3 +46,7 @@ config PPC_VAS
VAS adapters are found in POWER9 based systems.
If unsure, say N.
+
+config SCOM_DEBUGFS
+ bool "Expose SCOM controllers via debugfs"
+ depends on DEBUG_FS
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index da2e99efbd04..c0f8120045c3 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -4,15 +4,20 @@ obj-y += idle.o opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o
+obj-y += ultravisor.o
obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
+obj-$(CONFIG_FA_DUMP) += opal-fadump.o
+obj-$(CONFIG_PRESERVE_FA_DUMP) += opal-fadump.o
+obj-$(CONFIG_OPAL_CORE) += opal-core.o
obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o
obj-$(CONFIG_CXL_BASE) += pci-cxl.o
obj-$(CONFIG_EEH) += eeh-powernv.o
-obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
obj-$(CONFIG_OPAL_PRD) += opal-prd.o
obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o
obj-$(CONFIG_OCXL_BASE) += ocxl.o
+obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o
+obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 620a986209f5..6f300ab7f0e9 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -34,6 +34,7 @@
#include "powernv.h"
#include "pci.h"
+#include "../../../../drivers/pci/pci.h"
static int eeh_event_irq = -EINVAL;
@@ -41,13 +42,10 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
{
struct pci_dn *pdn = pci_get_pdn(pdev);
- if (!pdev->is_virtfn)
+ if (!pdn || eeh_has_flag(EEH_FORCE_DISABLED))
return;
- /*
- * The following operations will fail if VF's sysfs files
- * aren't created or its resources aren't finalized.
- */
+ dev_dbg(&pdev->dev, "EEH: Setting up device\n");
eeh_add_device_early(pdn);
eeh_add_device_late(pdev);
eeh_sysfs_add_device(pdev);
@@ -199,6 +197,25 @@ PNV_EEH_DBGFS_ENTRY(inbB, 0xE10);
#endif /* CONFIG_DEBUG_FS */
+void pnv_eeh_enable_phbs(void)
+{
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb = hose->private_data;
+ /*
+ * If EEH is enabled, we're going to rely on that.
+ * Otherwise, we restore to conventional mechanism
+ * to clear frozen PE during PCI config access.
+ */
+ if (eeh_enabled())
+ phb->flags |= PNV_PHB_FLAG_EEH;
+ else
+ phb->flags &= ~PNV_PHB_FLAG_EEH;
+ }
+}
+
/**
* pnv_eeh_post_init - EEH platform dependent post initialization
*
@@ -213,9 +230,7 @@ int pnv_eeh_post_init(void)
struct pnv_phb *phb;
int ret = 0;
- /* Probe devices & build address cache */
- eeh_probe_devices();
- eeh_addr_cache_build();
+ eeh_show_enabled();
/* Register OPAL event notifier */
eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
@@ -237,19 +252,11 @@ int pnv_eeh_post_init(void)
if (!eeh_enabled())
disable_irq(eeh_event_irq);
+ pnv_eeh_enable_phbs();
+
list_for_each_entry(hose, &hose_list, list_node) {
phb = hose->private_data;
- /*
- * If EEH is enabled, we're going to rely on that.
- * Otherwise, we restore to conventional mechanism
- * to clear frozen PE during PCI config access.
- */
- if (eeh_enabled())
- phb->flags |= PNV_PHB_FLAG_EEH;
- else
- phb->flags &= ~PNV_PHB_FLAG_EEH;
-
/* Create debugfs entries */
#ifdef CONFIG_DEBUG_FS
if (phb->has_dbgfs || !phb->dbgfs)
@@ -377,6 +384,8 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
return NULL;
+ eeh_edev_dbg(edev, "Probing device\n");
+
/* Initialize eeh device */
edev->class_code = pdn->class_code;
edev->mode &= 0xFFFFFF00;
@@ -402,9 +411,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
/* Create PE */
ret = eeh_add_to_parent_pe(edev);
if (ret) {
- pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%x)\n",
- __func__, hose->global_number, pdn->busno,
- PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn), ret);
+ eeh_edev_warn(edev, "Failed to add device to PE (code %d)\n", ret);
return NULL;
}
@@ -453,11 +460,17 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
* Enable EEH explicitly so that we will do EEH check
* while accessing I/O stuff
*/
- eeh_add_flag(EEH_ENABLED);
+ if (!eeh_has_flag(EEH_ENABLED)) {
+ enable_irq(eeh_event_irq);
+ pnv_eeh_enable_phbs();
+ eeh_add_flag(EEH_ENABLED);
+ }
/* Save memory bars */
eeh_save_bars(edev);
+ eeh_edev_dbg(edev, "EEH enabled on device\n");
+
return NULL;
}
@@ -837,7 +850,7 @@ static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
int aer = edev ? edev->aer_cap : 0;
u32 ctrl;
- pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n",
+ pr_debug("%s: Secondary Reset PCI bus %04x:%02x with option %d\n",
__func__, pci_domain_nr(dev->bus),
dev->bus->number, option);
@@ -895,6 +908,10 @@ static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int option)
if (!dn || !of_get_property(dn, "ibm,reset-by-firmware", NULL))
return __pnv_eeh_bridge_reset(pdev, option);
+ pr_debug("%s: FW reset PCI bus %04x:%02x with option %d\n",
+ __func__, pci_domain_nr(pdev->bus),
+ pdev->bus->number, option);
+
switch (option) {
case EEH_RESET_FUNDAMENTAL:
scope = OPAL_RESET_PCI_FUNDAMENTAL;
@@ -1113,17 +1130,37 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
return -EIO;
}
+ if (pci_is_root_bus(bus))
+ return pnv_eeh_root_reset(hose, option);
+
/*
- * If dealing with the root bus (or the bus underneath the
- * root port), we reset the bus underneath the root port.
+ * For hot resets try use the generic PCI error recovery reset
+ * functions. These correctly handles the case where the secondary
+ * bus is behind a hotplug slot and it will use the slot provided
+ * reset methods to prevent spurious hotplug events during the reset.
*
- * The cxl driver depends on this behaviour for bi-modal card
- * switching.
+ * Fundemental resets need to be handled internally to EEH since the
+ * PCI core doesn't really have a concept of a fundemental reset,
+ * mainly because there's no standard way to generate one. Only a
+ * few devices require an FRESET so it should be fine.
*/
- if (pci_is_root_bus(bus) ||
- pci_is_root_bus(bus->parent))
- return pnv_eeh_root_reset(hose, option);
+ if (option != EEH_RESET_FUNDAMENTAL) {
+ /*
+ * NB: Skiboot and pnv_eeh_bridge_reset() also no-op the
+ * de-assert step. It's like the OPAL reset API was
+ * poorly designed or something...
+ */
+ if (option == EEH_RESET_DEACTIVATE)
+ return 0;
+ rc = pci_bus_error_reset(bus->self);
+ if (!rc)
+ return 0;
+ }
+
+ /* otherwise, use the generic bridge reset. this might call into FW */
+ if (pci_is_root_bus(bus->parent))
+ return pnv_eeh_root_reset(hose, option);
return pnv_eeh_bridge_reset(bus->self, option);
}
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 09f49eed7fb8..78599bca66c2 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -675,7 +675,8 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
sprs.ptcr = mfspr(SPRN_PTCR);
sprs.rpr = mfspr(SPRN_RPR);
sprs.tscr = mfspr(SPRN_TSCR);
- sprs.ldbar = mfspr(SPRN_LDBAR);
+ if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+ sprs.ldbar = mfspr(SPRN_LDBAR);
sprs_saved = true;
@@ -789,7 +790,8 @@ core_woken:
mtspr(SPRN_MMCR0, sprs.mmcr0);
mtspr(SPRN_MMCR1, sprs.mmcr1);
mtspr(SPRN_MMCR2, sprs.mmcr2);
- mtspr(SPRN_LDBAR, sprs.ldbar);
+ if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+ mtspr(SPRN_LDBAR, sprs.ldbar);
mtspr(SPRN_SPRG3, local_paca->sprg_vdso);
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index c16249d251f1..b95b9e3c4c98 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -89,6 +89,7 @@ struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
}
EXPORT_SYMBOL(pnv_pci_get_npu_dev);
+#ifdef CONFIG_IOMMU_API
/*
* Returns the PE assoicated with the PCI device of the given
* NPU. Returns the linked pci device if pci_dev != NULL.
@@ -192,106 +193,6 @@ static long pnv_npu_unset_window(struct iommu_table_group *table_group, int num)
return 0;
}
-/*
- * Enables 32 bit DMA on NPU.
- */
-static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe)
-{
- struct pci_dev *gpdev;
- struct pnv_ioda_pe *gpe;
- int64_t rc;
-
- /*
- * Find the assoicated PCI devices and get the dma window
- * information from there.
- */
- if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
- return;
-
- gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
- if (!gpe)
- return;
-
- rc = pnv_npu_set_window(&npe->table_group, 0,
- gpe->table_group.tables[0]);
-
- /*
- * NVLink devices use the same TCE table configuration as
- * their parent device so drivers shouldn't be doing DMA
- * operations directly on these devices.
- */
- set_dma_ops(&npe->pdev->dev, &dma_dummy_ops);
-}
-
-/*
- * Enables bypass mode on the NPU. The NPU only supports one
- * window per link, so bypass needs to be explicitly enabled or
- * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be
- * active at the same time.
- */
-static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe)
-{
- struct pnv_phb *phb = npe->phb;
- int64_t rc = 0;
- phys_addr_t top = memblock_end_of_DRAM();
-
- if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev)
- return -EINVAL;
-
- rc = pnv_npu_unset_window(&npe->table_group, 0);
- if (rc != OPAL_SUCCESS)
- return rc;
-
- /* Enable the bypass window */
-
- top = roundup_pow_of_two(top);
- dev_info(&npe->pdev->dev, "Enabling bypass for PE %x\n",
- npe->pe_number);
- rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
- npe->pe_number, npe->pe_number,
- 0 /* bypass base */, top);
-
- if (rc == OPAL_SUCCESS)
- pnv_pci_ioda2_tce_invalidate_entire(phb, false);
-
- return rc;
-}
-
-void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass)
-{
- int i;
- struct pnv_phb *phb;
- struct pci_dn *pdn;
- struct pnv_ioda_pe *npe;
- struct pci_dev *npdev;
-
- for (i = 0; ; ++i) {
- npdev = pnv_pci_get_npu_dev(gpdev, i);
-
- if (!npdev)
- break;
-
- pdn = pci_get_pdn(npdev);
- if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
- return;
-
- phb = pci_bus_to_host(npdev->bus)->private_data;
-
- /* We only do bypass if it's enabled on the linked device */
- npe = &phb->ioda.pe_array[pdn->pe_number];
-
- if (bypass) {
- dev_info(&npdev->dev,
- "Using 64-bit DMA iommu bypass\n");
- pnv_npu_dma_set_bypass(npe);
- } else {
- dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
- pnv_npu_dma_set_32(npe);
- }
- }
-}
-
-#ifdef CONFIG_IOMMU_API
/* Switch ownership from platform code to external user (e.g. VFIO) */
static void pnv_npu_take_ownership(struct iommu_table_group *table_group)
{
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index 29ca523c1c79..5cd0f52d258f 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -257,7 +257,7 @@ OPAL_CALL(opal_xive_set_queue_info, OPAL_XIVE_SET_QUEUE_INFO);
OPAL_CALL(opal_xive_donate_page, OPAL_XIVE_DONATE_PAGE);
OPAL_CALL(opal_xive_alloc_vp_block, OPAL_XIVE_ALLOCATE_VP_BLOCK);
OPAL_CALL(opal_xive_free_vp_block, OPAL_XIVE_FREE_VP_BLOCK);
-OPAL_CALL(opal_xive_allocate_irq, OPAL_XIVE_ALLOCATE_IRQ);
+OPAL_CALL(opal_xive_allocate_irq_raw, OPAL_XIVE_ALLOCATE_IRQ);
OPAL_CALL(opal_xive_free_irq, OPAL_XIVE_FREE_IRQ);
OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
@@ -287,3 +287,9 @@ OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
OPAL_CALL(opal_sensor_read_u64, OPAL_SENSOR_READ_U64);
OPAL_CALL(opal_sensor_group_enable, OPAL_SENSOR_GROUP_ENABLE);
OPAL_CALL(opal_nx_coproc_init, OPAL_NX_COPROC_INIT);
+OPAL_CALL(opal_mpipl_update, OPAL_MPIPL_UPDATE);
+OPAL_CALL(opal_mpipl_register_tag, OPAL_MPIPL_REGISTER_TAG);
+OPAL_CALL(opal_mpipl_query_tag, OPAL_MPIPL_QUERY_TAG);
+OPAL_CALL(opal_secvar_get, OPAL_SECVAR_GET);
+OPAL_CALL(opal_secvar_get_next, OPAL_SECVAR_GET_NEXT);
+OPAL_CALL(opal_secvar_enqueue_update, OPAL_SECVAR_ENQUEUE_UPDATE);
diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c
new file mode 100644
index 000000000000..ed895d82c048
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-core.c
@@ -0,0 +1,636 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Interface for exporting the OPAL ELF core.
+ * Heavily inspired from fs/proc/vmcore.c
+ *
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "opal core: " fmt
+
+#include <linux/memblock.h>
+#include <linux/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/elf.h>
+#include <linux/elfcore.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/slab.h>
+#include <linux/crash_core.h>
+#include <linux/of.h>
+
+#include <asm/page.h>
+#include <asm/opal.h>
+#include <asm/fadump-internal.h>
+
+#include "opal-fadump.h"
+
+#define MAX_PT_LOAD_CNT 8
+
+/* NT_AUXV note related info */
+#define AUXV_CNT 1
+#define AUXV_DESC_SZ (((2 * AUXV_CNT) + 1) * sizeof(Elf64_Off))
+
+struct opalcore_config {
+ u32 num_cpus;
+ /* PIR value of crashing CPU */
+ u32 crashing_cpu;
+
+ /* CPU state data info from F/W */
+ u64 cpu_state_destination_vaddr;
+ u64 cpu_state_data_size;
+ u64 cpu_state_entry_size;
+
+ /* OPAL memory to be exported as PT_LOAD segments */
+ u64 ptload_addr[MAX_PT_LOAD_CNT];
+ u64 ptload_size[MAX_PT_LOAD_CNT];
+ u64 ptload_cnt;
+
+ /* Pointer to the first PT_LOAD in the ELF core file */
+ Elf64_Phdr *ptload_phdr;
+
+ /* Total size of opalcore file. */
+ size_t opalcore_size;
+
+ /* Buffer for all the ELF core headers and the PT_NOTE */
+ size_t opalcorebuf_sz;
+ char *opalcorebuf;
+
+ /* NT_AUXV buffer */
+ char auxv_buf[AUXV_DESC_SZ];
+};
+
+struct opalcore {
+ struct list_head list;
+ u64 paddr;
+ size_t size;
+ loff_t offset;
+};
+
+static LIST_HEAD(opalcore_list);
+static struct opalcore_config *oc_conf;
+static const struct opal_mpipl_fadump *opalc_metadata;
+static const struct opal_mpipl_fadump *opalc_cpu_metadata;
+
+/*
+ * Set crashing CPU's signal to SIGUSR1. if the kernel is triggered
+ * by kernel, SIGTERM otherwise.
+ */
+bool kernel_initiated;
+
+static struct opalcore * __init get_new_element(void)
+{
+ return kzalloc(sizeof(struct opalcore), GFP_KERNEL);
+}
+
+static inline int is_opalcore_usable(void)
+{
+ return (oc_conf && oc_conf->opalcorebuf != NULL) ? 1 : 0;
+}
+
+static Elf64_Word *append_elf64_note(Elf64_Word *buf, char *name,
+ u32 type, void *data,
+ size_t data_len)
+{
+ Elf64_Nhdr *note = (Elf64_Nhdr *)buf;
+ Elf64_Word namesz = strlen(name) + 1;
+
+ note->n_namesz = cpu_to_be32(namesz);
+ note->n_descsz = cpu_to_be32(data_len);
+ note->n_type = cpu_to_be32(type);
+ buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf64_Word));
+ memcpy(buf, name, namesz);
+ buf += DIV_ROUND_UP(namesz, sizeof(Elf64_Word));
+ memcpy(buf, data, data_len);
+ buf += DIV_ROUND_UP(data_len, sizeof(Elf64_Word));
+
+ return buf;
+}
+
+static void fill_prstatus(struct elf_prstatus *prstatus, int pir,
+ struct pt_regs *regs)
+{
+ memset(prstatus, 0, sizeof(struct elf_prstatus));
+ elf_core_copy_kernel_regs(&(prstatus->pr_reg), regs);
+
+ /*
+ * Overload PID with PIR value.
+ * As a PIR value could also be '0', add an offset of '100'
+ * to every PIR to avoid misinterpretations in GDB.
+ */
+ prstatus->pr_pid = cpu_to_be32(100 + pir);
+ prstatus->pr_ppid = cpu_to_be32(1);
+
+ /*
+ * Indicate SIGUSR1 for crash initiated from kernel.
+ * SIGTERM otherwise.
+ */
+ if (pir == oc_conf->crashing_cpu) {
+ short sig;
+
+ sig = kernel_initiated ? SIGUSR1 : SIGTERM;
+ prstatus->pr_cursig = cpu_to_be16(sig);
+ }
+}
+
+static Elf64_Word *auxv_to_elf64_notes(Elf64_Word *buf,
+ u64 opal_boot_entry)
+{
+ Elf64_Off *bufp = (Elf64_Off *)oc_conf->auxv_buf;
+ int idx = 0;
+
+ memset(bufp, 0, AUXV_DESC_SZ);
+
+ /* Entry point of OPAL */
+ bufp[idx++] = cpu_to_be64(AT_ENTRY);
+ bufp[idx++] = cpu_to_be64(opal_boot_entry);
+
+ /* end of vector */
+ bufp[idx++] = cpu_to_be64(AT_NULL);
+
+ buf = append_elf64_note(buf, CRASH_CORE_NOTE_NAME, NT_AUXV,
+ oc_conf->auxv_buf, AUXV_DESC_SZ);
+ return buf;
+}
+
+/*
+ * Read from the ELF header and then the crash dump.
+ * Returns number of bytes read on success, -errno on failure.
+ */
+static ssize_t read_opalcore(struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *to,
+ loff_t pos, size_t count)
+{
+ struct opalcore *m;
+ ssize_t tsz, avail;
+ loff_t tpos = pos;
+
+ if (pos >= oc_conf->opalcore_size)
+ return 0;
+
+ /* Adjust count if it goes beyond opalcore size */
+ avail = oc_conf->opalcore_size - pos;
+ if (count > avail)
+ count = avail;
+
+ if (count == 0)
+ return 0;
+
+ /* Read ELF core header and/or PT_NOTE segment */
+ if (tpos < oc_conf->opalcorebuf_sz) {
+ tsz = min_t(size_t, oc_conf->opalcorebuf_sz - tpos, count);
+ memcpy(to, oc_conf->opalcorebuf + tpos, tsz);
+ to += tsz;
+ tpos += tsz;
+ count -= tsz;
+ }
+
+ list_for_each_entry(m, &opalcore_list, list) {
+ /* nothing more to read here */
+ if (count == 0)
+ break;
+
+ if (tpos < m->offset + m->size) {
+ void *addr;
+
+ tsz = min_t(size_t, m->offset + m->size - tpos, count);
+ addr = (void *)(m->paddr + tpos - m->offset);
+ memcpy(to, __va(addr), tsz);
+ to += tsz;
+ tpos += tsz;
+ count -= tsz;
+ }
+ }
+
+ return (tpos - pos);
+}
+
+static struct bin_attribute opal_core_attr = {
+ .attr = {.name = "core", .mode = 0400},
+ .read = read_opalcore
+};
+
+/*
+ * Read CPU state dump data and convert it into ELF notes.
+ *
+ * Each register entry is of 16 bytes, A numerical identifier along with
+ * a GPR/SPR flag in the first 8 bytes and the register value in the next
+ * 8 bytes. For more details refer to F/W documentation.
+ */
+static Elf64_Word * __init opalcore_append_cpu_notes(Elf64_Word *buf)
+{
+ u32 thread_pir, size_per_thread, regs_offset, regs_cnt, reg_esize;
+ struct hdat_fadump_thread_hdr *thdr;
+ struct elf_prstatus prstatus;
+ Elf64_Word *first_cpu_note;
+ struct pt_regs regs;
+ char *bufp;
+ int i;
+
+ size_per_thread = oc_conf->cpu_state_entry_size;
+ bufp = __va(oc_conf->cpu_state_destination_vaddr);
+
+ /*
+ * Offset for register entries, entry size and registers count is
+ * duplicated in every thread header in keeping with HDAT format.
+ * Use these values from the first thread header.
+ */
+ thdr = (struct hdat_fadump_thread_hdr *)bufp;
+ regs_offset = (offsetof(struct hdat_fadump_thread_hdr, offset) +
+ be32_to_cpu(thdr->offset));
+ reg_esize = be32_to_cpu(thdr->esize);
+ regs_cnt = be32_to_cpu(thdr->ecnt);
+
+ pr_debug("--------CPU State Data------------\n");
+ pr_debug("NumCpus : %u\n", oc_conf->num_cpus);
+ pr_debug("\tOffset: %u, Entry size: %u, Cnt: %u\n",
+ regs_offset, reg_esize, regs_cnt);
+
+ /*
+ * Skip past the first CPU note. Fill this note with the
+ * crashing CPU's prstatus.
+ */
+ first_cpu_note = buf;
+ buf = append_elf64_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
+ &prstatus, sizeof(prstatus));
+
+ for (i = 0; i < oc_conf->num_cpus; i++, bufp += size_per_thread) {
+ thdr = (struct hdat_fadump_thread_hdr *)bufp;
+ thread_pir = be32_to_cpu(thdr->pir);
+
+ pr_debug("[%04d] PIR: 0x%x, core state: 0x%02x\n",
+ i, thread_pir, thdr->core_state);
+
+ /*
+ * Register state data of MAX cores is provided by firmware,
+ * but some of this cores may not be active. So, while
+ * processing register state data, check core state and
+ * skip threads that belong to inactive cores.
+ */
+ if (thdr->core_state == HDAT_FADUMP_CORE_INACTIVE)
+ continue;
+
+ opal_fadump_read_regs((bufp + regs_offset), regs_cnt,
+ reg_esize, false, &regs);
+
+ pr_debug("PIR 0x%x - R1 : 0x%llx, NIP : 0x%llx\n", thread_pir,
+ be64_to_cpu(regs.gpr[1]), be64_to_cpu(regs.nip));
+ fill_prstatus(&prstatus, thread_pir, &regs);
+
+ if (thread_pir != oc_conf->crashing_cpu) {
+ buf = append_elf64_note(buf, CRASH_CORE_NOTE_NAME,
+ NT_PRSTATUS, &prstatus,
+ sizeof(prstatus));
+ } else {
+ /*
+ * Add crashing CPU as the first NT_PRSTATUS note for
+ * GDB to process the core file appropriately.
+ */
+ append_elf64_note(first_cpu_note, CRASH_CORE_NOTE_NAME,
+ NT_PRSTATUS, &prstatus,
+ sizeof(prstatus));
+ }
+ }
+
+ return buf;
+}
+
+static int __init create_opalcore(void)
+{
+ u64 opal_boot_entry, opal_base_addr, paddr;
+ u32 hdr_size, cpu_notes_size, count;
+ struct device_node *dn;
+ struct opalcore *new;
+ loff_t opalcore_off;
+ struct page *page;
+ Elf64_Phdr *phdr;
+ Elf64_Ehdr *elf;
+ int i, ret;
+ char *bufp;
+
+ /* Get size of header & CPU notes for OPAL core */
+ hdr_size = (sizeof(Elf64_Ehdr) +
+ ((oc_conf->ptload_cnt + 1) * sizeof(Elf64_Phdr)));
+ cpu_notes_size = ((oc_conf->num_cpus * (CRASH_CORE_NOTE_HEAD_BYTES +
+ CRASH_CORE_NOTE_NAME_BYTES +
+ CRASH_CORE_NOTE_DESC_BYTES)) +
+ (CRASH_CORE_NOTE_HEAD_BYTES +
+ CRASH_CORE_NOTE_NAME_BYTES + AUXV_DESC_SZ));
+
+ /* Allocate buffer to setup OPAL core */
+ oc_conf->opalcorebuf_sz = PAGE_ALIGN(hdr_size + cpu_notes_size);
+ oc_conf->opalcorebuf = alloc_pages_exact(oc_conf->opalcorebuf_sz,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!oc_conf->opalcorebuf) {
+ pr_err("Not enough memory to setup OPAL core (size: %lu)\n",
+ oc_conf->opalcorebuf_sz);
+ oc_conf->opalcorebuf_sz = 0;
+ return -ENOMEM;
+ }
+ count = oc_conf->opalcorebuf_sz / PAGE_SIZE;
+ page = virt_to_page(oc_conf->opalcorebuf);
+ for (i = 0; i < count; i++)
+ mark_page_reserved(page + i);
+
+ pr_debug("opalcorebuf = 0x%llx\n", (u64)oc_conf->opalcorebuf);
+
+ /* Read OPAL related device-tree entries */
+ dn = of_find_node_by_name(NULL, "ibm,opal");
+ if (dn) {
+ ret = of_property_read_u64(dn, "opal-base-address",
+ &opal_base_addr);
+ pr_debug("opal-base-address: %llx\n", opal_base_addr);
+ ret |= of_property_read_u64(dn, "opal-boot-address",
+ &opal_boot_entry);
+ pr_debug("opal-boot-address: %llx\n", opal_boot_entry);
+ }
+ if (!dn || ret)
+ pr_warn("WARNING: Failed to read OPAL base & entry values\n");
+
+ /* Use count to keep track of the program headers */
+ count = 0;
+
+ bufp = oc_conf->opalcorebuf;
+ elf = (Elf64_Ehdr *)bufp;
+ bufp += sizeof(Elf64_Ehdr);
+ memcpy(elf->e_ident, ELFMAG, SELFMAG);
+ elf->e_ident[EI_CLASS] = ELF_CLASS;
+ elf->e_ident[EI_DATA] = ELFDATA2MSB;
+ elf->e_ident[EI_VERSION] = EV_CURRENT;
+ elf->e_ident[EI_OSABI] = ELF_OSABI;
+ memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
+ elf->e_type = cpu_to_be16(ET_CORE);
+ elf->e_machine = cpu_to_be16(ELF_ARCH);
+ elf->e_version = cpu_to_be32(EV_CURRENT);
+ elf->e_entry = 0;
+ elf->e_phoff = cpu_to_be64(sizeof(Elf64_Ehdr));
+ elf->e_shoff = 0;
+ elf->e_flags = 0;
+
+ elf->e_ehsize = cpu_to_be16(sizeof(Elf64_Ehdr));
+ elf->e_phentsize = cpu_to_be16(sizeof(Elf64_Phdr));
+ elf->e_phnum = 0;
+ elf->e_shentsize = 0;
+ elf->e_shnum = 0;
+ elf->e_shstrndx = 0;
+
+ phdr = (Elf64_Phdr *)bufp;
+ bufp += sizeof(Elf64_Phdr);
+ phdr->p_type = cpu_to_be32(PT_NOTE);
+ phdr->p_flags = 0;
+ phdr->p_align = 0;
+ phdr->p_paddr = phdr->p_vaddr = 0;
+ phdr->p_offset = cpu_to_be64(hdr_size);
+ phdr->p_filesz = phdr->p_memsz = cpu_to_be64(cpu_notes_size);
+ count++;
+
+ opalcore_off = oc_conf->opalcorebuf_sz;
+ oc_conf->ptload_phdr = (Elf64_Phdr *)bufp;
+ paddr = 0;
+ for (i = 0; i < oc_conf->ptload_cnt; i++) {
+ phdr = (Elf64_Phdr *)bufp;
+ bufp += sizeof(Elf64_Phdr);
+ phdr->p_type = cpu_to_be32(PT_LOAD);
+ phdr->p_flags = cpu_to_be32(PF_R|PF_W|PF_X);
+ phdr->p_align = 0;
+
+ new = get_new_element();
+ if (!new)
+ return -ENOMEM;
+ new->paddr = oc_conf->ptload_addr[i];
+ new->size = oc_conf->ptload_size[i];
+ new->offset = opalcore_off;
+ list_add_tail(&new->list, &opalcore_list);
+
+ phdr->p_paddr = cpu_to_be64(paddr);
+ phdr->p_vaddr = cpu_to_be64(opal_base_addr + paddr);
+ phdr->p_filesz = phdr->p_memsz =
+ cpu_to_be64(oc_conf->ptload_size[i]);
+ phdr->p_offset = cpu_to_be64(opalcore_off);
+
+ count++;
+ opalcore_off += oc_conf->ptload_size[i];
+ paddr += oc_conf->ptload_size[i];
+ }
+
+ elf->e_phnum = cpu_to_be16(count);
+
+ bufp = (char *)opalcore_append_cpu_notes((Elf64_Word *)bufp);
+ bufp = (char *)auxv_to_elf64_notes((Elf64_Word *)bufp, opal_boot_entry);
+
+ oc_conf->opalcore_size = opalcore_off;
+ return 0;
+}
+
+static void opalcore_cleanup(void)
+{
+ if (oc_conf == NULL)
+ return;
+
+ /* Remove OPAL core sysfs file */
+ sysfs_remove_bin_file(opal_kobj, &opal_core_attr);
+ oc_conf->ptload_phdr = NULL;
+ oc_conf->ptload_cnt = 0;
+
+ /* free the buffer used for setting up OPAL core */
+ if (oc_conf->opalcorebuf) {
+ void *end = (void *)((u64)oc_conf->opalcorebuf +
+ oc_conf->opalcorebuf_sz);
+
+ free_reserved_area(oc_conf->opalcorebuf, end, -1, NULL);
+ oc_conf->opalcorebuf = NULL;
+ oc_conf->opalcorebuf_sz = 0;
+ }
+
+ kfree(oc_conf);
+ oc_conf = NULL;
+}
+__exitcall(opalcore_cleanup);
+
+static void __init opalcore_config_init(void)
+{
+ u32 idx, cpu_data_version;
+ struct device_node *np;
+ const __be32 *prop;
+ u64 addr = 0;
+ int i, ret;
+
+ np = of_find_node_by_path("/ibm,opal/dump");
+ if (np == NULL)
+ return;
+
+ if (!of_device_is_compatible(np, "ibm,opal-dump")) {
+ pr_warn("Support missing for this f/w version!\n");
+ return;
+ }
+
+ /* Check if dump has been initiated on last reboot */
+ prop = of_get_property(np, "mpipl-boot", NULL);
+ if (!prop) {
+ of_node_put(np);
+ return;
+ }
+
+ /* Get OPAL metadata */
+ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_OPAL, &addr);
+ if ((ret != OPAL_SUCCESS) || !addr) {
+ pr_err("Failed to get OPAL metadata (%d)\n", ret);
+ goto error_out;
+ }
+
+ addr = be64_to_cpu(addr);
+ pr_debug("OPAL metadata addr: %llx\n", addr);
+ opalc_metadata = __va(addr);
+
+ /* Get OPAL CPU metadata */
+ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr);
+ if ((ret != OPAL_SUCCESS) || !addr) {
+ pr_err("Failed to get OPAL CPU metadata (%d)\n", ret);
+ goto error_out;
+ }
+
+ addr = be64_to_cpu(addr);
+ pr_debug("CPU metadata addr: %llx\n", addr);
+ opalc_cpu_metadata = __va(addr);
+
+ /* Allocate memory for config buffer */
+ oc_conf = kzalloc(sizeof(struct opalcore_config), GFP_KERNEL);
+ if (oc_conf == NULL)
+ goto error_out;
+
+ /* Parse OPAL metadata */
+ if (opalc_metadata->version != OPAL_MPIPL_VERSION) {
+ pr_warn("Supported OPAL metadata version: %u, found: %u!\n",
+ OPAL_MPIPL_VERSION, opalc_metadata->version);
+ pr_warn("WARNING: F/W using newer OPAL metadata format!!\n");
+ }
+
+ oc_conf->ptload_cnt = 0;
+ idx = be32_to_cpu(opalc_metadata->region_cnt);
+ if (idx > MAX_PT_LOAD_CNT) {
+ pr_warn("WARNING: OPAL regions count (%d) adjusted to limit (%d)",
+ MAX_PT_LOAD_CNT, idx);
+ idx = MAX_PT_LOAD_CNT;
+ }
+ for (i = 0; i < idx; i++) {
+ oc_conf->ptload_addr[oc_conf->ptload_cnt] =
+ be64_to_cpu(opalc_metadata->region[i].dest);
+ oc_conf->ptload_size[oc_conf->ptload_cnt++] =
+ be64_to_cpu(opalc_metadata->region[i].size);
+ }
+ oc_conf->ptload_cnt = i;
+ oc_conf->crashing_cpu = be32_to_cpu(opalc_metadata->crashing_pir);
+
+ if (!oc_conf->ptload_cnt) {
+ pr_err("OPAL memory regions not found\n");
+ goto error_out;
+ }
+
+ /* Parse OPAL CPU metadata */
+ cpu_data_version = be32_to_cpu(opalc_cpu_metadata->cpu_data_version);
+ if (cpu_data_version != HDAT_FADUMP_CPU_DATA_VER) {
+ pr_warn("Supported CPU data version: %u, found: %u!\n",
+ HDAT_FADUMP_CPU_DATA_VER, cpu_data_version);
+ pr_warn("WARNING: F/W using newer CPU state data format!!\n");
+ }
+
+ addr = be64_to_cpu(opalc_cpu_metadata->region[0].dest);
+ if (!addr) {
+ pr_err("CPU state data not found!\n");
+ goto error_out;
+ }
+ oc_conf->cpu_state_destination_vaddr = (u64)__va(addr);
+
+ oc_conf->cpu_state_data_size =
+ be64_to_cpu(opalc_cpu_metadata->region[0].size);
+ oc_conf->cpu_state_entry_size =
+ be32_to_cpu(opalc_cpu_metadata->cpu_data_size);
+
+ if ((oc_conf->cpu_state_entry_size == 0) ||
+ (oc_conf->cpu_state_entry_size > oc_conf->cpu_state_data_size)) {
+ pr_err("CPU state data is invalid.\n");
+ goto error_out;
+ }
+ oc_conf->num_cpus = (oc_conf->cpu_state_data_size /
+ oc_conf->cpu_state_entry_size);
+
+ of_node_put(np);
+ return;
+
+error_out:
+ pr_err("Could not export /sys/firmware/opal/core\n");
+ opalcore_cleanup();
+ of_node_put(np);
+}
+
+static ssize_t fadump_release_opalcore_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int input = -1;
+
+ if (kstrtoint(buf, 0, &input))
+ return -EINVAL;
+
+ if (input == 1) {
+ if (oc_conf == NULL) {
+ pr_err("'/sys/firmware/opal/core' file not accessible!\n");
+ return -EPERM;
+ }
+
+ /*
+ * Take away '/sys/firmware/opal/core' and release all memory
+ * used for exporting this file.
+ */
+ opalcore_cleanup();
+ } else
+ return -EINVAL;
+
+ return count;
+}
+
+static struct kobj_attribute opalcore_rel_attr = __ATTR(fadump_release_opalcore,
+ 0200, NULL,
+ fadump_release_opalcore_store);
+
+static int __init opalcore_init(void)
+{
+ int rc = -1;
+
+ opalcore_config_init();
+
+ if (oc_conf == NULL)
+ return rc;
+
+ create_opalcore();
+
+ /*
+ * If oc_conf->opalcorebuf= is set in the 2nd kernel,
+ * then capture the dump.
+ */
+ if (!(is_opalcore_usable())) {
+ pr_err("Failed to export /sys/firmware/opal/core\n");
+ opalcore_cleanup();
+ return rc;
+ }
+
+ /* Set OPAL core file size */
+ opal_core_attr.size = oc_conf->opalcore_size;
+
+ /* Export OPAL core sysfs file */
+ rc = sysfs_create_bin_file(opal_kobj, &opal_core_attr);
+ if (rc != 0) {
+ pr_err("Failed to export /sys/firmware/opal/core\n");
+ opalcore_cleanup();
+ return rc;
+ }
+
+ rc = sysfs_create_file(kernel_kobj, &opalcore_rel_attr.attr);
+ if (rc) {
+ pr_warn("unable to create sysfs file fadump_release_opalcore (%d)\n",
+ rc);
+ }
+
+ return 0;
+}
+fs_initcall(opalcore_init);
diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c
new file mode 100644
index 000000000000..d361d37d975f
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-fadump.c
@@ -0,0 +1,716 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Firmware-Assisted Dump support on POWER platform (OPAL).
+ *
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "opal fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <linux/mm.h>
+#include <linux/crash_dump.h>
+
+#include <asm/page.h>
+#include <asm/opal.h>
+#include <asm/fadump-internal.h>
+
+#include "opal-fadump.h"
+
+
+#ifdef CONFIG_PRESERVE_FA_DUMP
+/*
+ * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
+ * ensure crash data is preserved in hope that the subsequent memory
+ * preserving kernel boot is going to process this crash data.
+ */
+void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
+{
+ const struct opal_fadump_mem_struct *opal_fdm_active;
+ const __be32 *prop;
+ unsigned long dn;
+ u64 addr = 0;
+ s64 ret;
+
+ dn = of_get_flat_dt_subnode_by_name(node, "dump");
+ if (dn == -FDT_ERR_NOTFOUND)
+ return;
+
+ /*
+ * Check if dump has been initiated on last reboot.
+ */
+ prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL);
+ if (!prop)
+ return;
+
+ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr);
+ if ((ret != OPAL_SUCCESS) || !addr) {
+ pr_debug("Could not get Kernel metadata (%lld)\n", ret);
+ return;
+ }
+
+ /*
+ * Preserve memory only if kernel memory regions are registered
+ * with f/w for MPIPL.
+ */
+ addr = be64_to_cpu(addr);
+ pr_debug("Kernel metadata addr: %llx\n", addr);
+ opal_fdm_active = (void *)addr;
+ if (opal_fdm_active->registered_regions == 0)
+ return;
+
+ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &addr);
+ if ((ret != OPAL_SUCCESS) || !addr) {
+ pr_err("Failed to get boot memory tag (%lld)\n", ret);
+ return;
+ }
+
+ /*
+ * Memory below this address can be used for booting a
+ * capture kernel or petitboot kernel. Preserve everything
+ * above this address for processing crashdump.
+ */
+ fadump_conf->boot_mem_top = be64_to_cpu(addr);
+ pr_debug("Preserve everything above %llx\n", fadump_conf->boot_mem_top);
+
+ pr_info("Firmware-assisted dump is active.\n");
+ fadump_conf->dump_active = 1;
+}
+
+#else /* CONFIG_PRESERVE_FA_DUMP */
+static const struct opal_fadump_mem_struct *opal_fdm_active;
+static const struct opal_mpipl_fadump *opal_cpu_metadata;
+static struct opal_fadump_mem_struct *opal_fdm;
+
+#ifdef CONFIG_OPAL_CORE
+extern bool kernel_initiated;
+#endif
+
+static int opal_fadump_unregister(struct fw_dump *fadump_conf);
+
+static void opal_fadump_update_config(struct fw_dump *fadump_conf,
+ const struct opal_fadump_mem_struct *fdm)
+{
+ pr_debug("Boot memory regions count: %d\n", fdm->region_cnt);
+
+ /*
+ * The destination address of the first boot memory region is the
+ * destination address of boot memory regions.
+ */
+ fadump_conf->boot_mem_dest_addr = fdm->rgn[0].dest;
+ pr_debug("Destination address of boot memory regions: %#016llx\n",
+ fadump_conf->boot_mem_dest_addr);
+
+ fadump_conf->fadumphdr_addr = fdm->fadumphdr_addr;
+}
+
+/*
+ * This function is called in the capture kernel to get configuration details
+ * from metadata setup by the first kernel.
+ */
+static void opal_fadump_get_config(struct fw_dump *fadump_conf,
+ const struct opal_fadump_mem_struct *fdm)
+{
+ unsigned long base, size, last_end, hole_size;
+ int i;
+
+ if (!fadump_conf->dump_active)
+ return;
+
+ last_end = 0;
+ hole_size = 0;
+ fadump_conf->boot_memory_size = 0;
+
+ pr_debug("Boot memory regions:\n");
+ for (i = 0; i < fdm->region_cnt; i++) {
+ base = fdm->rgn[i].src;
+ size = fdm->rgn[i].size;
+ pr_debug("\t[%03d] base: 0x%lx, size: 0x%lx\n", i, base, size);
+
+ fadump_conf->boot_mem_addr[i] = base;
+ fadump_conf->boot_mem_sz[i] = size;
+ fadump_conf->boot_memory_size += size;
+ hole_size += (base - last_end);
+
+ last_end = base + size;
+ }
+
+ /*
+ * Start address of reserve dump area (permanent reservation) for
+ * re-registering FADump after dump capture.
+ */
+ fadump_conf->reserve_dump_area_start = fdm->rgn[0].dest;
+
+ /*
+ * Rarely, but it can so happen that system crashes before all
+ * boot memory regions are registered for MPIPL. In such
+ * cases, warn that the vmcore may not be accurate and proceed
+ * anyway as that is the best bet considering free pages, cache
+ * pages, user pages, etc are usually filtered out.
+ *
+ * Hope the memory that could not be preserved only has pages
+ * that are usually filtered out while saving the vmcore.
+ */
+ if (fdm->region_cnt > fdm->registered_regions) {
+ pr_warn("Not all memory regions were saved!!!\n");
+ pr_warn(" Unsaved memory regions:\n");
+ i = fdm->registered_regions;
+ while (i < fdm->region_cnt) {
+ pr_warn("\t[%03d] base: 0x%llx, size: 0x%llx\n",
+ i, fdm->rgn[i].src, fdm->rgn[i].size);
+ i++;
+ }
+
+ pr_warn("If the unsaved regions only contain pages that are filtered out (eg. free/user pages), the vmcore should still be usable.\n");
+ pr_warn("WARNING: If the unsaved regions contain kernel pages, the vmcore will be corrupted.\n");
+ }
+
+ fadump_conf->boot_mem_top = (fadump_conf->boot_memory_size + hole_size);
+ fadump_conf->boot_mem_regs_cnt = fdm->region_cnt;
+ opal_fadump_update_config(fadump_conf, fdm);
+}
+
+/* Initialize kernel metadata */
+static void opal_fadump_init_metadata(struct opal_fadump_mem_struct *fdm)
+{
+ fdm->version = OPAL_FADUMP_VERSION;
+ fdm->region_cnt = 0;
+ fdm->registered_regions = 0;
+ fdm->fadumphdr_addr = 0;
+}
+
+static u64 opal_fadump_init_mem_struct(struct fw_dump *fadump_conf)
+{
+ u64 addr = fadump_conf->reserve_dump_area_start;
+ int i;
+
+ opal_fdm = __va(fadump_conf->kernel_metadata);
+ opal_fadump_init_metadata(opal_fdm);
+
+ /* Boot memory regions */
+ for (i = 0; i < fadump_conf->boot_mem_regs_cnt; i++) {
+ opal_fdm->rgn[i].src = fadump_conf->boot_mem_addr[i];
+ opal_fdm->rgn[i].dest = addr;
+ opal_fdm->rgn[i].size = fadump_conf->boot_mem_sz[i];
+
+ opal_fdm->region_cnt++;
+ addr += fadump_conf->boot_mem_sz[i];
+ }
+
+ /*
+ * Kernel metadata is passed to f/w and retrieved in capture kerenl.
+ * So, use it to save fadump header address instead of calculating it.
+ */
+ opal_fdm->fadumphdr_addr = (opal_fdm->rgn[0].dest +
+ fadump_conf->boot_memory_size);
+
+ opal_fadump_update_config(fadump_conf, opal_fdm);
+
+ return addr;
+}
+
+static u64 opal_fadump_get_metadata_size(void)
+{
+ return PAGE_ALIGN(sizeof(struct opal_fadump_mem_struct));
+}
+
+static int opal_fadump_setup_metadata(struct fw_dump *fadump_conf)
+{
+ int err = 0;
+ s64 ret;
+
+ /*
+ * Use the last page(s) in FADump memory reservation for
+ * kernel metadata.
+ */
+ fadump_conf->kernel_metadata = (fadump_conf->reserve_dump_area_start +
+ fadump_conf->reserve_dump_area_size -
+ opal_fadump_get_metadata_size());
+ pr_info("Kernel metadata addr: %llx\n", fadump_conf->kernel_metadata);
+
+ /* Initialize kernel metadata before registering the address with f/w */
+ opal_fdm = __va(fadump_conf->kernel_metadata);
+ opal_fadump_init_metadata(opal_fdm);
+
+ /*
+ * Register metadata address with f/w. Can be retrieved in
+ * the capture kernel.
+ */
+ ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_KERNEL,
+ fadump_conf->kernel_metadata);
+ if (ret != OPAL_SUCCESS) {
+ pr_err("Failed to set kernel metadata tag!\n");
+ err = -EPERM;
+ }
+
+ /*
+ * Register boot memory top address with f/w. Should be retrieved
+ * by a kernel that intends to preserve crash'ed kernel's memory.
+ */
+ ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_BOOT_MEM,
+ fadump_conf->boot_mem_top);
+ if (ret != OPAL_SUCCESS) {
+ pr_err("Failed to set boot memory tag!\n");
+ err = -EPERM;
+ }
+
+ return err;
+}
+
+static u64 opal_fadump_get_bootmem_min(void)
+{
+ return OPAL_FADUMP_MIN_BOOT_MEM;
+}
+
+static int opal_fadump_register(struct fw_dump *fadump_conf)
+{
+ s64 rc = OPAL_PARAMETER;
+ int i, err = -EIO;
+
+ for (i = 0; i < opal_fdm->region_cnt; i++) {
+ rc = opal_mpipl_update(OPAL_MPIPL_ADD_RANGE,
+ opal_fdm->rgn[i].src,
+ opal_fdm->rgn[i].dest,
+ opal_fdm->rgn[i].size);
+ if (rc != OPAL_SUCCESS)
+ break;
+
+ opal_fdm->registered_regions++;
+ }
+
+ switch (rc) {
+ case OPAL_SUCCESS:
+ pr_info("Registration is successful!\n");
+ fadump_conf->dump_registered = 1;
+ err = 0;
+ break;
+ case OPAL_RESOURCE:
+ /* If MAX regions limit in f/w is hit, warn and proceed. */
+ pr_warn("%d regions could not be registered for MPIPL as MAX limit is reached!\n",
+ (opal_fdm->region_cnt - opal_fdm->registered_regions));
+ fadump_conf->dump_registered = 1;
+ err = 0;
+ break;
+ case OPAL_PARAMETER:
+ pr_err("Failed to register. Parameter Error(%lld).\n", rc);
+ break;
+ case OPAL_HARDWARE:
+ pr_err("Support not available.\n");
+ fadump_conf->fadump_supported = 0;
+ fadump_conf->fadump_enabled = 0;
+ break;
+ default:
+ pr_err("Failed to register. Unknown Error(%lld).\n", rc);
+ break;
+ }
+
+ /*
+ * If some regions were registered before OPAL_MPIPL_ADD_RANGE
+ * OPAL call failed, unregister all regions.
+ */
+ if ((err < 0) && (opal_fdm->registered_regions > 0))
+ opal_fadump_unregister(fadump_conf);
+
+ return err;
+}
+
+static int opal_fadump_unregister(struct fw_dump *fadump_conf)
+{
+ s64 rc;
+
+ rc = opal_mpipl_update(OPAL_MPIPL_REMOVE_ALL, 0, 0, 0);
+ if (rc) {
+ pr_err("Failed to un-register - unexpected Error(%lld).\n", rc);
+ return -EIO;
+ }
+
+ opal_fdm->registered_regions = 0;
+ fadump_conf->dump_registered = 0;
+ return 0;
+}
+
+static int opal_fadump_invalidate(struct fw_dump *fadump_conf)
+{
+ s64 rc;
+
+ rc = opal_mpipl_update(OPAL_MPIPL_FREE_PRESERVED_MEMORY, 0, 0, 0);
+ if (rc) {
+ pr_err("Failed to invalidate - unexpected Error(%lld).\n", rc);
+ return -EIO;
+ }
+
+ fadump_conf->dump_active = 0;
+ opal_fdm_active = NULL;
+ return 0;
+}
+
+static void opal_fadump_cleanup(struct fw_dump *fadump_conf)
+{
+ s64 ret;
+
+ ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_KERNEL, 0);
+ if (ret != OPAL_SUCCESS)
+ pr_warn("Could not reset (%llu) kernel metadata tag!\n", ret);
+}
+
+/*
+ * Verify if CPU state data is available. If available, do a bit of sanity
+ * checking before processing this data.
+ */
+static bool __init is_opal_fadump_cpu_data_valid(struct fw_dump *fadump_conf)
+{
+ if (!opal_cpu_metadata)
+ return false;
+
+ fadump_conf->cpu_state_data_version =
+ be32_to_cpu(opal_cpu_metadata->cpu_data_version);
+ fadump_conf->cpu_state_entry_size =
+ be32_to_cpu(opal_cpu_metadata->cpu_data_size);
+ fadump_conf->cpu_state_dest_vaddr =
+ (u64)__va(be64_to_cpu(opal_cpu_metadata->region[0].dest));
+ fadump_conf->cpu_state_data_size =
+ be64_to_cpu(opal_cpu_metadata->region[0].size);
+
+ if (fadump_conf->cpu_state_data_version != HDAT_FADUMP_CPU_DATA_VER) {
+ pr_warn("Supported CPU state data version: %u, found: %d!\n",
+ HDAT_FADUMP_CPU_DATA_VER,
+ fadump_conf->cpu_state_data_version);
+ pr_warn("WARNING: F/W using newer CPU state data format!!\n");
+ }
+
+ if ((fadump_conf->cpu_state_dest_vaddr == 0) ||
+ (fadump_conf->cpu_state_entry_size == 0) ||
+ (fadump_conf->cpu_state_entry_size >
+ fadump_conf->cpu_state_data_size)) {
+ pr_err("CPU state data is invalid. Ignoring!\n");
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Convert CPU state data saved at the time of crash into ELF notes.
+ *
+ * While the crashing CPU's register data is saved by the kernel, CPU state
+ * data for all CPUs is saved by f/w. In CPU state data provided by f/w,
+ * each register entry is of 16 bytes, a numerical identifier along with
+ * a GPR/SPR flag in the first 8 bytes and the register value in the next
+ * 8 bytes. For more details refer to F/W documentation. If this data is
+ * missing or in unsupported format, append crashing CPU's register data
+ * saved by the kernel in the PT_NOTE, to have something to work with in
+ * the vmcore file.
+ */
+static int __init
+opal_fadump_build_cpu_notes(struct fw_dump *fadump_conf,
+ struct fadump_crash_info_header *fdh)
+{
+ u32 thread_pir, size_per_thread, regs_offset, regs_cnt, reg_esize;
+ struct hdat_fadump_thread_hdr *thdr;
+ bool is_cpu_data_valid = false;
+ u32 num_cpus = 1, *note_buf;
+ struct pt_regs regs;
+ char *bufp;
+ int rc, i;
+
+ if (is_opal_fadump_cpu_data_valid(fadump_conf)) {
+ size_per_thread = fadump_conf->cpu_state_entry_size;
+ num_cpus = (fadump_conf->cpu_state_data_size / size_per_thread);
+ bufp = __va(fadump_conf->cpu_state_dest_vaddr);
+ is_cpu_data_valid = true;
+ }
+
+ rc = fadump_setup_cpu_notes_buf(num_cpus);
+ if (rc != 0)
+ return rc;
+
+ note_buf = (u32 *)fadump_conf->cpu_notes_buf_vaddr;
+ if (!is_cpu_data_valid)
+ goto out;
+
+ /*
+ * Offset for register entries, entry size and registers count is
+ * duplicated in every thread header in keeping with HDAT format.
+ * Use these values from the first thread header.
+ */
+ thdr = (struct hdat_fadump_thread_hdr *)bufp;
+ regs_offset = (offsetof(struct hdat_fadump_thread_hdr, offset) +
+ be32_to_cpu(thdr->offset));
+ reg_esize = be32_to_cpu(thdr->esize);
+ regs_cnt = be32_to_cpu(thdr->ecnt);
+
+ pr_debug("--------CPU State Data------------\n");
+ pr_debug("NumCpus : %u\n", num_cpus);
+ pr_debug("\tOffset: %u, Entry size: %u, Cnt: %u\n",
+ regs_offset, reg_esize, regs_cnt);
+
+ for (i = 0; i < num_cpus; i++, bufp += size_per_thread) {
+ thdr = (struct hdat_fadump_thread_hdr *)bufp;
+
+ thread_pir = be32_to_cpu(thdr->pir);
+ pr_debug("[%04d] PIR: 0x%x, core state: 0x%02x\n",
+ i, thread_pir, thdr->core_state);
+
+ /*
+ * If this is kernel initiated crash, crashing_cpu would be set
+ * appropriately and register data of the crashing CPU saved by
+ * crashing kernel. Add this saved register data of crashing CPU
+ * to elf notes and populate the pt_regs for the remaining CPUs
+ * from register state data provided by firmware.
+ */
+ if (fdh->crashing_cpu == thread_pir) {
+ note_buf = fadump_regs_to_elf_notes(note_buf,
+ &fdh->regs);
+ pr_debug("Crashing CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n",
+ fdh->crashing_cpu, fdh->regs.gpr[1],
+ fdh->regs.nip);
+ continue;
+ }
+
+ /*
+ * Register state data of MAX cores is provided by firmware,
+ * but some of this cores may not be active. So, while
+ * processing register state data, check core state and
+ * skip threads that belong to inactive cores.
+ */
+ if (thdr->core_state == HDAT_FADUMP_CORE_INACTIVE)
+ continue;
+
+ opal_fadump_read_regs((bufp + regs_offset), regs_cnt,
+ reg_esize, true, &regs);
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ pr_debug("CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n",
+ thread_pir, regs.gpr[1], regs.nip);
+ }
+
+out:
+ /*
+ * CPU state data is invalid/unsupported. Try appending crashing CPU's
+ * register data, if it is saved by the kernel.
+ */
+ if (fadump_conf->cpu_notes_buf_vaddr == (u64)note_buf) {
+ if (fdh->crashing_cpu == FADUMP_CPU_UNKNOWN) {
+ fadump_free_cpu_notes_buf();
+ return -ENODEV;
+ }
+
+ pr_warn("WARNING: appending only crashing CPU's register data\n");
+ note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs));
+ }
+
+ final_note(note_buf);
+
+ pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+ fdh->elfcorehdr_addr);
+ fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr));
+ return 0;
+}
+
+static int __init opal_fadump_process(struct fw_dump *fadump_conf)
+{
+ struct fadump_crash_info_header *fdh;
+ int rc = -EINVAL;
+
+ if (!opal_fdm_active || !fadump_conf->fadumphdr_addr)
+ return rc;
+
+ /* Validate the fadump crash info header */
+ fdh = __va(fadump_conf->fadumphdr_addr);
+ if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+ pr_err("Crash info header is not valid.\n");
+ return rc;
+ }
+
+#ifdef CONFIG_OPAL_CORE
+ /*
+ * If this is a kernel initiated crash, crashing_cpu would be set
+ * appropriately and register data of the crashing CPU saved by
+ * crashing kernel. Add this saved register data of crashing CPU
+ * to elf notes and populate the pt_regs for the remaining CPUs
+ * from register state data provided by firmware.
+ */
+ if (fdh->crashing_cpu != FADUMP_CPU_UNKNOWN)
+ kernel_initiated = true;
+#endif
+
+ rc = opal_fadump_build_cpu_notes(fadump_conf, fdh);
+ if (rc)
+ return rc;
+
+ /*
+ * We are done validating dump info and elfcore header is now ready
+ * to be exported. set elfcorehdr_addr so that vmcore module will
+ * export the elfcore header through '/proc/vmcore'.
+ */
+ elfcorehdr_addr = fdh->elfcorehdr_addr;
+
+ return rc;
+}
+
+static void opal_fadump_region_show(struct fw_dump *fadump_conf,
+ struct seq_file *m)
+{
+ const struct opal_fadump_mem_struct *fdm_ptr;
+ u64 dumped_bytes = 0;
+ int i;
+
+ if (fadump_conf->dump_active)
+ fdm_ptr = opal_fdm_active;
+ else
+ fdm_ptr = opal_fdm;
+
+ for (i = 0; i < fdm_ptr->region_cnt; i++) {
+ /*
+ * Only regions that are registered for MPIPL
+ * would have dump data.
+ */
+ if ((fadump_conf->dump_active) &&
+ (i < fdm_ptr->registered_regions))
+ dumped_bytes = fdm_ptr->rgn[i].size;
+
+ seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ",
+ fdm_ptr->rgn[i].src, fdm_ptr->rgn[i].dest);
+ seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n",
+ fdm_ptr->rgn[i].size, dumped_bytes);
+ }
+
+ /* Dump is active. Show reserved area start address. */
+ if (fadump_conf->dump_active) {
+ seq_printf(m, "\nMemory above %#016lx is reserved for saving crash dump\n",
+ fadump_conf->reserve_dump_area_start);
+ }
+}
+
+static void opal_fadump_trigger(struct fadump_crash_info_header *fdh,
+ const char *msg)
+{
+ int rc;
+
+ /*
+ * Unlike on pSeries platform, logical CPU number is not provided
+ * with architected register state data. So, store the crashing
+ * CPU's PIR instead to plug the appropriate register data for
+ * crashing CPU in the vmcore file.
+ */
+ fdh->crashing_cpu = (u32)mfspr(SPRN_PIR);
+
+ rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, msg);
+ if (rc == OPAL_UNSUPPORTED) {
+ pr_emerg("Reboot type %d not supported.\n",
+ OPAL_REBOOT_MPIPL);
+ } else if (rc == OPAL_HARDWARE)
+ pr_emerg("No backend support for MPIPL!\n");
+}
+
+static struct fadump_ops opal_fadump_ops = {
+ .fadump_init_mem_struct = opal_fadump_init_mem_struct,
+ .fadump_get_metadata_size = opal_fadump_get_metadata_size,
+ .fadump_setup_metadata = opal_fadump_setup_metadata,
+ .fadump_get_bootmem_min = opal_fadump_get_bootmem_min,
+ .fadump_register = opal_fadump_register,
+ .fadump_unregister = opal_fadump_unregister,
+ .fadump_invalidate = opal_fadump_invalidate,
+ .fadump_cleanup = opal_fadump_cleanup,
+ .fadump_process = opal_fadump_process,
+ .fadump_region_show = opal_fadump_region_show,
+ .fadump_trigger = opal_fadump_trigger,
+};
+
+void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
+{
+ const __be32 *prop;
+ unsigned long dn;
+ u64 addr = 0;
+ int i, len;
+ s64 ret;
+
+ /*
+ * Check if Firmware-Assisted Dump is supported. if yes, check
+ * if dump has been initiated on last reboot.
+ */
+ dn = of_get_flat_dt_subnode_by_name(node, "dump");
+ if (dn == -FDT_ERR_NOTFOUND) {
+ pr_debug("FADump support is missing!\n");
+ return;
+ }
+
+ if (!of_flat_dt_is_compatible(dn, "ibm,opal-dump")) {
+ pr_err("Support missing for this f/w version!\n");
+ return;
+ }
+
+ prop = of_get_flat_dt_prop(dn, "fw-load-area", &len);
+ if (prop) {
+ /*
+ * Each f/w load area is an (address,size) pair,
+ * 2 cells each, totalling 4 cells per range.
+ */
+ for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
+ u64 base, end;
+
+ base = of_read_number(prop + (i * 4) + 0, 2);
+ end = base;
+ end += of_read_number(prop + (i * 4) + 2, 2);
+ if (end > OPAL_FADUMP_MIN_BOOT_MEM) {
+ pr_err("F/W load area: 0x%llx-0x%llx\n",
+ base, end);
+ pr_err("F/W version not supported!\n");
+ return;
+ }
+ }
+ }
+
+ fadump_conf->ops = &opal_fadump_ops;
+ fadump_conf->fadump_supported = 1;
+
+ /*
+ * Firmware supports 32-bit field for size. Align it to PAGE_SIZE
+ * and request firmware to copy multiple kernel boot memory regions.
+ */
+ fadump_conf->max_copy_size = _ALIGN_DOWN(U32_MAX, PAGE_SIZE);
+
+ /*
+ * Check if dump has been initiated on last reboot.
+ */
+ prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL);
+ if (!prop)
+ return;
+
+ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr);
+ if ((ret != OPAL_SUCCESS) || !addr) {
+ pr_err("Failed to get Kernel metadata (%lld)\n", ret);
+ return;
+ }
+
+ addr = be64_to_cpu(addr);
+ pr_debug("Kernel metadata addr: %llx\n", addr);
+
+ opal_fdm_active = __va(addr);
+ if (opal_fdm_active->version != OPAL_FADUMP_VERSION) {
+ pr_warn("Supported kernel metadata version: %u, found: %d!\n",
+ OPAL_FADUMP_VERSION, opal_fdm_active->version);
+ pr_warn("WARNING: Kernel metadata format mismatch identified! Core file maybe corrupted..\n");
+ }
+
+ /* Kernel regions not registered with f/w for MPIPL */
+ if (opal_fdm_active->registered_regions == 0) {
+ opal_fdm_active = NULL;
+ return;
+ }
+
+ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr);
+ if (addr) {
+ addr = be64_to_cpu(addr);
+ pr_debug("CPU metadata addr: %llx\n", addr);
+ opal_cpu_metadata = __va(addr);
+ }
+
+ pr_info("Firmware-assisted dump is active.\n");
+ fadump_conf->dump_active = 1;
+ opal_fadump_get_config(fadump_conf, opal_fdm_active);
+}
+#endif /* !CONFIG_PRESERVE_FA_DUMP */
diff --git a/arch/powerpc/platforms/powernv/opal-fadump.h b/arch/powerpc/platforms/powernv/opal-fadump.h
new file mode 100644
index 000000000000..f1e9ecf548c5
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-fadump.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Firmware-Assisted Dump support on POWER platform (OPAL).
+ *
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#ifndef _POWERNV_OPAL_FADUMP_H
+#define _POWERNV_OPAL_FADUMP_H
+
+#include <asm/reg.h>
+
+/*
+ * With kernel & initrd loaded at 512MB (with 256MB size), enforce a minimum
+ * boot memory size of 768MB to ensure f/w loading kernel and initrd doesn't
+ * mess with crash'ed kernel's memory during MPIPL.
+ */
+#define OPAL_FADUMP_MIN_BOOT_MEM (0x30000000UL)
+
+/*
+ * OPAL FADump metadata structure format version
+ *
+ * OPAL FADump kernel metadata structure stores kernel metadata needed to
+ * register-for/process crash dump. Format version is used to keep a tab on
+ * the changes in the structure format. The changes, if any, to the format
+ * are expected to be minimal and backward compatible.
+ */
+#define OPAL_FADUMP_VERSION 0x1
+
+/*
+ * OPAL FADump kernel metadata
+ *
+ * The address of this structure will be registered with f/w for retrieving
+ * and processing during crash dump.
+ */
+struct opal_fadump_mem_struct {
+ u8 version;
+ u8 reserved[3];
+ u16 region_cnt; /* number of regions */
+ u16 registered_regions; /* Regions registered for MPIPL */
+ u64 fadumphdr_addr;
+ struct opal_mpipl_region rgn[FADUMP_MAX_MEM_REGS];
+} __packed;
+
+/*
+ * CPU state data
+ *
+ * CPU state data information is provided by f/w. The format for this data
+ * is defined in the HDAT spec. Version is used to keep a tab on the changes
+ * in this CPU state data format. Changes to this format are unlikely, but
+ * if there are any changes, please refer to latest HDAT specification.
+ */
+#define HDAT_FADUMP_CPU_DATA_VER 1
+
+#define HDAT_FADUMP_CORE_INACTIVE (0x0F)
+
+/* HDAT thread header for register entries */
+struct hdat_fadump_thread_hdr {
+ __be32 pir;
+ /* 0x00 - 0x0F - The corresponding stop state of the core */
+ u8 core_state;
+ u8 reserved[3];
+
+ __be32 offset; /* Offset to Register Entries array */
+ __be32 ecnt; /* Number of entries */
+ __be32 esize; /* Alloc size of each array entry in bytes */
+ __be32 eactsz; /* Actual size of each array entry in bytes */
+} __packed;
+
+/* Register types populated by f/w */
+#define HDAT_FADUMP_REG_TYPE_GPR 0x01
+#define HDAT_FADUMP_REG_TYPE_SPR 0x02
+
+/* ID numbers used by f/w while populating certain registers */
+#define HDAT_FADUMP_REG_ID_NIP 0x7D0
+#define HDAT_FADUMP_REG_ID_MSR 0x7D1
+#define HDAT_FADUMP_REG_ID_CCR 0x7D2
+
+/* HDAT register entry. */
+struct hdat_fadump_reg_entry {
+ __be32 reg_type;
+ __be32 reg_num;
+ __be64 reg_val;
+} __packed;
+
+static inline void opal_fadump_set_regval_regnum(struct pt_regs *regs,
+ u32 reg_type, u32 reg_num,
+ u64 reg_val)
+{
+ if (reg_type == HDAT_FADUMP_REG_TYPE_GPR) {
+ if (reg_num < 32)
+ regs->gpr[reg_num] = reg_val;
+ return;
+ }
+
+ switch (reg_num) {
+ case SPRN_CTR:
+ regs->ctr = reg_val;
+ break;
+ case SPRN_LR:
+ regs->link = reg_val;
+ break;
+ case SPRN_XER:
+ regs->xer = reg_val;
+ break;
+ case SPRN_DAR:
+ regs->dar = reg_val;
+ break;
+ case SPRN_DSISR:
+ regs->dsisr = reg_val;
+ break;
+ case HDAT_FADUMP_REG_ID_NIP:
+ regs->nip = reg_val;
+ break;
+ case HDAT_FADUMP_REG_ID_MSR:
+ regs->msr = reg_val;
+ break;
+ case HDAT_FADUMP_REG_ID_CCR:
+ regs->ccr = reg_val;
+ break;
+ }
+}
+
+static inline void opal_fadump_read_regs(char *bufp, unsigned int regs_cnt,
+ unsigned int reg_entry_size,
+ bool cpu_endian,
+ struct pt_regs *regs)
+{
+ struct hdat_fadump_reg_entry *reg_entry;
+ u64 val;
+ int i;
+
+ memset(regs, 0, sizeof(struct pt_regs));
+
+ for (i = 0; i < regs_cnt; i++, bufp += reg_entry_size) {
+ reg_entry = (struct hdat_fadump_reg_entry *)bufp;
+ val = (cpu_endian ? be64_to_cpu(reg_entry->reg_val) :
+ reg_entry->reg_val);
+ opal_fadump_set_regval_regnum(regs,
+ be32_to_cpu(reg_entry->reg_type),
+ be32_to_cpu(reg_entry->reg_num),
+ val);
+ }
+}
+
+#endif /* _POWERNV_OPAL_FADUMP_H */
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 186109bdd41b..000b350d4060 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -53,36 +53,32 @@ static void export_imc_mode_and_cmd(struct device_node *node,
struct imc_pmu *pmu_ptr)
{
static u64 loc, *imc_mode_addr, *imc_cmd_addr;
- int chip = 0, nid;
char mode[16], cmd[16];
u32 cb_offset;
+ struct imc_mem_info *ptr = pmu_ptr->mem_info;
imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root);
- /*
- * Return here, either because 'imc' directory already exists,
- * Or failed to create a new one.
- */
if (!imc_debugfs_parent)
return;
if (of_property_read_u32(node, "cb_offset", &cb_offset))
cb_offset = IMC_CNTL_BLK_OFFSET;
- for_each_node(nid) {
- loc = (u64)(pmu_ptr->mem_info[chip].vbase) + cb_offset;
+ while (ptr->vbase != NULL) {
+ loc = (u64)(ptr->vbase) + cb_offset;
imc_mode_addr = (u64 *)(loc + IMC_CNTL_BLK_MODE_OFFSET);
- sprintf(mode, "imc_mode_%d", nid);
+ sprintf(mode, "imc_mode_%d", (u32)(ptr->id));
if (!imc_debugfs_create_x64(mode, 0600, imc_debugfs_parent,
imc_mode_addr))
goto err;
imc_cmd_addr = (u64 *)(loc + IMC_CNTL_BLK_CMD_OFFSET);
- sprintf(cmd, "imc_cmd_%d", nid);
+ sprintf(cmd, "imc_cmd_%d", (u32)(ptr->id));
if (!imc_debugfs_create_x64(cmd, 0600, imc_debugfs_parent,
imc_cmd_addr))
goto err;
- chip++;
+ ptr++;
}
return;
@@ -135,7 +131,6 @@ static int imc_get_mem_addr_nest(struct device_node *node,
}
pmu_ptr->imc_counter_mmaped = true;
- export_imc_mode_and_cmd(node, pmu_ptr);
kfree(base_addr_arr);
kfree(chipid_arr);
return 0;
@@ -151,7 +146,7 @@ error:
* and domain as the inputs.
* Allocates memory for the struct imc_pmu, sets up its domain, size and offsets
*/
-static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
+static struct imc_pmu *imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
{
int ret = 0;
struct imc_pmu *pmu_ptr;
@@ -159,27 +154,23 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
/* Return for unknown domain */
if (domain < 0)
- return -EINVAL;
+ return NULL;
/* memory for pmu */
pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL);
if (!pmu_ptr)
- return -ENOMEM;
+ return NULL;
/* Set the domain */
pmu_ptr->domain = domain;
ret = of_property_read_u32(parent, "size", &pmu_ptr->counter_mem_size);
- if (ret) {
- ret = -EINVAL;
+ if (ret)
goto free_pmu;
- }
if (!of_property_read_u32(parent, "offset", &offset)) {
- if (imc_get_mem_addr_nest(parent, pmu_ptr, offset)) {
- ret = -EINVAL;
+ if (imc_get_mem_addr_nest(parent, pmu_ptr, offset))
goto free_pmu;
- }
}
/* Function to register IMC pmu */
@@ -190,14 +181,14 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
if (pmu_ptr->domain == IMC_DOMAIN_NEST)
kfree(pmu_ptr->mem_info);
kfree(pmu_ptr);
- return ret;
+ return NULL;
}
- return 0;
+ return pmu_ptr;
free_pmu:
kfree(pmu_ptr);
- return ret;
+ return NULL;
}
static void disable_nest_pmu_counters(void)
@@ -254,6 +245,7 @@ int get_max_nest_dev(void)
static int opal_imc_counters_probe(struct platform_device *pdev)
{
struct device_node *imc_dev = pdev->dev.of_node;
+ struct imc_pmu *pmu;
int pmu_count = 0, domain;
bool core_imc_reg = false, thread_imc_reg = false;
u32 type;
@@ -269,6 +261,7 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
}
for_each_compatible_node(imc_dev, NULL, IMC_DTB_UNIT_COMPAT) {
+ pmu = NULL;
if (of_property_read_u32(imc_dev, "type", &type)) {
pr_warn("IMC Device without type property\n");
continue;
@@ -285,7 +278,14 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
domain = IMC_DOMAIN_THREAD;
break;
case IMC_TYPE_TRACE:
- domain = IMC_DOMAIN_TRACE;
+ /*
+ * FIXME. Using trace_imc events to monitor application
+ * or KVM thread performance can cause a checkstop
+ * (system crash).
+ * Disable it for now.
+ */
+ pr_info_once("IMC: disabling trace_imc PMU\n");
+ domain = -1;
break;
default:
pr_warn("IMC Unknown Device type \n");
@@ -293,9 +293,13 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
break;
}
- if (!imc_pmu_create(imc_dev, pmu_count, domain)) {
- if (domain == IMC_DOMAIN_NEST)
+ pmu = imc_pmu_create(imc_dev, pmu_count, domain);
+ if (pmu != NULL) {
+ if (domain == IMC_DOMAIN_NEST) {
+ if (!imc_debugfs_parent)
+ export_imc_mode_and_cmd(imc_dev, pmu);
pmu_count++;
+ }
if (domain == IMC_DOMAIN_CORE)
core_imc_reg = true;
if (domain == IMC_DOMAIN_THREAD)
@@ -303,10 +307,6 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
}
}
- /* If none of the nest units are registered, remove debugfs interface */
- if (pmu_count == 0)
- debugfs_remove_recursive(imc_debugfs_parent);
-
/* If core imc is not registered, unregister thread-imc */
if (!core_imc_reg && thread_imc_reg)
unregister_thread_imc();
diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c
index dc51d03c6370..d26da19a611f 100644
--- a/arch/powerpc/platforms/powernv/opal-msglog.c
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c
@@ -29,23 +29,23 @@ struct memcons {
static struct memcons *opal_memcons = NULL;
-ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
+ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count)
{
const char *conbuf;
ssize_t ret;
size_t first_read = 0;
uint32_t out_pos, avail;
- if (!opal_memcons)
+ if (!mc)
return -ENODEV;
- out_pos = be32_to_cpu(READ_ONCE(opal_memcons->out_pos));
+ out_pos = be32_to_cpu(READ_ONCE(mc->out_pos));
/* Now we've read out_pos, put a barrier in before reading the new
* data it points to in conbuf. */
smp_rmb();
- conbuf = phys_to_virt(be64_to_cpu(opal_memcons->obuf_phys));
+ conbuf = phys_to_virt(be64_to_cpu(mc->obuf_phys));
/* When the buffer has wrapped, read from the out_pos marker to the end
* of the buffer, and then read the remaining data as in the un-wrapped
@@ -53,7 +53,7 @@ ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
if (out_pos & MEMCONS_OUT_POS_WRAP) {
out_pos &= MEMCONS_OUT_POS_MASK;
- avail = be32_to_cpu(opal_memcons->obuf_size) - out_pos;
+ avail = be32_to_cpu(mc->obuf_size) - out_pos;
ret = memory_read_from_buffer(to, count, &pos,
conbuf + out_pos, avail);
@@ -71,7 +71,7 @@ ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
}
/* Sanity check. The firmware should not do this to us. */
- if (out_pos > be32_to_cpu(opal_memcons->obuf_size)) {
+ if (out_pos > be32_to_cpu(mc->obuf_size)) {
pr_err("OPAL: memory console corruption. Aborting read.\n");
return -EINVAL;
}
@@ -86,6 +86,11 @@ out:
return ret;
}
+ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
+{
+ return memcons_copy(opal_memcons, to, pos, count);
+}
+
static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
struct bin_attribute *bin_attr, char *to,
loff_t pos, size_t count)
@@ -98,32 +103,48 @@ static struct bin_attribute opal_msglog_attr = {
.read = opal_msglog_read
};
-void __init opal_msglog_init(void)
+struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name)
{
u64 mcaddr;
struct memcons *mc;
- if (of_property_read_u64(opal_node, "ibm,opal-memcons", &mcaddr)) {
- pr_warn("OPAL: Property ibm,opal-memcons not found, no message log\n");
- return;
+ if (of_property_read_u64(node, mc_prop_name, &mcaddr)) {
+ pr_warn("%s property not found, no message log\n",
+ mc_prop_name);
+ goto out_err;
}
mc = phys_to_virt(mcaddr);
if (!mc) {
- pr_warn("OPAL: memory console address is invalid\n");
- return;
+ pr_warn("memory console address is invalid\n");
+ goto out_err;
}
if (be64_to_cpu(mc->magic) != MEMCONS_MAGIC) {
- pr_warn("OPAL: memory console version is invalid\n");
- return;
+ pr_warn("memory console version is invalid\n");
+ goto out_err;
}
- /* Report maximum size */
- opal_msglog_attr.size = be32_to_cpu(mc->ibuf_size) +
- be32_to_cpu(mc->obuf_size);
+ return mc;
+
+out_err:
+ return NULL;
+}
+
+u32 memcons_get_size(struct memcons *mc)
+{
+ return be32_to_cpu(mc->ibuf_size) + be32_to_cpu(mc->obuf_size);
+}
+
+void __init opal_msglog_init(void)
+{
+ opal_memcons = memcons_init(opal_node, "ibm,opal-memcons");
+ if (!opal_memcons) {
+ pr_warn("OPAL: memcons failed to load from ibm,opal-memcons\n");
+ return;
+ }
- opal_memcons = mc;
+ opal_msglog_attr.size = memcons_get_size(opal_memcons);
}
void __init opal_msglog_sysfs_init(void)
diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c
index dc599e787f78..c16d44f6f1d1 100644
--- a/arch/powerpc/platforms/powernv/opal-powercap.c
+++ b/arch/powerpc/platforms/powernv/opal-powercap.c
@@ -13,7 +13,7 @@
#include <asm/opal.h>
-DEFINE_MUTEX(powercap_mutex);
+static DEFINE_MUTEX(powercap_mutex);
static struct kobject *powercap_kobj;
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index e072bf157d62..45f4223a790f 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -342,7 +342,7 @@ static int opal_prd_msg_notifier(struct notifier_block *nb,
int msg_size, item_size;
unsigned long flags;
- if (msg_type != OPAL_MSG_PRD)
+ if (msg_type != OPAL_MSG_PRD && msg_type != OPAL_MSG_PRD2)
return 0;
/* Calculate total size of the message and item we need to store. The
@@ -393,6 +393,12 @@ static int opal_prd_probe(struct platform_device *pdev)
return rc;
}
+ rc = opal_message_notifier_register(OPAL_MSG_PRD2, &opal_prd_event_nb);
+ if (rc) {
+ pr_err("Couldn't register PRD2 event notifier\n");
+ return rc;
+ }
+
rc = misc_register(&opal_prd_dev);
if (rc) {
pr_err("failed to register miscdev\n");
diff --git a/arch/powerpc/platforms/powernv/opal-psr.c b/arch/powerpc/platforms/powernv/opal-psr.c
index b6ccb3026c6c..69d7e75950d1 100644
--- a/arch/powerpc/platforms/powernv/opal-psr.c
+++ b/arch/powerpc/platforms/powernv/opal-psr.c
@@ -13,11 +13,11 @@
#include <asm/opal.h>
-DEFINE_MUTEX(psr_mutex);
+static DEFINE_MUTEX(psr_mutex);
static struct kobject *psr_kobj;
-struct psr_attr {
+static struct psr_attr {
u32 handle;
struct kobj_attribute attr;
} *psr_attrs;
diff --git a/arch/powerpc/platforms/powernv/opal-secvar.c b/arch/powerpc/platforms/powernv/opal-secvar.c
new file mode 100644
index 000000000000..14133e120bdd
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-secvar.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PowerNV code for secure variables
+ *
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Claudio Carvalho
+ * Nayna Jain
+ *
+ * APIs to access secure variables managed by OPAL.
+ */
+
+#define pr_fmt(fmt) "secvar: "fmt
+
+#include <linux/types.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <asm/opal.h>
+#include <asm/secvar.h>
+#include <asm/secure_boot.h>
+
+static int opal_status_to_err(int rc)
+{
+ int err;
+
+ switch (rc) {
+ case OPAL_SUCCESS:
+ err = 0;
+ break;
+ case OPAL_UNSUPPORTED:
+ err = -ENXIO;
+ break;
+ case OPAL_PARAMETER:
+ err = -EINVAL;
+ break;
+ case OPAL_RESOURCE:
+ err = -ENOSPC;
+ break;
+ case OPAL_HARDWARE:
+ err = -EIO;
+ break;
+ case OPAL_NO_MEM:
+ err = -ENOMEM;
+ break;
+ case OPAL_EMPTY:
+ err = -ENOENT;
+ break;
+ case OPAL_PARTIAL:
+ err = -EFBIG;
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static int opal_get_variable(const char *key, uint64_t ksize,
+ u8 *data, uint64_t *dsize)
+{
+ int rc;
+
+ if (!key || !dsize)
+ return -EINVAL;
+
+ *dsize = cpu_to_be64(*dsize);
+
+ rc = opal_secvar_get(key, ksize, data, dsize);
+
+ *dsize = be64_to_cpu(*dsize);
+
+ return opal_status_to_err(rc);
+}
+
+static int opal_get_next_variable(const char *key, uint64_t *keylen,
+ uint64_t keybufsize)
+{
+ int rc;
+
+ if (!key || !keylen)
+ return -EINVAL;
+
+ *keylen = cpu_to_be64(*keylen);
+
+ rc = opal_secvar_get_next(key, keylen, keybufsize);
+
+ *keylen = be64_to_cpu(*keylen);
+
+ return opal_status_to_err(rc);
+}
+
+static int opal_set_variable(const char *key, uint64_t ksize, u8 *data,
+ uint64_t dsize)
+{
+ int rc;
+
+ if (!key || !data)
+ return -EINVAL;
+
+ rc = opal_secvar_enqueue_update(key, ksize, data, dsize);
+
+ return opal_status_to_err(rc);
+}
+
+static const struct secvar_operations opal_secvar_ops = {
+ .get = opal_get_variable,
+ .get_next = opal_get_next_variable,
+ .set = opal_set_variable,
+};
+
+static int opal_secvar_probe(struct platform_device *pdev)
+{
+ if (!opal_check_token(OPAL_SECVAR_GET)
+ || !opal_check_token(OPAL_SECVAR_GET_NEXT)
+ || !opal_check_token(OPAL_SECVAR_ENQUEUE_UPDATE)) {
+ pr_err("OPAL doesn't support secure variables\n");
+ return -ENODEV;
+ }
+
+ set_secvar_ops(&opal_secvar_ops);
+
+ return 0;
+}
+
+static const struct of_device_id opal_secvar_match[] = {
+ { .compatible = "ibm,secvar-backend",},
+ {},
+};
+
+static struct platform_driver opal_secvar_driver = {
+ .driver = {
+ .name = "secvar",
+ .of_match_table = opal_secvar_match,
+ },
+};
+
+static int __init opal_secvar_init(void)
+{
+ return platform_driver_probe(&opal_secvar_driver, opal_secvar_probe);
+}
+device_initcall(opal_secvar_init);
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
index 31f13c13275f..f8ae1fb0c102 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
@@ -13,7 +13,7 @@
#include <asm/opal.h>
-DEFINE_MUTEX(sg_mutex);
+static DEFINE_MUTEX(sg_mutex);
static struct kobject *sg_kobj;
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index 66430eebe869..fd510d961b8c 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -1,7 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * PowerNV LPC bus handling.
+ * PowerNV SCOM bus debugfs interface
*
+ * Copyright 2010 Benjamin Herrenschmidt, IBM Corp
+ * <benh@kernel.crashing.org>
+ * and David Gibson, IBM Corporation.
* Copyright 2013 IBM Corp.
*/
@@ -10,62 +13,13 @@
#include <linux/bug.h>
#include <linux/gfp.h>
#include <linux/slab.h>
+#include <linux/uaccess.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/opal.h>
-#include <asm/scom.h>
-
-/*
- * We could probably fit that inside the scom_map_t
- * which is a void* after all but it's really too ugly
- * so let's kmalloc it for now
- */
-struct opal_scom_map {
- uint32_t chip;
- uint64_t addr;
-};
-
-static scom_map_t opal_scom_map(struct device_node *dev, u64 reg, u64 count)
-{
- struct opal_scom_map *m;
- const __be32 *gcid;
-
- if (!of_get_property(dev, "scom-controller", NULL)) {
- pr_err("%s: device %pOF is not a SCOM controller\n",
- __func__, dev);
- return SCOM_MAP_INVALID;
- }
- gcid = of_get_property(dev, "ibm,chip-id", NULL);
- if (!gcid) {
- pr_err("%s: device %pOF has no ibm,chip-id\n",
- __func__, dev);
- return SCOM_MAP_INVALID;
- }
- m = kmalloc(sizeof(*m), GFP_KERNEL);
- if (!m)
- return NULL;
- m->chip = be32_to_cpup(gcid);
- m->addr = reg;
-
- return (scom_map_t)m;
-}
-
-static void opal_scom_unmap(scom_map_t map)
-{
- kfree(map);
-}
-
-static int opal_xscom_err_xlate(int64_t rc)
-{
- switch(rc) {
- case 0:
- return 0;
- /* Add more translations if necessary */
- default:
- return -EIO;
- }
-}
+#include <asm/debugfs.h>
+#include <asm/prom.h>
static u64 opal_scom_unmangle(u64 addr)
{
@@ -98,39 +52,154 @@ static u64 opal_scom_unmangle(u64 addr)
return addr;
}
-static int opal_scom_read(scom_map_t map, u64 reg, u64 *value)
+static int opal_scom_read(uint32_t chip, uint64_t addr, u64 reg, u64 *value)
{
- struct opal_scom_map *m = map;
int64_t rc;
__be64 v;
- reg = opal_scom_unmangle(m->addr + reg);
- rc = opal_xscom_read(m->chip, reg, (__be64 *)__pa(&v));
+ reg = opal_scom_unmangle(addr + reg);
+ rc = opal_xscom_read(chip, reg, (__be64 *)__pa(&v));
+ if (rc) {
+ *value = 0xfffffffffffffffful;
+ return -EIO;
+ }
*value = be64_to_cpu(v);
- return opal_xscom_err_xlate(rc);
+ return 0;
}
-static int opal_scom_write(scom_map_t map, u64 reg, u64 value)
+static int opal_scom_write(uint32_t chip, uint64_t addr, u64 reg, u64 value)
{
- struct opal_scom_map *m = map;
int64_t rc;
- reg = opal_scom_unmangle(m->addr + reg);
- rc = opal_xscom_write(m->chip, reg, value);
- return opal_xscom_err_xlate(rc);
+ reg = opal_scom_unmangle(addr + reg);
+ rc = opal_xscom_write(chip, reg, value);
+ if (rc)
+ return -EIO;
+ return 0;
+}
+
+struct scom_debug_entry {
+ u32 chip;
+ struct debugfs_blob_wrapper path;
+ char name[16];
+};
+
+static ssize_t scom_debug_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct scom_debug_entry *ent = filp->private_data;
+ u64 __user *ubuf64 = (u64 __user *)ubuf;
+ loff_t off = *ppos;
+ ssize_t done = 0;
+ u64 reg, reg_base, reg_cnt, val;
+ int rc;
+
+ if (off < 0 || (off & 7) || (count & 7))
+ return -EINVAL;
+ reg_base = off >> 3;
+ reg_cnt = count >> 3;
+
+ for (reg = 0; reg < reg_cnt; reg++) {
+ rc = opal_scom_read(ent->chip, reg_base, reg, &val);
+ if (!rc)
+ rc = put_user(val, ubuf64);
+ if (rc) {
+ if (!done)
+ done = rc;
+ break;
+ }
+ ubuf64++;
+ *ppos += 8;
+ done += 8;
+ }
+ return done;
+}
+
+static ssize_t scom_debug_write(struct file *filp, const char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct scom_debug_entry *ent = filp->private_data;
+ u64 __user *ubuf64 = (u64 __user *)ubuf;
+ loff_t off = *ppos;
+ ssize_t done = 0;
+ u64 reg, reg_base, reg_cnt, val;
+ int rc;
+
+ if (off < 0 || (off & 7) || (count & 7))
+ return -EINVAL;
+ reg_base = off >> 3;
+ reg_cnt = count >> 3;
+
+ for (reg = 0; reg < reg_cnt; reg++) {
+ rc = get_user(val, ubuf64);
+ if (!rc)
+ rc = opal_scom_write(ent->chip, reg_base, reg, val);
+ if (rc) {
+ if (!done)
+ done = rc;
+ break;
+ }
+ ubuf64++;
+ done += 8;
+ }
+ return done;
}
-static const struct scom_controller opal_scom_controller = {
- .map = opal_scom_map,
- .unmap = opal_scom_unmap,
- .read = opal_scom_read,
- .write = opal_scom_write
+static const struct file_operations scom_debug_fops = {
+ .read = scom_debug_read,
+ .write = scom_debug_write,
+ .open = simple_open,
+ .llseek = default_llseek,
};
-static int opal_xscom_init(void)
+static int scom_debug_init_one(struct dentry *root, struct device_node *dn,
+ int chip)
{
- if (firmware_has_feature(FW_FEATURE_OPAL))
- scom_init(&opal_scom_controller);
+ struct scom_debug_entry *ent;
+ struct dentry *dir;
+
+ ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+ if (!ent)
+ return -ENOMEM;
+
+ ent->chip = chip;
+ snprintf(ent->name, 16, "%08x", chip);
+ ent->path.data = (void *)kasprintf(GFP_KERNEL, "%pOF", dn);
+ ent->path.size = strlen((char *)ent->path.data);
+
+ dir = debugfs_create_dir(ent->name, root);
+ if (!dir) {
+ kfree(ent->path.data);
+ kfree(ent);
+ return -1;
+ }
+
+ debugfs_create_blob("devspec", 0400, dir, &ent->path);
+ debugfs_create_file("access", 0600, dir, ent, &scom_debug_fops);
+
return 0;
}
-machine_arch_initcall(powernv, opal_xscom_init);
+
+static int scom_debug_init(void)
+{
+ struct device_node *dn;
+ struct dentry *root;
+ int chip, rc;
+
+ if (!firmware_has_feature(FW_FEATURE_OPAL))
+ return 0;
+
+ root = debugfs_create_dir("scom", powerpc_debugfs_root);
+ if (!root)
+ return -1;
+
+ rc = 0;
+ for_each_node_with_property(dn, "scom-controller") {
+ chip = of_get_ibm_chip_id(dn);
+ WARN_ON(chip == -1);
+ rc |= scom_debug_init_one(root, dn, chip);
+ }
+
+ return rc;
+}
+device_initcall(scom_debug_init);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index aba443be7daa..2b3dfd0b6cdd 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -35,6 +35,16 @@
#include "powernv.h"
+#define OPAL_MSG_QUEUE_MAX 16
+
+struct opal_msg_node {
+ struct list_head list;
+ struct opal_msg msg;
+};
+
+static DEFINE_SPINLOCK(msg_list_lock);
+static LIST_HEAD(msg_list);
+
/* /sys/firmware/opal */
struct kobject *opal_kobj;
@@ -50,6 +60,8 @@ struct mcheck_recoverable_range {
u64 recover_addr;
};
+static int msg_list_size;
+
static struct mcheck_recoverable_range *mc_recoverable_range;
static int mc_recoverable_range_len;
@@ -58,6 +70,8 @@ static DEFINE_SPINLOCK(opal_write_lock);
static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
static uint32_t opal_heartbeat;
static struct task_struct *kopald_tsk;
+static struct opal_msg *opal_msg;
+static u32 opal_msg_size __ro_after_init;
void opal_configure_cores(void)
{
@@ -235,6 +249,43 @@ static int __init opal_register_exception_handlers(void)
}
machine_early_initcall(powernv, opal_register_exception_handlers);
+static void queue_replay_msg(void *msg)
+{
+ struct opal_msg_node *msg_node;
+
+ if (msg_list_size < OPAL_MSG_QUEUE_MAX) {
+ msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
+ if (msg_node) {
+ INIT_LIST_HEAD(&msg_node->list);
+ memcpy(&msg_node->msg, msg, sizeof(struct opal_msg));
+ list_add_tail(&msg_node->list, &msg_list);
+ msg_list_size++;
+ } else
+ pr_warn_once("message queue no memory\n");
+
+ if (msg_list_size >= OPAL_MSG_QUEUE_MAX)
+ pr_warn_once("message queue full\n");
+ }
+}
+
+static void dequeue_replay_msg(enum opal_msg_type msg_type)
+{
+ struct opal_msg_node *msg_node, *tmp;
+
+ list_for_each_entry_safe(msg_node, tmp, &msg_list, list) {
+ if (be32_to_cpu(msg_node->msg.msg_type) != msg_type)
+ continue;
+
+ atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
+ msg_type,
+ &msg_node->msg);
+
+ list_del(&msg_node->list);
+ kfree(msg_node);
+ msg_list_size--;
+ }
+}
+
/*
* Opal message notifier based on message type. Allow subscribers to get
* notified for specific messgae type.
@@ -242,14 +293,30 @@ machine_early_initcall(powernv, opal_register_exception_handlers);
int opal_message_notifier_register(enum opal_msg_type msg_type,
struct notifier_block *nb)
{
+ int ret;
+ unsigned long flags;
+
if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
pr_warn("%s: Invalid arguments, msg_type:%d\n",
__func__, msg_type);
return -EINVAL;
}
- return atomic_notifier_chain_register(
- &opal_msg_notifier_head[msg_type], nb);
+ spin_lock_irqsave(&msg_list_lock, flags);
+ ret = atomic_notifier_chain_register(
+ &opal_msg_notifier_head[msg_type], nb);
+
+ /*
+ * If the registration succeeded, replay any queued messages that came
+ * in prior to the notifier chain registration. msg_list_lock held here
+ * to ensure they're delivered prior to any subsequent messages.
+ */
+ if (ret == 0)
+ dequeue_replay_msg(msg_type);
+
+ spin_unlock_irqrestore(&msg_list_lock, flags);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(opal_message_notifier_register);
@@ -263,6 +330,23 @@ EXPORT_SYMBOL_GPL(opal_message_notifier_unregister);
static void opal_message_do_notify(uint32_t msg_type, void *msg)
{
+ unsigned long flags;
+ bool queued = false;
+
+ spin_lock_irqsave(&msg_list_lock, flags);
+ if (opal_msg_notifier_head[msg_type].head == NULL) {
+ /*
+ * Queue up the msg since no notifiers have registered
+ * yet for this msg_type.
+ */
+ queue_replay_msg(msg);
+ queued = true;
+ }
+ spin_unlock_irqrestore(&msg_list_lock, flags);
+
+ if (queued)
+ return;
+
/* notify subscribers */
atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
msg_type, msg);
@@ -271,14 +355,9 @@ static void opal_message_do_notify(uint32_t msg_type, void *msg)
static void opal_handle_message(void)
{
s64 ret;
- /*
- * TODO: pre-allocate a message buffer depending on opal-msg-size
- * value in /proc/device-tree.
- */
- static struct opal_msg msg;
u32 type;
- ret = opal_get_msg(__pa(&msg), sizeof(msg));
+ ret = opal_get_msg(__pa(opal_msg), opal_msg_size);
/* No opal message pending. */
if (ret == OPAL_RESOURCE)
return;
@@ -290,14 +369,14 @@ static void opal_handle_message(void)
return;
}
- type = be32_to_cpu(msg.msg_type);
+ type = be32_to_cpu(opal_msg->msg_type);
/* Sanity check */
if (type >= OPAL_MSG_TYPE_MAX) {
pr_warn_once("%s: Unknown message type: %u\n", __func__, type);
return;
}
- opal_message_do_notify(type, (void *)&msg);
+ opal_message_do_notify(type, (void *)opal_msg);
}
static irqreturn_t opal_message_notify(int irq, void *data)
@@ -306,10 +385,24 @@ static irqreturn_t opal_message_notify(int irq, void *data)
return IRQ_HANDLED;
}
-static int __init opal_message_init(void)
+static int __init opal_message_init(struct device_node *opal_node)
{
int ret, i, irq;
+ ret = of_property_read_u32(opal_node, "opal-msg-size", &opal_msg_size);
+ if (ret) {
+ pr_notice("Failed to read opal-msg-size property\n");
+ opal_msg_size = sizeof(struct opal_msg);
+ }
+
+ opal_msg = kmalloc(opal_msg_size, GFP_KERNEL);
+ if (!opal_msg) {
+ opal_msg_size = sizeof(struct opal_msg);
+ /* Try to allocate fixed message size */
+ opal_msg = kmalloc(opal_msg_size, GFP_KERNEL);
+ BUG_ON(opal_msg == NULL);
+ }
+
for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
@@ -697,45 +790,81 @@ static int opal_sysfs_init(void)
return 0;
}
-static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
- struct bin_attribute *bin_attr,
- char *buf, loff_t off, size_t count)
+static ssize_t export_attr_read(struct file *fp, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf,
+ loff_t off, size_t count)
{
return memory_read_from_buffer(buf, count, &off, bin_attr->private,
bin_attr->size);
}
-static BIN_ATTR_RO(symbol_map, 0);
-
-static void opal_export_symmap(void)
+static int opal_add_one_export(struct kobject *parent, const char *export_name,
+ struct device_node *np, const char *prop_name)
{
- const __be64 *syms;
- unsigned int size;
- struct device_node *fw;
+ struct bin_attribute *attr = NULL;
+ const char *name = NULL;
+ u64 vals[2];
int rc;
- fw = of_find_node_by_path("/ibm,opal/firmware");
- if (!fw)
- return;
- syms = of_get_property(fw, "symbol-map", &size);
- if (!syms || size != 2 * sizeof(__be64))
- return;
+ rc = of_property_read_u64_array(np, prop_name, &vals[0], 2);
+ if (rc)
+ goto out;
- /* Setup attributes */
- bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
- bin_attr_symbol_map.size = be64_to_cpu(syms[1]);
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+ name = kstrdup(export_name, GFP_KERNEL);
+ if (!name) {
+ rc = -ENOMEM;
+ goto out;
+ }
- rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
- if (rc)
- pr_warn("Error %d creating OPAL symbols file\n", rc);
+ sysfs_bin_attr_init(attr);
+ attr->attr.name = name;
+ attr->attr.mode = 0400;
+ attr->read = export_attr_read;
+ attr->private = __va(vals[0]);
+ attr->size = vals[1];
+
+ rc = sysfs_create_bin_file(parent, attr);
+out:
+ if (rc) {
+ kfree(name);
+ kfree(attr);
+ }
+
+ return rc;
}
-static ssize_t export_attr_read(struct file *fp, struct kobject *kobj,
- struct bin_attribute *bin_attr, char *buf,
- loff_t off, size_t count)
+static void opal_add_exported_attrs(struct device_node *np,
+ struct kobject *kobj)
{
- return memory_read_from_buffer(buf, count, &off, bin_attr->private,
- bin_attr->size);
+ struct device_node *child;
+ struct property *prop;
+
+ for_each_property_of_node(np, prop) {
+ int rc;
+
+ if (!strcmp(prop->name, "name") ||
+ !strcmp(prop->name, "phandle"))
+ continue;
+
+ rc = opal_add_one_export(kobj, prop->name, np, prop->name);
+ if (rc) {
+ pr_warn("Unable to add export %pOF/%s, rc = %d!\n",
+ np, prop->name, rc);
+ }
+ }
+
+ for_each_child_of_node(np, child) {
+ struct kobject *child_kobj;
+
+ child_kobj = kobject_create_and_add(child->name, kobj);
+ if (!child_kobj) {
+ pr_err("Unable to create export dir for %pOF\n", child);
+ continue;
+ }
+
+ opal_add_exported_attrs(child, child_kobj);
+ }
}
/*
@@ -747,11 +876,8 @@ static ssize_t export_attr_read(struct file *fp, struct kobject *kobj,
*/
static void opal_export_attrs(void)
{
- struct bin_attribute *attr;
struct device_node *np;
- struct property *prop;
struct kobject *kobj;
- u64 vals[2];
int rc;
np = of_find_node_by_path("/ibm,opal/firmware/exports");
@@ -765,41 +891,16 @@ static void opal_export_attrs(void)
return;
}
- for_each_property_of_node(np, prop) {
- if (!strcmp(prop->name, "name") || !strcmp(prop->name, "phandle"))
- continue;
-
- if (of_property_read_u64_array(np, prop->name, &vals[0], 2))
- continue;
-
- attr = kzalloc(sizeof(*attr), GFP_KERNEL);
-
- if (attr == NULL) {
- pr_warn("Failed kmalloc for bin_attribute!");
- continue;
- }
-
- sysfs_bin_attr_init(attr);
- attr->attr.name = kstrdup(prop->name, GFP_KERNEL);
- attr->attr.mode = 0400;
- attr->read = export_attr_read;
- attr->private = __va(vals[0]);
- attr->size = vals[1];
+ opal_add_exported_attrs(np, kobj);
- if (attr->attr.name == NULL) {
- pr_warn("Failed kstrdup for bin_attribute attr.name");
- kfree(attr);
- continue;
- }
-
- rc = sysfs_create_bin_file(kobj, attr);
- if (rc) {
- pr_warn("Error %d creating OPAL sysfs exports/%s file\n",
- rc, prop->name);
- kfree(attr->attr.name);
- kfree(attr);
- }
- }
+ /*
+ * NB: symbol_map existed before the generic export interface so it
+ * lives under the top level opal_kobj.
+ */
+ rc = opal_add_one_export(opal_kobj, "symbol_map",
+ np->parent, "symbol-map");
+ if (rc)
+ pr_warn("Error %d creating OPAL symbols file\n", rc);
of_node_put(np);
}
@@ -910,7 +1011,7 @@ static int __init opal_init(void)
}
/* Initialise OPAL messaging system */
- opal_message_init();
+ opal_message_init(opal_node);
/* Initialise OPAL asynchronous completion interface */
opal_async_comp_init();
@@ -946,8 +1047,6 @@ static int __init opal_init(void)
/* Create "opal" kobject under /sys/firmware */
rc = opal_sysfs_init();
if (rc == 0) {
- /* Export symbol map to userspace */
- opal_export_symmap();
/* Setup dump region interface */
opal_dump_region_init();
/* Setup error log interface */
@@ -960,11 +1059,10 @@ static int __init opal_init(void)
opal_sys_param_init();
/* Setup message log sysfs interface. */
opal_msglog_sysfs_init();
+ /* Add all export properties*/
+ opal_export_attrs();
}
- /* Export all properties */
- opal_export_attrs();
-
/* Initialize platform devices: IPMI backend, PRD & flash interface */
opal_pdev_init("ibm,opal-ipmi");
opal_pdev_init("ibm,opal-flash");
@@ -988,6 +1086,9 @@ static int __init opal_init(void)
/* Initialise OPAL Power control interface */
opal_power_control_init();
+ /* Initialize OPAL secure variables */
+ opal_pdev_init("ibm,secvar-backend");
+
return 0;
}
machine_subsys_initcall(powernv, opal_init);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
index e28f03e1eb5e..5dc6847d5f4c 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -36,7 +36,8 @@ static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
struct page *tce_mem = NULL;
__be64 *addr;
- tce_mem = alloc_pages_node(nid, GFP_KERNEL, shift - PAGE_SHIFT);
+ tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN,
+ shift - PAGE_SHIFT);
if (!tce_mem) {
pr_err("Failed to allocate a TCE memory, level shift=%d\n",
shift);
@@ -48,6 +49,9 @@ static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
return addr;
}
+static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+ unsigned long size, unsigned int levels);
+
static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
{
__be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
@@ -57,9 +61,9 @@ static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
while (level) {
int n = (idx & mask) >> (level * shift);
- unsigned long tce;
+ unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n]));
- if (tmp[n] == 0) {
+ if (!tce) {
__be64 *tmp2;
if (!alloc)
@@ -70,10 +74,15 @@ static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
if (!tmp2)
return NULL;
- tmp[n] = cpu_to_be64(__pa(tmp2) |
- TCE_PCI_READ | TCE_PCI_WRITE);
+ tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE;
+ oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0,
+ cpu_to_be64(tce)));
+ if (oldtce) {
+ pnv_pci_ioda2_table_do_free_pages(tmp2,
+ ilog2(tbl->it_level_size) + 3, 1);
+ tce = oldtce;
+ }
}
- tce = be64_to_cpu(tmp[n]);
tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
idx &= ~mask;
@@ -161,6 +170,9 @@ void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
if (ptce)
*ptce = cpu_to_be64(0);
+ else
+ /* Skip the rest of the level */
+ i |= tbl->it_level_size - 1;
}
}
@@ -260,7 +272,6 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
PAGE_SHIFT);
const unsigned long tce_table_size = 1UL << table_shift;
- unsigned int tmplevels = levels;
if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
return -EINVAL;
@@ -268,9 +279,6 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
if (!is_power_of_2(window_size))
return -EINVAL;
- if (alloc_userspace_copy && (window_size > (1ULL << 32)))
- tmplevels = 1;
-
/* Adjust direct table size from window_size and levels */
entries_shift = (entries_shift + levels - 1) / levels;
level_shift = entries_shift + 3;
@@ -281,7 +289,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
/* Allocate TCE table */
addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
- tmplevels, tce_table_size, &offset, &total_allocated);
+ 1, tce_table_size, &offset, &total_allocated);
/* addr==NULL means that the first level allocation failed */
if (!addr)
@@ -292,18 +300,18 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
* we did not allocate as much as we wanted,
* release partially allocated table.
*/
- if (tmplevels == levels && offset < tce_table_size)
+ if (levels == 1 && offset < tce_table_size)
goto free_tces_exit;
/* Allocate userspace view of the TCE table */
if (alloc_userspace_copy) {
offset = 0;
uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
- tmplevels, tce_table_size, &offset,
+ 1, tce_table_size, &offset,
&total_allocated_uas);
if (!uas)
goto free_tces_exit;
- if (tmplevels == levels && (offset < tce_table_size ||
+ if (levels == 1 && (offset < tce_table_size ||
total_allocated_uas != total_allocated))
goto free_uas_exit;
}
@@ -318,7 +326,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
window_size, tce_table_size, bus_offset, tbl->it_base,
- tbl->it_userspace, tmplevels, levels);
+ tbl->it_userspace, 1, levels);
return 0;
@@ -332,14 +340,6 @@ free_tces_exit:
return -ENOMEM;
}
-static void pnv_iommu_table_group_link_free(struct rcu_head *head)
-{
- struct iommu_table_group_link *tgl = container_of(head,
- struct iommu_table_group_link, rcu);
-
- kfree(tgl);
-}
-
void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
struct iommu_table_group *table_group)
{
@@ -355,7 +355,7 @@ void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
if (tgl->table_group == table_group) {
list_del_rcu(&tgl->next);
- call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free);
+ kfree_rcu(tgl, rcu);
found = true;
break;
}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index d8080558d020..22c22cd7bd82 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -188,7 +188,7 @@ static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
unsigned int pe_num = pe->pe_number;
WARN_ON(pe->pdev);
- WARN_ON(pe->npucomp); /* NPUs are not supposed to be freed */
+ WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */
kfree(pe->npucomp);
memset(pe, 0, sizeof(struct pnv_ioda_pe));
clear_bit(pe_num, phb->ioda.pe_alloc);
@@ -777,6 +777,34 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
return 0;
}
+static void pnv_ioda_unset_peltv(struct pnv_phb *phb,
+ struct pnv_ioda_pe *pe,
+ struct pci_dev *parent)
+{
+ int64_t rc;
+
+ while (parent) {
+ struct pci_dn *pdn = pci_get_pdn(parent);
+
+ if (pdn && pdn->pe_number != IODA_INVALID_PE) {
+ rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
+ pe->pe_number,
+ OPAL_REMOVE_PE_FROM_DOMAIN);
+ /* XXX What to do in case of error ? */
+ }
+ parent = parent->bus->self;
+ }
+
+ opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
+ OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+
+ /* Disassociate PE in PELT */
+ rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
+ pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
+ if (rc)
+ pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc);
+}
+
static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
struct pci_dev *parent;
@@ -792,7 +820,7 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
parent = pe->pbus->self;
if (pe->flags & PNV_IODA_PE_BUS_ALL)
- count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
+ count = resource_size(&pe->pbus->busn_res);
else
count = 1;
@@ -827,25 +855,13 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
for (rid = pe->rid; rid < rid_end; rid++)
phb->ioda.pe_rmap[rid] = IODA_INVALID_PE;
- /* Release from all parents PELT-V */
- while (parent) {
- struct pci_dn *pdn = pci_get_pdn(parent);
- if (pdn && pdn->pe_number != IODA_INVALID_PE) {
- rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
- pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
- /* XXX What to do in case of error ? */
- }
- parent = parent->bus->self;
- }
-
- opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
- OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+ /*
+ * Release from all parents PELT-V. NPUs don't have a PELTV
+ * table
+ */
+ if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
+ pnv_ioda_unset_peltv(phb, pe, parent);
- /* Disassociate PE in PELT */
- rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
- pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
- if (rc)
- pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc);
rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
bcomp, dcomp, fcomp, OPAL_UNMAP_PE);
if (rc)
@@ -874,7 +890,7 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
parent = pe->pbus->self;
if (pe->flags & PNV_IODA_PE_BUS_ALL)
- count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
+ count = resource_size(&pe->pbus->busn_res);
else
count = 1;
@@ -1062,20 +1078,20 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
return NULL;
}
- /* NOTE: We get only one ref to the pci_dev for the pdn, not for the
- * pointer in the PE data structure, both should be destroyed at the
- * same time. However, this needs to be looked at more closely again
- * once we actually start removing things (Hotplug, SR-IOV, ...)
+ /* NOTE: We don't get a reference for the pointer in the PE
+ * data structure, both the device and PE structures should be
+ * destroyed at the same time. However, removing nvlink
+ * devices will need some work.
*
* At some point we want to remove the PDN completely anyways
*/
- pci_dev_get(dev);
pdn->pe_number = pe->pe_number;
pe->flags = PNV_IODA_PE_DEV;
pe->pdev = dev;
pe->pbus = NULL;
pe->mve_number = -1;
pe->rid = dev->bus->number << 8 | pdn->devfn;
+ pe->device_count++;
pe_info(pe, "Associated device to PE\n");
@@ -1084,13 +1100,13 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
pnv_ioda_free_pe(pe);
pdn->pe_number = IODA_INVALID_PE;
pe->pdev = NULL;
- pci_dev_put(dev);
return NULL;
}
/* Put PE to the list */
+ mutex_lock(&phb->ioda.pe_list_mutex);
list_add_tail(&pe->list, &phb->ioda.pe_list);
-
+ mutex_unlock(&phb->ioda.pe_list_mutex);
return pe;
}
@@ -1206,6 +1222,14 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
struct pnv_phb *phb = hose->private_data;
/*
+ * Intentionally leak a reference on the npu device (for
+ * nvlink only; this is not an opencapi path) to make sure it
+ * never goes away, as it's been the case all along and some
+ * work is needed otherwise.
+ */
+ pci_dev_get(npu_pdev);
+
+ /*
* Due to a hardware errata PE#0 on the NPU is reserved for
* error handling. This means we only have three PEs remaining
* which need to be assigned to four links, implying some
@@ -1228,11 +1252,11 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
*/
dev_info(&npu_pdev->dev,
"Associating to existing PE %x\n", pe_num);
- pci_dev_get(npu_pdev);
npu_pdn = pci_get_pdn(npu_pdev);
rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
npu_pdn->pe_number = pe_num;
phb->ioda.pe_rmap[rid] = pe->pe_number;
+ pe->device_count++;
/* Map the PE to this link */
rc = opal_pci_set_pe(phb->opal_id, pe_num, rid,
@@ -1268,8 +1292,6 @@ static void pnv_pci_ioda_setup_PEs(void)
{
struct pci_controller *hose;
struct pnv_phb *phb;
- struct pci_bus *bus;
- struct pci_dev *pdev;
struct pnv_ioda_pe *pe;
list_for_each_entry(hose, &hose_list, list_node) {
@@ -1281,11 +1303,6 @@ static void pnv_pci_ioda_setup_PEs(void)
if (phb->model == PNV_PHB_MODEL_NPU2)
WARN_ON_ONCE(pnv_npu2_init(hose));
}
- if (phb->type == PNV_PHB_NPU_OCAPI) {
- bus = hose->bus;
- list_for_each_entry(pdev, &bus->devices, bus_list)
- pnv_ioda_setup_dev_PE(pdev);
- }
}
list_for_each_entry(hose, &hose_list, list_node) {
phb = hose->private_data;
@@ -1558,6 +1575,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
/* Reserve PE for each VF */
for (vf_index = 0; vf_index < num_vfs; vf_index++) {
+ int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index);
+ int vf_bus = pci_iov_virtfn_bus(pdev, vf_index);
+ struct pci_dn *vf_pdn;
+
if (pdn->m64_single_mode)
pe_num = pdn->pe_num_map[vf_index];
else
@@ -1570,13 +1591,11 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
pe->pbus = NULL;
pe->parent_dev = pdev;
pe->mve_number = -1;
- pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) |
- pci_iov_virtfn_devfn(pdev, vf_index);
+ pe->rid = (vf_bus << 8) | vf_devfn;
pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
hose->global_number, pdev->bus->number,
- PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)),
- PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num);
+ PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);
if (pnv_ioda_configure_pe(phb, pe)) {
/* XXX What do we do here ? */
@@ -1590,6 +1609,15 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
list_add_tail(&pe->list, &phb->ioda.pe_list);
mutex_unlock(&phb->ioda.pe_list_mutex);
+ /* associate this pe to it's pdn */
+ list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) {
+ if (vf_pdn->busno == vf_bus &&
+ vf_pdn->devfn == vf_devfn) {
+ vf_pdn->pe_number = pe_num;
+ break;
+ }
+ }
+
pnv_pci_ioda2_setup_dma_pe(phb, pe);
#ifdef CONFIG_IOMMU_API
iommu_register_group(&pe->table_group,
@@ -1719,21 +1747,23 @@ int pnv_pcibios_sriov_disable(struct pci_dev *pdev)
pnv_pci_sriov_disable(pdev);
/* Release PCI data */
- remove_dev_pci_data(pdev);
+ remove_sriov_vf_pdns(pdev);
return 0;
}
int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
{
/* Allocate PCI data */
- add_dev_pci_data(pdev);
+ add_sriov_vf_pdns(pdev);
return pnv_pci_sriov_enable(pdev, num_vfs);
}
#endif /* CONFIG_PCI_IOV */
-static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
+static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
{
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+ struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn = pci_get_pdn(pdev);
struct pnv_ioda_pe *pe;
@@ -1939,26 +1969,12 @@ static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index,
}
#ifdef CONFIG_IOMMU_API
-static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
- unsigned long *hpa, enum dma_data_direction *direction)
+/* Common for IODA1 and IODA2 */
+static int pnv_ioda_tce_xchg_no_kill(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction,
+ bool realmode)
{
- long ret = pnv_tce_xchg(tbl, index, hpa, direction, true);
-
- if (!ret)
- pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, false);
-
- return ret;
-}
-
-static int pnv_ioda1_tce_xchg_rm(struct iommu_table *tbl, long index,
- unsigned long *hpa, enum dma_data_direction *direction)
-{
- long ret = pnv_tce_xchg(tbl, index, hpa, direction, false);
-
- if (!ret)
- pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, true);
-
- return ret;
+ return pnv_tce_xchg(tbl, index, hpa, direction, !realmode);
}
#endif
@@ -1973,8 +1989,8 @@ static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index,
static struct iommu_table_ops pnv_ioda1_iommu_ops = {
.set = pnv_ioda1_tce_build,
#ifdef CONFIG_IOMMU_API
- .exchange = pnv_ioda1_tce_xchg,
- .exchange_rm = pnv_ioda1_tce_xchg_rm,
+ .xchg_no_kill = pnv_ioda_tce_xchg_no_kill,
+ .tce_kill = pnv_pci_p7ioc_tce_invalidate,
.useraddrptr = pnv_tce_useraddrptr,
#endif
.clear = pnv_ioda1_tce_free,
@@ -2103,30 +2119,6 @@ static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
return ret;
}
-#ifdef CONFIG_IOMMU_API
-static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
- unsigned long *hpa, enum dma_data_direction *direction)
-{
- long ret = pnv_tce_xchg(tbl, index, hpa, direction, true);
-
- if (!ret)
- pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false);
-
- return ret;
-}
-
-static int pnv_ioda2_tce_xchg_rm(struct iommu_table *tbl, long index,
- unsigned long *hpa, enum dma_data_direction *direction)
-{
- long ret = pnv_tce_xchg(tbl, index, hpa, direction, false);
-
- if (!ret)
- pnv_pci_ioda2_tce_invalidate(tbl, index, 1, true);
-
- return ret;
-}
-#endif
-
static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
long npages)
{
@@ -2138,8 +2130,8 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
static struct iommu_table_ops pnv_ioda2_iommu_ops = {
.set = pnv_ioda2_tce_build,
#ifdef CONFIG_IOMMU_API
- .exchange = pnv_ioda2_tce_xchg,
- .exchange_rm = pnv_ioda2_tce_xchg_rm,
+ .xchg_no_kill = pnv_ioda_tce_xchg_no_kill,
+ .tce_kill = pnv_pci_ioda2_tce_invalidate,
.useraddrptr = pnv_tce_useraddrptr,
#endif
.clear = pnv_ioda2_tce_free,
@@ -2303,7 +2295,7 @@ found:
tbl->it_ops = &pnv_ioda1_iommu_ops;
pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift;
pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
- iommu_init_table(tbl, phb->hose->node);
+ iommu_init_table(tbl, phb->hose->node, 0, 0);
if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
pnv_ioda_setup_bus_dma(pe, pe->pbus);
@@ -2420,6 +2412,7 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
{
struct iommu_table *tbl = NULL;
long rc;
+ unsigned long res_start, res_end;
/*
* crashkernel= specifies the kdump kernel's maximum memory at
@@ -2433,19 +2426,46 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
* DMA window can be larger than available memory, which will
* cause errors later.
*/
- const u64 window_size = min((u64)pe->table_group.tce32_size, max_memory);
+ const u64 maxblock = 1UL << (PAGE_SHIFT + MAX_ORDER - 1);
+
+ /*
+ * We create the default window as big as we can. The constraint is
+ * the max order of allocation possible. The TCE table is likely to
+ * end up being multilevel and with on-demand allocation in place,
+ * the initial use is not going to be huge as the default window aims
+ * to support crippled devices (i.e. not fully 64bit DMAble) only.
+ */
+ /* iommu_table::it_map uses 1 bit per IOMMU page, hence 8 */
+ const u64 window_size = min((maxblock * 8) << PAGE_SHIFT, max_memory);
+ /* Each TCE level cannot exceed maxblock so go multilevel if needed */
+ unsigned long tces_order = ilog2(window_size >> PAGE_SHIFT);
+ unsigned long tcelevel_order = ilog2(maxblock >> 3);
+ unsigned int levels = tces_order / tcelevel_order;
+
+ if (tces_order % tcelevel_order)
+ levels += 1;
+ /*
+ * We try to stick to default levels (which is >1 at the moment) in
+ * order to save memory by relying on on-demain TCE level allocation.
+ */
+ levels = max_t(unsigned int, levels, POWERNV_IOMMU_DEFAULT_LEVELS);
- rc = pnv_pci_ioda2_create_table(&pe->table_group, 0,
- IOMMU_PAGE_SHIFT_4K,
- window_size,
- POWERNV_IOMMU_DEFAULT_LEVELS, false, &tbl);
+ rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, PAGE_SHIFT,
+ window_size, levels, false, &tbl);
if (rc) {
pe_err(pe, "Failed to create 32-bit TCE table, err %ld",
rc);
return rc;
}
- iommu_init_table(tbl, pe->phb->hose->node);
+ /* We use top part of 32bit space for MMIO so exclude it from DMA */
+ res_start = 0;
+ res_end = 0;
+ if (window_size > pe->phb->ioda.m32_pci_base) {
+ res_start = pe->phb->ioda.m32_pci_base >> tbl->it_page_shift;
+ res_end = min(window_size, SZ_4G) >> tbl->it_page_shift;
+ }
+ iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end);
rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
if (rc) {
@@ -2899,9 +2919,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
struct pci_dn *pdn;
int mul, total_vfs;
- if (!pdev->is_physfn || pci_dev_is_added(pdev))
- return;
-
pdn = pci_get_pdn(pdev);
pdn->vfs_expanded = 0;
pdn->m64_single_mode = false;
@@ -2976,6 +2993,30 @@ truncate_iov:
res->end = res->start - 1;
}
}
+
+static void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev)
+{
+ if (WARN_ON(pci_dev_is_added(pdev)))
+ return;
+
+ if (pdev->is_virtfn) {
+ struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev);
+
+ /*
+ * VF PEs are single-device PEs so their pdev pointer needs to
+ * be set. The pdev doesn't exist when the PE is allocated (in
+ * (pcibios_sriov_enable()) so we fix it up here.
+ */
+ pe->pdev = pdev;
+ WARN_ON(!(pe->flags & PNV_IODA_PE_VF));
+ } else if (pdev->is_physfn) {
+ /*
+ * For PFs adjust their allocated IOV resources to match what
+ * the PHB can support using it's M64 BAR table.
+ */
+ pnv_pci_ioda_fixup_iov_resources(pdev);
+ }
+}
#endif /* CONFIG_PCI_IOV */
static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
@@ -3072,19 +3113,9 @@ static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
#ifdef CONFIG_DEBUG_FS
static int pnv_pci_diag_data_set(void *data, u64 val)
{
- struct pci_controller *hose;
- struct pnv_phb *phb;
+ struct pnv_phb *phb = data;
s64 ret;
- if (val != 1ULL)
- return -EINVAL;
-
- hose = (struct pci_controller *)data;
- if (!hose || !hose->private_data)
- return -ENODEV;
-
- phb = hose->private_data;
-
/* Retrieve the diag data from firmware */
ret = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data,
phb->diag_data_size);
@@ -3096,8 +3127,35 @@ static int pnv_pci_diag_data_set(void *data, u64 val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(pnv_pci_diag_data_fops, NULL,
- pnv_pci_diag_data_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_diag_data_fops, NULL, pnv_pci_diag_data_set,
+ "%llu\n");
+
+static int pnv_pci_ioda_pe_dump(void *data, u64 val)
+{
+ struct pnv_phb *phb = data;
+ int pe_num;
+
+ for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) {
+ struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_num];
+
+ if (!test_bit(pe_num, phb->ioda.pe_alloc))
+ continue;
+
+ pe_warn(pe, "rid: %04x dev count: %2d flags: %s%s%s%s%s%s\n",
+ pe->rid, pe->device_count,
+ (pe->flags & PNV_IODA_PE_DEV) ? "dev " : "",
+ (pe->flags & PNV_IODA_PE_BUS) ? "bus " : "",
+ (pe->flags & PNV_IODA_PE_BUS_ALL) ? "all " : "",
+ (pe->flags & PNV_IODA_PE_MASTER) ? "master " : "",
+ (pe->flags & PNV_IODA_PE_SLAVE) ? "slave " : "",
+ (pe->flags & PNV_IODA_PE_VF) ? "vf " : "");
+ }
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_ioda_pe_dump_fops, NULL,
+ pnv_pci_ioda_pe_dump, "%llu\n");
#endif /* CONFIG_DEBUG_FS */
@@ -3122,8 +3180,10 @@ static void pnv_pci_ioda_create_dbgfs(void)
continue;
}
- debugfs_create_file("dump_diag_regs", 0200, phb->dbgfs, hose,
- &pnv_pci_diag_data_fops);
+ debugfs_create_file_unsafe("dump_diag_regs", 0200, phb->dbgfs,
+ phb, &pnv_pci_diag_data_fops);
+ debugfs_create_file_unsafe("dump_ioda_pe_state", 0200, phb->dbgfs,
+ phb, &pnv_pci_ioda_pe_dump_fops);
}
#endif /* CONFIG_DEBUG_FS */
}
@@ -3393,6 +3453,28 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev)
return true;
}
+static bool pnv_ocapi_enable_device_hook(struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ struct pci_dn *pdn;
+ struct pnv_ioda_pe *pe;
+
+ if (!phb->initialized)
+ return true;
+
+ pdn = pci_get_pdn(dev);
+ if (!pdn)
+ return false;
+
+ if (pdn->pe_number == IODA_INVALID_PE) {
+ pe = pnv_ioda_setup_dev_PE(dev);
+ if (!pe)
+ return false;
+ }
+ return true;
+}
+
static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group,
int num)
{
@@ -3522,7 +3604,10 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
struct pnv_phb *phb = pe->phb;
struct pnv_ioda_pe *slave, *tmp;
+ mutex_lock(&phb->ioda.pe_list_mutex);
list_del(&pe->list);
+ mutex_unlock(&phb->ioda.pe_list_mutex);
+
switch (phb->type) {
case PNV_PHB_IODA1:
pnv_pci_ioda1_release_pe_dma(pe);
@@ -3530,6 +3615,8 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
case PNV_PHB_IODA2:
pnv_pci_ioda2_release_pe_dma(pe);
break;
+ case PNV_PHB_NPU_OCAPI:
+ break;
default:
WARN_ON(1);
}
@@ -3604,9 +3691,29 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
OPAL_ASSERT_RESET);
}
+static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus)
+{
+ struct pci_controller *hose = bus->sysdata;
+ struct pnv_phb *phb = hose->private_data;
+ struct pnv_ioda_pe *pe;
+
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+ if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)))
+ continue;
+
+ if (!pe->pbus)
+ continue;
+
+ if (bus->number == ((pe->rid >> 8) & 0xFF)) {
+ pe->pbus = bus;
+ break;
+ }
+ }
+}
+
static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
- .dma_dev_setup = pnv_pci_dma_dev_setup,
- .dma_bus_setup = pnv_pci_dma_bus_setup,
+ .dma_dev_setup = pnv_pci_ioda_dma_dev_setup,
+ .dma_bus_setup = pnv_pci_ioda_dma_bus_setup,
.iommu_bypass_supported = pnv_pci_ioda_iommu_bypass_supported,
.setup_msi_irqs = pnv_setup_msi_irqs,
.teardown_msi_irqs = pnv_teardown_msi_irqs,
@@ -3619,7 +3726,6 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
};
static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
- .dma_dev_setup = pnv_pci_dma_dev_setup,
.setup_msi_irqs = pnv_setup_msi_irqs,
.teardown_msi_irqs = pnv_teardown_msi_irqs,
.enable_device_hook = pnv_pci_enable_device_hook,
@@ -3630,7 +3736,8 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
};
static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
- .enable_device_hook = pnv_pci_enable_device_hook,
+ .enable_device_hook = pnv_ocapi_enable_device_hook,
+ .release_device = pnv_pci_release_device,
.window_alignment = pnv_pci_window_alignment,
.reset_secondary_bus = pnv_pci_reset_secondary_bus,
.shutdown = pnv_pci_ioda_shutdown,
@@ -3865,14 +3972,13 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops;
break;
default:
- phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
hose->controller_ops = pnv_pci_ioda_controller_ops;
}
ppc_md.pcibios_default_alignment = pnv_pci_default_alignment;
#ifdef CONFIG_PCI_IOV
- ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;
+ ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov;
ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment;
ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable;
ppc_md.pcibios_sriov_disable = pnv_pcibios_sriov_disable;
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 6104418c9ad5..5bf818246339 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -38,7 +38,7 @@ static DEFINE_MUTEX(tunnel_mutex);
int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id)
{
- struct device_node *parent = np;
+ struct device_node *node = np;
u32 bdfn;
u64 phbid;
int ret;
@@ -48,24 +48,29 @@ int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id)
return -ENXIO;
bdfn = ((bdfn & 0x00ffff00) >> 8);
- while ((parent = of_get_parent(parent))) {
- if (!PCI_DN(parent)) {
- of_node_put(parent);
+ for (node = np; node; node = of_get_parent(node)) {
+ if (!PCI_DN(node)) {
+ of_node_put(node);
break;
}
- if (!of_device_is_compatible(parent, "ibm,ioda2-phb")) {
- of_node_put(parent);
+ if (!of_device_is_compatible(node, "ibm,ioda2-phb") &&
+ !of_device_is_compatible(node, "ibm,ioda3-phb") &&
+ !of_device_is_compatible(node, "ibm,ioda2-npu2-opencapi-phb")) {
+ of_node_put(node);
continue;
}
- ret = of_property_read_u64(parent, "ibm,opal-phbid", &phbid);
+ ret = of_property_read_u64(node, "ibm,opal-phbid", &phbid);
if (ret) {
- of_node_put(parent);
+ of_node_put(node);
return -ENXIO;
}
- *id = PCI_SLOT_ID(phbid, bdfn);
+ if (of_device_is_compatible(node, "ibm,ioda2-npu2-opencapi-phb"))
+ *id = PCI_PHB_SLOT_ID(phbid);
+ else
+ *id = PCI_SLOT_ID(phbid, bdfn);
return 0;
}
@@ -809,53 +814,6 @@ struct iommu_table *pnv_pci_table_alloc(int nid)
return tbl;
}
-void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
-{
- struct pci_controller *hose = pci_bus_to_host(pdev->bus);
- struct pnv_phb *phb = hose->private_data;
-#ifdef CONFIG_PCI_IOV
- struct pnv_ioda_pe *pe;
- struct pci_dn *pdn;
-
- /* Fix the VF pdn PE number */
- if (pdev->is_virtfn) {
- pdn = pci_get_pdn(pdev);
- WARN_ON(pdn->pe_number != IODA_INVALID_PE);
- list_for_each_entry(pe, &phb->ioda.pe_list, list) {
- if (pe->rid == ((pdev->bus->number << 8) |
- (pdev->devfn & 0xff))) {
- pdn->pe_number = pe->pe_number;
- pe->pdev = pdev;
- break;
- }
- }
- }
-#endif /* CONFIG_PCI_IOV */
-
- if (phb && phb->dma_dev_setup)
- phb->dma_dev_setup(phb, pdev);
-}
-
-void pnv_pci_dma_bus_setup(struct pci_bus *bus)
-{
- struct pci_controller *hose = bus->sysdata;
- struct pnv_phb *phb = hose->private_data;
- struct pnv_ioda_pe *pe;
-
- list_for_each_entry(pe, &phb->ioda.pe_list, list) {
- if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)))
- continue;
-
- if (!pe->pbus)
- continue;
-
- if (bus->number == ((pe->rid >> 8) & 0xFF)) {
- pe->pbus = bus;
- break;
- }
- }
-}
-
struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev)
{
struct pci_controller *hose = pci_bus_to_host(dev->bus);
@@ -944,6 +902,23 @@ void __init pnv_pci_init(void)
if (!firmware_has_feature(FW_FEATURE_OPAL))
return;
+#ifdef CONFIG_PCIEPORTBUS
+ /*
+ * On PowerNV PCIe devices are (currently) managed in cooperation
+ * with firmware. This isn't *strictly* required, but there's enough
+ * assumptions baked into both firmware and the platform code that
+ * it's unwise to allow the portbus services to be used.
+ *
+ * We need to fix this eventually, but for now set this flag to disable
+ * the portbus driver. The AER service isn't required since that AER
+ * events are handled via EEH. The pciehp hotplug driver can't work
+ * without kernel changes (and portbus binding breaks pnv_php). The
+ * other services also require some thinking about how we're going
+ * to integrate them.
+ */
+ pcie_ports_disabled = true;
+#endif
+
/* Look for IODA IO-Hubs. */
for_each_compatible_node(np, NULL, "ibm,ioda-hub") {
pnv_pci_init_ioda_hub(np);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 469c24463247..d3bbdeab3a32 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -108,7 +108,6 @@ struct pnv_phb {
int (*msi_setup)(struct pnv_phb *phb, struct pci_dev *dev,
unsigned int hwirq, unsigned int virq,
unsigned int is_64, struct msi_msg *msg);
- void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
int (*init_m64)(struct pnv_phb *phb);
int (*get_pe_state)(struct pnv_phb *phb, int pe_no);
void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
@@ -189,8 +188,6 @@ extern void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr);
extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
-extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev);
-extern void pnv_pci_dma_bus_setup(struct pci_bus *bus);
extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
@@ -219,7 +216,7 @@ extern struct iommu_table_group *pnv_npu_compound_attach(
struct pnv_ioda_pe *pe);
/* pci-ioda-tce.c */
-#define POWERNV_IOMMU_DEFAULT_LEVELS 1
+#define POWERNV_IOMMU_DEFAULT_LEVELS 2
#define POWERNV_IOMMU_MAX_LEVELS 5
extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index fd4a1c5a6369..1aa51c4fa904 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -30,4 +30,9 @@ extern void opal_event_shutdown(void);
bool cpu_core_split_required(void);
+struct memcons;
+ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count);
+u32 memcons_get_size(struct memcons *mc);
+struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name);
+
#endif /* _POWERNV_H */
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index a5e52f9eed3c..11fdae81b5dd 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -24,6 +24,7 @@
#include <linux/bug.h>
#include <linux/pci.h>
#include <linux/cpufreq.h>
+#include <linux/memblock.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
@@ -166,6 +167,14 @@ static void __init pnv_init(void)
else
#endif
add_preferred_console("hvc", 0, NULL);
+
+ if (!radix_enabled()) {
+ int i;
+
+ /* Allocate per cpu area to save old slb contents during MCE */
+ for_each_possible_cpu(i)
+ paca_ptrs[i]->mce_faulty_slbs = memblock_alloc_node(mmu_slb_size, __alignof__(*paca_ptrs[i]->mce_faulty_slbs), cpu_to_node(i));
+ }
}
static void __init pnv_init_IRQ(void)
@@ -224,6 +233,10 @@ static void __noreturn pnv_restart(char *cmd)
rc = opal_cec_reboot();
else if (strcmp(cmd, "full") == 0)
rc = opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL);
+ else if (strcmp(cmd, "mpipl") == 0)
+ rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, NULL);
+ else if (strcmp(cmd, "error") == 0)
+ rc = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, NULL);
else
rc = OPAL_UNSUPPORTED;
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 94cd96b9b7bb..13e251699346 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -146,20 +146,25 @@ static int pnv_smp_cpu_disable(void)
return 0;
}
+static void pnv_flush_interrupts(void)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (xive_enabled())
+ xive_flush_interrupt();
+ else
+ icp_opal_flush_interrupt();
+ } else {
+ icp_native_flush_interrupt();
+ }
+}
+
static void pnv_smp_cpu_kill_self(void)
{
+ unsigned long srr1, unexpected_mask, wmask;
unsigned int cpu;
- unsigned long srr1, wmask;
u64 lpcr_val;
/* Standard hot unplug procedure */
- /*
- * This hard disables local interurpts, ensuring we have no lazy
- * irqs pending.
- */
- WARN_ON(irqs_disabled());
- hard_irq_disable();
- WARN_ON(lazy_irq_pending());
idle_task_exit();
current->active_mm = NULL; /* for sanity */
@@ -173,6 +178,27 @@ static void pnv_smp_cpu_kill_self(void)
wmask = SRR1_WAKEMASK_P8;
/*
+ * This turns the irq soft-disabled state we're called with, into a
+ * hard-disabled state with pending irq_happened interrupts cleared.
+ *
+ * PACA_IRQ_DEC - Decrementer should be ignored.
+ * PACA_IRQ_HMI - Can be ignored, processing is done in real mode.
+ * PACA_IRQ_DBELL, EE, PMI - Unexpected.
+ */
+ hard_irq_disable();
+ if (generic_check_cpu_restart(cpu))
+ goto out;
+
+ unexpected_mask = ~(PACA_IRQ_DEC | PACA_IRQ_HMI | PACA_IRQ_HARD_DIS);
+ if (local_paca->irq_happened & unexpected_mask) {
+ if (local_paca->irq_happened & PACA_IRQ_EE)
+ pnv_flush_interrupts();
+ DBG("CPU%d Unexpected exit while offline irq_happened=%lx!\n",
+ cpu, local_paca->irq_happened);
+ }
+ local_paca->irq_happened = PACA_IRQ_HARD_DIS;
+
+ /*
* We don't want to take decrementer interrupts while we are
* offline, so clear LPCR:PECE1. We keep PECE2 (and
* LPCR_PECE_HVEE on P9) enabled so as to let IPIs in.
@@ -193,10 +219,11 @@ static void pnv_smp_cpu_kill_self(void)
* for coming online, which are handled via
* generic_check_cpu_restart() calls.
*/
- kvmppc_set_host_ipi(cpu, 0);
+ kvmppc_clear_host_ipi(cpu);
srr1 = pnv_cpu_offline(cpu);
+ WARN_ON_ONCE(!irqs_disabled());
WARN_ON(lazy_irq_pending());
/*
@@ -212,13 +239,7 @@ static void pnv_smp_cpu_kill_self(void)
*/
if (((srr1 & wmask) == SRR1_WAKEEE) ||
((srr1 & wmask) == SRR1_WAKEHVI)) {
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- if (xive_enabled())
- xive_flush_interrupt();
- else
- icp_opal_flush_interrupt();
- } else
- icp_native_flush_interrupt();
+ pnv_flush_interrupts();
} else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
@@ -266,7 +287,7 @@ static void pnv_smp_cpu_kill_self(void)
*/
lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
-
+out:
DBG("CPU%d coming online...\n", cpu);
}
diff --git a/arch/powerpc/platforms/powernv/ultravisor.c b/arch/powerpc/platforms/powernv/ultravisor.c
new file mode 100644
index 000000000000..e4a00ad06f9d
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/ultravisor.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Ultravisor high level interfaces
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#include <linux/init.h>
+#include <linux/printk.h>
+#include <linux/of_fdt.h>
+#include <linux/of.h>
+
+#include <asm/ultravisor.h>
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+
+#include "powernv.h"
+
+static struct kobject *ultravisor_kobj;
+
+int __init early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ if (!of_flat_dt_is_compatible(node, "ibm,ultravisor"))
+ return 0;
+
+ powerpc_firmware_features |= FW_FEATURE_ULTRAVISOR;
+ pr_debug("Ultravisor detected!\n");
+ return 1;
+}
+
+static struct memcons *uv_memcons;
+
+static ssize_t uv_msglog_read(struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *to,
+ loff_t pos, size_t count)
+{
+ return memcons_copy(uv_memcons, to, pos, count);
+}
+
+static struct bin_attribute uv_msglog_attr = {
+ .attr = {.name = "msglog", .mode = 0400},
+ .read = uv_msglog_read
+};
+
+static int __init uv_init(void)
+{
+ struct device_node *node;
+
+ if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+ return 0;
+
+ node = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware");
+ if (!node)
+ return -ENODEV;
+
+ uv_memcons = memcons_init(node, "memcons");
+ if (!uv_memcons)
+ return -ENOENT;
+
+ uv_msglog_attr.size = memcons_get_size(uv_memcons);
+
+ ultravisor_kobj = kobject_create_and_add("ultravisor", firmware_kobj);
+ if (!ultravisor_kobj)
+ return -ENOMEM;
+
+ return sysfs_create_bin_file(ultravisor_kobj, &uv_msglog_attr);
+}
+machine_subsys_initcall(powernv, uv_init);
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index 8108b9b9b9ea..b29368931c56 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -200,10 +200,6 @@ static void __init ps3_setup_arch(void)
smp_init_ps3();
#endif
-#ifdef CONFIG_DUMMY_CONSOLE
- conswitchp = &dummy_con;
-#endif
-
prealloc_ps3fb_videomemory();
prealloc_ps3flash_bounce_buffer();
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index bdaeaecdc06b..1193c294b8d0 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -184,10 +184,7 @@ static void spu_unmap(struct spu *spu)
* setup_areas - Map the spu regions into the address space.
*
* The current HV requires the spu shadow regs to be mapped with the
- * PTE page protection bits set as read-only (PP=3). This implementation
- * uses the low level __ioremap() to bypass the page protection settings
- * inforced by ioremap_prot() to get the needed PTE bits set for the
- * shadow regs.
+ * PTE page protection bits set as read-only.
*/
static int __init setup_areas(struct spu *spu)
@@ -195,9 +192,8 @@ static int __init setup_areas(struct spu *spu)
struct table {char* name; unsigned long addr; unsigned long size;};
unsigned long shadow_flags = pgprot_val(pgprot_noncached_wc(PAGE_KERNEL_RO));
- spu_pdata(spu)->shadow = __ioremap(spu_pdata(spu)->shadow_addr,
- sizeof(struct spe_shadow),
- shadow_flags);
+ spu_pdata(spu)->shadow = ioremap_prot(spu_pdata(spu)->shadow_addr,
+ sizeof(struct spe_shadow), shadow_flags);
if (!spu_pdata(spu)->shadow) {
pr_debug("%s:%d: ioremap shadow failed\n", __func__, __LINE__);
goto fail_ioremap;
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index 98410119c47b..3542b7bd6a46 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -686,20 +686,16 @@ static int ps3_dma_supported(struct device *_dev, u64 mask)
return mask >= DMA_BIT_MASK(32);
}
-static u64 ps3_dma_get_required_mask(struct device *_dev)
-{
- return DMA_BIT_MASK(32);
-}
-
static const struct dma_map_ops ps3_sb_dma_ops = {
.alloc = ps3_alloc_coherent,
.free = ps3_free_coherent,
.map_sg = ps3_sb_map_sg,
.unmap_sg = ps3_sb_unmap_sg,
.dma_supported = ps3_dma_supported,
- .get_required_mask = ps3_dma_get_required_mask,
.map_page = ps3_sb_map_page,
.unmap_page = ps3_unmap_page,
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
};
static const struct dma_map_ops ps3_ioc0_dma_ops = {
@@ -708,9 +704,10 @@ static const struct dma_map_ops ps3_ioc0_dma_ops = {
.map_sg = ps3_ioc0_map_sg,
.unmap_sg = ps3_ioc0_unmap_sg,
.dma_supported = ps3_dma_supported,
- .get_required_mask = ps3_dma_get_required_mask,
.map_page = ps3_ioc0_map_page,
.unmap_page = ps3_unmap_page,
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
};
/**
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index f7b484f55553..24c18362e5ea 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -21,7 +21,6 @@ config PPC_PSERIES
select PPC_DOORBELL
select HOTPLUG_CPU
select ARCH_RANDOM
- select PPC_DOORBELL
select FORCE_SMP
select SWIOTLB
default y
@@ -108,6 +107,7 @@ config PPC_SMLPAR
config CMM
tristate "Collaborative memory management"
depends on PPC_SMLPAR
+ select MEMORY_BALLOON
default y
help
Select this option, if you want to enable the kernel interface
@@ -145,3 +145,17 @@ config PAPR_SCM
tristate "Support for the PAPR Storage Class Memory interface"
help
Enable access to hypervisor provided storage class memory.
+
+config PPC_SVM
+ bool "Secure virtual machine (SVM) support for POWER"
+ depends on PPC_PSERIES
+ select SWIOTLB
+ select ARCH_HAS_MEM_ENCRYPT
+ select ARCH_HAS_FORCE_DMA_UNENCRYPTED
+ help
+ There are certain POWER platforms which support secure guests using
+ the Protected Execution Facility, with the help of an Ultravisor
+ executing below the hypervisor layer. This enables support for
+ those guests.
+
+ If unsure, say "N".
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index ab3d59aeacca..a3c74a5cf20d 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -26,6 +26,8 @@ obj-$(CONFIG_IBMVIO) += vio.o
obj-$(CONFIG_IBMEBUS) += ibmebus.o
obj-$(CONFIG_PAPR_SCM) += papr_scm.o
obj-$(CONFIG_PPC_SPLPAR) += vphn.o
+obj-$(CONFIG_PPC_SVM) += svm.o
+obj-$(CONFIG_FA_DUMP) += rtas-fadump.o
ifdef CONFIG_PPC_PSERIES
obj-$(CONFIG_SUSPEND) += suspend.o
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index b33251d75927..9dba7e880885 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -19,6 +19,10 @@
#include <linux/stringify.h>
#include <linux/swap.h>
#include <linux/device.h>
+#include <linux/mount.h>
+#include <linux/pseudo_fs.h>
+#include <linux/magic.h>
+#include <linux/balloon_compaction.h>
#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <asm/mmu.h>
@@ -38,12 +42,8 @@
#define CMM_MIN_MEM_MB 256
#define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10))
#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
-/*
- * The priority level tries to ensure that this notifier is called as
- * late as possible to reduce thrashing in the shared memory pool.
- */
+
#define CMM_MEM_HOTPLUG_PRI 1
-#define CMM_MEM_ISOLATE_PRI 15
static unsigned int delay = CMM_DEFAULT_DELAY;
static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
@@ -51,6 +51,8 @@ static unsigned int oom_kb = CMM_OOM_KB;
static unsigned int cmm_debug = CMM_DEBUG;
static unsigned int cmm_disabled = CMM_DISABLE;
static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
+static bool __read_mostly simulate;
+static unsigned long simulate_loan_target_kb;
static struct device cmm_dev;
MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
@@ -74,35 +76,31 @@ MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
module_param_named(debug, cmm_debug, uint, 0644);
MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
"[Default=" __stringify(CMM_DEBUG) "]");
-
-#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long))
+module_param_named(simulate, simulate, bool, 0444);
+MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw).");
#define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); }
-struct cmm_page_array {
- struct cmm_page_array *next;
- unsigned long index;
- unsigned long page[CMM_NR_PAGES];
-};
-
-static unsigned long loaned_pages;
+static atomic_long_t loaned_pages;
static unsigned long loaned_pages_target;
static unsigned long oom_freed_pages;
-static struct cmm_page_array *cmm_page_list;
-static DEFINE_SPINLOCK(cmm_lock);
-
static DEFINE_MUTEX(hotplug_mutex);
static int hotplug_occurred; /* protected by the hotplug mutex */
static struct task_struct *cmm_thread_ptr;
+static struct balloon_dev_info b_dev_info;
-static long plpar_page_set_loaned(unsigned long vpa)
+static long plpar_page_set_loaned(struct page *page)
{
+ const unsigned long vpa = page_to_phys(page);
unsigned long cmo_page_sz = cmo_get_page_size();
long rc = 0;
int i;
+ if (unlikely(simulate))
+ return 0;
+
for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);
@@ -113,12 +111,16 @@ static long plpar_page_set_loaned(unsigned long vpa)
return rc;
}
-static long plpar_page_set_active(unsigned long vpa)
+static long plpar_page_set_active(struct page *page)
{
+ const unsigned long vpa = page_to_phys(page);
unsigned long cmo_page_sz = cmo_get_page_size();
long rc = 0;
int i;
+ if (unlikely(simulate))
+ return 0;
+
for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);
@@ -138,8 +140,7 @@ static long plpar_page_set_active(unsigned long vpa)
**/
static long cmm_alloc_pages(long nr)
{
- struct cmm_page_array *pa, *npa;
- unsigned long addr;
+ struct page *page;
long rc;
cmm_dbg("Begin request for %ld pages\n", nr);
@@ -156,46 +157,19 @@ static long cmm_alloc_pages(long nr)
break;
}
- addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
- __GFP_NORETRY | __GFP_NOMEMALLOC);
- if (!addr)
+ page = balloon_page_alloc();
+ if (!page)
break;
- spin_lock(&cmm_lock);
- pa = cmm_page_list;
- if (!pa || pa->index >= CMM_NR_PAGES) {
- /* Need a new page for the page list. */
- spin_unlock(&cmm_lock);
- npa = (struct cmm_page_array *)__get_free_page(
- GFP_NOIO | __GFP_NOWARN |
- __GFP_NORETRY | __GFP_NOMEMALLOC);
- if (!npa) {
- pr_info("%s: Can not allocate new page list\n", __func__);
- free_page(addr);
- break;
- }
- spin_lock(&cmm_lock);
- pa = cmm_page_list;
-
- if (!pa || pa->index >= CMM_NR_PAGES) {
- npa->next = pa;
- npa->index = 0;
- pa = npa;
- cmm_page_list = pa;
- } else
- free_page((unsigned long) npa);
- }
-
- if ((rc = plpar_page_set_loaned(__pa(addr)))) {
+ rc = plpar_page_set_loaned(page);
+ if (rc) {
pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
- spin_unlock(&cmm_lock);
- free_page(addr);
+ __free_page(page);
break;
}
- pa->page[pa->index++] = addr;
- loaned_pages++;
- totalram_pages_dec();
- spin_unlock(&cmm_lock);
+ balloon_page_enqueue(&b_dev_info, page);
+ atomic_long_inc(&loaned_pages);
+ adjust_managed_page_count(page, -1);
nr--;
}
@@ -212,30 +186,19 @@ static long cmm_alloc_pages(long nr)
**/
static long cmm_free_pages(long nr)
{
- struct cmm_page_array *pa;
- unsigned long addr;
+ struct page *page;
cmm_dbg("Begin free of %ld pages.\n", nr);
- spin_lock(&cmm_lock);
- pa = cmm_page_list;
while (nr) {
- if (!pa || pa->index <= 0)
+ page = balloon_page_dequeue(&b_dev_info);
+ if (!page)
break;
- addr = pa->page[--pa->index];
-
- if (pa->index == 0) {
- pa = pa->next;
- free_page((unsigned long) cmm_page_list);
- cmm_page_list = pa;
- }
-
- plpar_page_set_active(__pa(addr));
- free_page(addr);
- loaned_pages--;
+ plpar_page_set_active(page);
+ adjust_managed_page_count(page, 1);
+ __free_page(page);
+ atomic_long_dec(&loaned_pages);
nr--;
- totalram_pages_inc();
}
- spin_unlock(&cmm_lock);
cmm_dbg("End request with %ld pages unfulfilled\n", nr);
return nr;
}
@@ -257,7 +220,7 @@ static int cmm_oom_notify(struct notifier_block *self,
cmm_dbg("OOM processing started\n");
nr = cmm_free_pages(nr);
- loaned_pages_target = loaned_pages;
+ loaned_pages_target = atomic_long_read(&loaned_pages);
*freed += KB2PAGES(oom_kb) - nr;
oom_freed_pages += KB2PAGES(oom_kb) - nr;
cmm_dbg("OOM processing complete\n");
@@ -274,19 +237,24 @@ static int cmm_oom_notify(struct notifier_block *self,
**/
static void cmm_get_mpp(void)
{
+ const long __loaned_pages = atomic_long_read(&loaned_pages);
+ const long total_pages = totalram_pages() + __loaned_pages;
int rc;
struct hvcall_mpp_data mpp_data;
signed long active_pages_target, page_loan_request, target;
- signed long total_pages = totalram_pages() + loaned_pages;
signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
- rc = h_get_mpp(&mpp_data);
-
- if (rc != H_SUCCESS)
- return;
-
- page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
- target = page_loan_request + (signed long)loaned_pages;
+ if (likely(!simulate)) {
+ rc = h_get_mpp(&mpp_data);
+ if (rc != H_SUCCESS)
+ return;
+ page_loan_request = div_s64((s64)mpp_data.loan_request,
+ PAGE_SIZE);
+ target = page_loan_request + __loaned_pages;
+ } else {
+ target = KB2PAGES(simulate_loan_target_kb);
+ page_loan_request = target - __loaned_pages;
+ }
if (target < 0 || total_pages < min_mem_pages)
target = 0;
@@ -307,7 +275,7 @@ static void cmm_get_mpp(void)
loaned_pages_target = target;
cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
- page_loan_request, loaned_pages, loaned_pages_target,
+ page_loan_request, __loaned_pages, loaned_pages_target,
oom_freed_pages, totalram_pages());
}
@@ -325,6 +293,7 @@ static struct notifier_block cmm_oom_nb = {
static int cmm_thread(void *dummy)
{
unsigned long timeleft;
+ long __loaned_pages;
while (1) {
timeleft = msleep_interruptible(delay * 1000);
@@ -355,11 +324,12 @@ static int cmm_thread(void *dummy)
cmm_get_mpp();
- if (loaned_pages_target > loaned_pages) {
- if (cmm_alloc_pages(loaned_pages_target - loaned_pages))
- loaned_pages_target = loaned_pages;
- } else if (loaned_pages_target < loaned_pages)
- cmm_free_pages(loaned_pages - loaned_pages_target);
+ __loaned_pages = atomic_long_read(&loaned_pages);
+ if (loaned_pages_target > __loaned_pages) {
+ if (cmm_alloc_pages(loaned_pages_target - __loaned_pages))
+ loaned_pages_target = __loaned_pages;
+ } else if (loaned_pages_target < __loaned_pages)
+ cmm_free_pages(__loaned_pages - loaned_pages_target);
}
return 0;
}
@@ -373,7 +343,7 @@ static int cmm_thread(void *dummy)
} \
static DEVICE_ATTR(name, 0444, show_##name, NULL)
-CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
+CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages)));
CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
static ssize_t show_oom_pages(struct device *dev,
@@ -406,11 +376,18 @@ static struct device_attribute *cmm_attrs[] = {
&dev_attr_oom_freed_kb,
};
+static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644,
+ simulate_loan_target_kb);
+
static struct bus_type cmm_subsys = {
.name = "cmm",
.dev_name = "cmm",
};
+static void cmm_release_device(struct device *dev)
+{
+}
+
/**
* cmm_sysfs_register - Register with sysfs
*
@@ -426,6 +403,7 @@ static int cmm_sysfs_register(struct device *dev)
dev->id = 0;
dev->bus = &cmm_subsys;
+ dev->release = cmm_release_device;
if ((rc = device_register(dev)))
goto subsys_unregister;
@@ -435,6 +413,11 @@ static int cmm_sysfs_register(struct device *dev)
goto fail;
}
+ if (!simulate)
+ return 0;
+ rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr);
+ if (rc)
+ goto fail;
return 0;
fail:
@@ -471,7 +454,7 @@ static int cmm_reboot_notifier(struct notifier_block *nb,
if (cmm_thread_ptr)
kthread_stop(cmm_thread_ptr);
cmm_thread_ptr = NULL;
- cmm_free_pages(loaned_pages);
+ cmm_free_pages(atomic_long_read(&loaned_pages));
}
return NOTIFY_DONE;
}
@@ -481,142 +464,6 @@ static struct notifier_block cmm_reboot_nb = {
};
/**
- * cmm_count_pages - Count the number of pages loaned in a particular range.
- *
- * @arg: memory_isolate_notify structure with address range and count
- *
- * Return value:
- * 0 on success
- **/
-static unsigned long cmm_count_pages(void *arg)
-{
- struct memory_isolate_notify *marg = arg;
- struct cmm_page_array *pa;
- unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn);
- unsigned long end = start + (marg->nr_pages << PAGE_SHIFT);
- unsigned long idx;
-
- spin_lock(&cmm_lock);
- pa = cmm_page_list;
- while (pa) {
- if ((unsigned long)pa >= start && (unsigned long)pa < end)
- marg->pages_found++;
- for (idx = 0; idx < pa->index; idx++)
- if (pa->page[idx] >= start && pa->page[idx] < end)
- marg->pages_found++;
- pa = pa->next;
- }
- spin_unlock(&cmm_lock);
- return 0;
-}
-
-/**
- * cmm_memory_isolate_cb - Handle memory isolation notifier calls
- * @self: notifier block struct
- * @action: action to take
- * @arg: struct memory_isolate_notify data for handler
- *
- * Return value:
- * NOTIFY_OK or notifier error based on subfunction return value
- **/
-static int cmm_memory_isolate_cb(struct notifier_block *self,
- unsigned long action, void *arg)
-{
- int ret = 0;
-
- if (action == MEM_ISOLATE_COUNT)
- ret = cmm_count_pages(arg);
-
- return notifier_from_errno(ret);
-}
-
-static struct notifier_block cmm_mem_isolate_nb = {
- .notifier_call = cmm_memory_isolate_cb,
- .priority = CMM_MEM_ISOLATE_PRI
-};
-
-/**
- * cmm_mem_going_offline - Unloan pages where memory is to be removed
- * @arg: memory_notify structure with page range to be offlined
- *
- * Return value:
- * 0 on success
- **/
-static int cmm_mem_going_offline(void *arg)
-{
- struct memory_notify *marg = arg;
- unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn);
- unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT);
- struct cmm_page_array *pa_curr, *pa_last, *npa;
- unsigned long idx;
- unsigned long freed = 0;
-
- cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n",
- start_page, marg->nr_pages);
- spin_lock(&cmm_lock);
-
- /* Search the page list for pages in the range to be offlined */
- pa_last = pa_curr = cmm_page_list;
- while (pa_curr) {
- for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) {
- if ((pa_curr->page[idx] < start_page) ||
- (pa_curr->page[idx] >= end_page))
- continue;
-
- plpar_page_set_active(__pa(pa_curr->page[idx]));
- free_page(pa_curr->page[idx]);
- freed++;
- loaned_pages--;
- totalram_pages_inc();
- pa_curr->page[idx] = pa_last->page[--pa_last->index];
- if (pa_last->index == 0) {
- if (pa_curr == pa_last)
- pa_curr = pa_last->next;
- pa_last = pa_last->next;
- free_page((unsigned long)cmm_page_list);
- cmm_page_list = pa_last;
- }
- }
- pa_curr = pa_curr->next;
- }
-
- /* Search for page list structures in the range to be offlined */
- pa_last = NULL;
- pa_curr = cmm_page_list;
- while (pa_curr) {
- if (((unsigned long)pa_curr >= start_page) &&
- ((unsigned long)pa_curr < end_page)) {
- npa = (struct cmm_page_array *)__get_free_page(
- GFP_NOIO | __GFP_NOWARN |
- __GFP_NORETRY | __GFP_NOMEMALLOC);
- if (!npa) {
- spin_unlock(&cmm_lock);
- cmm_dbg("Failed to allocate memory for list "
- "management. Memory hotplug "
- "failed.\n");
- return -ENOMEM;
- }
- memcpy(npa, pa_curr, PAGE_SIZE);
- if (pa_curr == cmm_page_list)
- cmm_page_list = npa;
- if (pa_last)
- pa_last->next = npa;
- free_page((unsigned long) pa_curr);
- freed++;
- pa_curr = npa;
- }
-
- pa_last = pa_curr;
- pa_curr = pa_curr->next;
- }
-
- spin_unlock(&cmm_lock);
- cmm_dbg("Released %ld pages in the search range.\n", freed);
-
- return 0;
-}
-
-/**
* cmm_memory_cb - Handle memory hotplug notifier calls
* @self: notifier block struct
* @action: action to take
@@ -635,7 +482,6 @@ static int cmm_memory_cb(struct notifier_block *self,
case MEM_GOING_OFFLINE:
mutex_lock(&hotplug_mutex);
hotplug_occurred = 1;
- ret = cmm_mem_going_offline(arg);
break;
case MEM_OFFLINE:
case MEM_CANCEL_OFFLINE:
@@ -656,6 +502,116 @@ static struct notifier_block cmm_mem_nb = {
.priority = CMM_MEM_HOTPLUG_PRI
};
+#ifdef CONFIG_BALLOON_COMPACTION
+static struct vfsmount *balloon_mnt;
+
+static int cmm_init_fs_context(struct fs_context *fc)
+{
+ return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;
+}
+
+static struct file_system_type balloon_fs = {
+ .name = "ppc-cmm",
+ .init_fs_context = cmm_init_fs_context,
+ .kill_sb = kill_anon_super,
+};
+
+static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
+ struct page *newpage, struct page *page,
+ enum migrate_mode mode)
+{
+ unsigned long flags;
+
+ /*
+ * loan/"inflate" the newpage first.
+ *
+ * We might race against the cmm_thread who might discover after our
+ * loan request that another page is to be unloaned. However, once
+ * the cmm_thread runs again later, this error will automatically
+ * be corrected.
+ */
+ if (plpar_page_set_loaned(newpage)) {
+ /* Unlikely, but possible. Tell the caller not to retry now. */
+ pr_err_ratelimited("%s: Cannot set page to loaned.", __func__);
+ return -EBUSY;
+ }
+
+ /* balloon page list reference */
+ get_page(newpage);
+
+ /*
+ * When we migrate a page to a different zone, we have to fixup the
+ * count of both involved zones as we adjusted the managed page count
+ * when inflating.
+ */
+ if (page_zone(page) != page_zone(newpage)) {
+ adjust_managed_page_count(page, 1);
+ adjust_managed_page_count(newpage, -1);
+ }
+
+ spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+ balloon_page_insert(b_dev_info, newpage);
+ balloon_page_delete(page);
+ b_dev_info->isolated_pages--;
+ spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
+
+ /*
+ * activate/"deflate" the old page. We ignore any errors just like the
+ * other callers.
+ */
+ plpar_page_set_active(page);
+
+ /* balloon page list reference */
+ put_page(page);
+
+ return MIGRATEPAGE_SUCCESS;
+}
+
+static int cmm_balloon_compaction_init(void)
+{
+ int rc;
+
+ balloon_devinfo_init(&b_dev_info);
+ b_dev_info.migratepage = cmm_migratepage;
+
+ balloon_mnt = kern_mount(&balloon_fs);
+ if (IS_ERR(balloon_mnt)) {
+ rc = PTR_ERR(balloon_mnt);
+ balloon_mnt = NULL;
+ return rc;
+ }
+
+ b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
+ if (IS_ERR(b_dev_info.inode)) {
+ rc = PTR_ERR(b_dev_info.inode);
+ b_dev_info.inode = NULL;
+ kern_unmount(balloon_mnt);
+ balloon_mnt = NULL;
+ return rc;
+ }
+
+ b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
+ return 0;
+}
+static void cmm_balloon_compaction_deinit(void)
+{
+ if (b_dev_info.inode)
+ iput(b_dev_info.inode);
+ b_dev_info.inode = NULL;
+ kern_unmount(balloon_mnt);
+ balloon_mnt = NULL;
+}
+#else /* CONFIG_BALLOON_COMPACTION */
+static int cmm_balloon_compaction_init(void)
+{
+ return 0;
+}
+
+static void cmm_balloon_compaction_deinit(void)
+{
+}
+#endif /* CONFIG_BALLOON_COMPACTION */
+
/**
* cmm_init - Module initialization
*
@@ -664,26 +620,31 @@ static struct notifier_block cmm_mem_nb = {
**/
static int cmm_init(void)
{
- int rc = -ENOMEM;
+ int rc;
- if (!firmware_has_feature(FW_FEATURE_CMO))
+ if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
return -EOPNOTSUPP;
- if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
+ rc = cmm_balloon_compaction_init();
+ if (rc)
return rc;
+ rc = register_oom_notifier(&cmm_oom_nb);
+ if (rc < 0)
+ goto out_balloon_compaction;
+
if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
goto out_oom_notifier;
if ((rc = cmm_sysfs_register(&cmm_dev)))
goto out_reboot_notifier;
- if (register_memory_notifier(&cmm_mem_nb) ||
- register_memory_isolate_notifier(&cmm_mem_isolate_nb))
+ rc = register_memory_notifier(&cmm_mem_nb);
+ if (rc)
goto out_unregister_notifier;
if (cmm_disabled)
- return rc;
+ return 0;
cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
if (IS_ERR(cmm_thread_ptr)) {
@@ -691,16 +652,16 @@ static int cmm_init(void)
goto out_unregister_notifier;
}
- return rc;
-
+ return 0;
out_unregister_notifier:
unregister_memory_notifier(&cmm_mem_nb);
- unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
cmm_unregister_sysfs(&cmm_dev);
out_reboot_notifier:
unregister_reboot_notifier(&cmm_reboot_nb);
out_oom_notifier:
unregister_oom_notifier(&cmm_oom_nb);
+out_balloon_compaction:
+ cmm_balloon_compaction_deinit();
return rc;
}
@@ -717,9 +678,9 @@ static void cmm_exit(void)
unregister_oom_notifier(&cmm_oom_nb);
unregister_reboot_notifier(&cmm_reboot_nb);
unregister_memory_notifier(&cmm_mem_nb);
- unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
- cmm_free_pages(loaned_pages);
+ cmm_free_pages(atomic_long_read(&loaned_pages));
cmm_unregister_sysfs(&cmm_dev);
+ cmm_balloon_compaction_deinit();
}
/**
@@ -739,7 +700,7 @@ static int cmm_set_disable(const char *val, const struct kernel_param *kp)
if (cmm_thread_ptr)
kthread_stop(cmm_thread_ptr);
cmm_thread_ptr = NULL;
- cmm_free_pages(loaned_pages);
+ cmm_free_pages(atomic_long_read(&loaned_pages));
} else if (!disable && cmm_disabled) {
cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
if (IS_ERR(cmm_thread_ptr))
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index 2b87480f2837..eab8aa293743 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -19,7 +19,6 @@
struct dtl {
struct dtl_entry *buf;
- struct dentry *file;
int cpu;
int buf_entries;
u64 last_idx;
@@ -320,46 +319,28 @@ static const struct file_operations dtl_fops = {
static struct dentry *dtl_dir;
-static int dtl_setup_file(struct dtl *dtl)
+static void dtl_setup_file(struct dtl *dtl)
{
char name[10];
sprintf(name, "cpu-%d", dtl->cpu);
- dtl->file = debugfs_create_file(name, 0400, dtl_dir, dtl, &dtl_fops);
- if (!dtl->file)
- return -ENOMEM;
-
- return 0;
+ debugfs_create_file(name, 0400, dtl_dir, dtl, &dtl_fops);
}
static int dtl_init(void)
{
- struct dentry *event_mask_file, *buf_entries_file;
- int rc, i;
+ int i;
if (!firmware_has_feature(FW_FEATURE_SPLPAR))
return -ENODEV;
/* set up common debugfs structure */
- rc = -ENOMEM;
dtl_dir = debugfs_create_dir("dtl", powerpc_debugfs_root);
- if (!dtl_dir) {
- printk(KERN_WARNING "%s: can't create dtl root dir\n",
- __func__);
- goto err;
- }
- event_mask_file = debugfs_create_x8("dtl_event_mask", 0600,
- dtl_dir, &dtl_event_mask);
- buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0400,
- dtl_dir, &dtl_buf_entries);
-
- if (!event_mask_file || !buf_entries_file) {
- printk(KERN_WARNING "%s: can't create dtl files\n", __func__);
- goto err_remove_dir;
- }
+ debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask);
+ debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries);
/* set up the per-cpu log structures */
for_each_possible_cpu(i) {
@@ -367,16 +348,9 @@ static int dtl_init(void)
spin_lock_init(&dtl->lock);
dtl->cpu = i;
- rc = dtl_setup_file(dtl);
- if (rc)
- goto err_remove_dir;
+ dtl_setup_file(dtl);
}
return 0;
-
-err_remove_dir:
- debugfs_remove_recursive(dtl_dir);
-err:
- return rc;
}
machine_arch_initcall(pseries, dtl_init);
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 9edae1863e2f..893ba3f562c4 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -42,42 +42,44 @@ static int ibm_get_config_addr_info;
static int ibm_get_config_addr_info2;
static int ibm_configure_pe;
-#ifdef CONFIG_PCI_IOV
void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
{
struct pci_dn *pdn = pci_get_pdn(pdev);
- struct pci_dn *physfn_pdn;
- struct eeh_dev *edev;
- if (!pdev->is_virtfn)
+ if (eeh_has_flag(EEH_FORCE_DISABLED))
return;
- pdn->device_id = pdev->device;
- pdn->vendor_id = pdev->vendor;
- pdn->class_code = pdev->class;
- /*
- * Last allow unfreeze return code used for retrieval
- * by user space in eeh-sysfs to show the last command
- * completion from platform.
- */
- pdn->last_allow_rc = 0;
- physfn_pdn = pci_get_pdn(pdev->physfn);
- pdn->pe_number = physfn_pdn->pe_num_map[pdn->vf_index];
- edev = pdn_to_eeh_dev(pdn);
+ dev_dbg(&pdev->dev, "EEH: Setting up device\n");
+#ifdef CONFIG_PCI_IOV
+ if (pdev->is_virtfn) {
+ struct pci_dn *physfn_pdn;
- /*
- * The following operations will fail if VF's sysfs files
- * aren't created or its resources aren't finalized.
- */
+ pdn->device_id = pdev->device;
+ pdn->vendor_id = pdev->vendor;
+ pdn->class_code = pdev->class;
+ /*
+ * Last allow unfreeze return code used for retrieval
+ * by user space in eeh-sysfs to show the last command
+ * completion from platform.
+ */
+ pdn->last_allow_rc = 0;
+ physfn_pdn = pci_get_pdn(pdev->physfn);
+ pdn->pe_number = physfn_pdn->pe_num_map[pdn->vf_index];
+ }
+#endif
eeh_add_device_early(pdn);
eeh_add_device_late(pdev);
- edev->pe_config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
- eeh_rmv_from_parent_pe(edev); /* Remove as it is adding to bus pe */
- eeh_add_to_parent_pe(edev); /* Add as VF PE type */
- eeh_sysfs_add_device(pdev);
+#ifdef CONFIG_PCI_IOV
+ if (pdev->is_virtfn) {
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
-}
+ edev->pe_config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+ eeh_rmv_from_parent_pe(edev); /* Remove as it is adding to bus pe */
+ eeh_add_to_parent_pe(edev); /* Add as VF PE type */
+ }
#endif
+ eeh_sysfs_add_device(pdev);
+}
/*
* Buffer for reporting slot-error-detail rtas calls. Its here
@@ -144,10 +146,8 @@ static int pseries_eeh_init(void)
/* Set EEH probe mode */
eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
-#ifdef CONFIG_PCI_IOV
/* Set EEH machine dependent code */
ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device;
-#endif
return 0;
}
@@ -251,6 +251,8 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
return NULL;
+ eeh_edev_dbg(edev, "Probing device\n");
+
/*
* Update class code and mode of eeh device. We need
* correctly reflects that current device is root port
@@ -280,8 +282,11 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
pe.config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
/* Enable EEH on the device */
+ eeh_edev_dbg(edev, "Enabling EEH on device\n");
ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
- if (!ret) {
+ if (ret) {
+ eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret);
+ } else {
/* Retrieve PE address */
edev->pe_config_addr = eeh_ops->get_pe_addr(&pe);
pe.addr = edev->pe_config_addr;
@@ -297,11 +302,6 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
if (enable) {
eeh_add_flag(EEH_ENABLED);
eeh_add_to_parent_pe(edev);
-
- pr_debug("%s: EEH enabled on %02x:%02x.%01x PHB#%x-PE#%x\n",
- __func__, pdn->busno, PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn), pe.phb->global_number,
- pe.addr);
} else if (pdn->parent && pdn_to_eeh_dev(pdn->parent) &&
(pdn_to_eeh_dev(pdn->parent))->pe) {
/* This device doesn't support EEH, but it may have an
@@ -310,6 +310,8 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr;
eeh_add_to_parent_pe(edev);
}
+ eeh_edev_dbg(edev, "EEH is %s on device (code %d)\n",
+ (enable ? "enabled" : "unsupported"), ret);
}
/* Save memory bars */
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index d4a8f1702417..3e49cc23a97a 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -22,6 +22,7 @@
#include <asm/firmware.h>
#include <asm/prom.h>
#include <asm/udbg.h>
+#include <asm/svm.h>
#include "pseries.h"
@@ -55,7 +56,8 @@ hypertas_fw_features_table[] = {
{FW_FEATURE_LLAN, "hcall-lLAN"},
{FW_FEATURE_BULK_REMOVE, "hcall-bulk"},
{FW_FEATURE_XDABR, "hcall-xdabr"},
- {FW_FEATURE_MULTITCE, "hcall-multi-tce"},
+ {FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE,
+ "hcall-multi-tce"},
{FW_FEATURE_SPLPAR, "hcall-splpar"},
{FW_FEATURE_VPHN, "hcall-vphn"},
{FW_FEATURE_SET_MODE, "hcall-set-mode"},
@@ -100,6 +102,12 @@ static void __init fw_hypertas_feature_init(const char *hypertas,
}
}
+ if (is_secure_guest() &&
+ (powerpc_firmware_features & FW_FEATURE_PUT_TCE_IND)) {
+ powerpc_firmware_features &= ~FW_FEATURE_PUT_TCE_IND;
+ pr_debug("SVM: disabling PUT_TCE_IND firmware feature\n");
+ }
+
pr_debug(" <- fw_hypertas_feature_init()\n");
}
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index bbda646b63b5..3e8cbfe7a80f 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -338,6 +338,62 @@ static void pseries_remove_processor(struct device_node *np)
cpu_maps_update_done();
}
+static int dlpar_offline_cpu(struct device_node *dn)
+{
+ int rc = 0;
+ unsigned int cpu;
+ int len, nthreads, i;
+ const __be32 *intserv;
+ u32 thread;
+
+ intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+ if (!intserv)
+ return -EINVAL;
+
+ nthreads = len / sizeof(u32);
+
+ cpu_maps_update_begin();
+ for (i = 0; i < nthreads; i++) {
+ thread = be32_to_cpu(intserv[i]);
+ for_each_present_cpu(cpu) {
+ if (get_hard_smp_processor_id(cpu) != thread)
+ continue;
+
+ if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
+ break;
+
+ if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
+ set_preferred_offline_state(cpu,
+ CPU_STATE_OFFLINE);
+ cpu_maps_update_done();
+ timed_topology_update(1);
+ rc = device_offline(get_cpu_device(cpu));
+ if (rc)
+ goto out;
+ cpu_maps_update_begin();
+ break;
+ }
+
+ /*
+ * The cpu is in CPU_STATE_INACTIVE.
+ * Upgrade it's state to CPU_STATE_OFFLINE.
+ */
+ set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
+ WARN_ON(plpar_hcall_norets(H_PROD, thread) != H_SUCCESS);
+ __cpu_die(cpu);
+ break;
+ }
+ if (cpu == num_possible_cpus()) {
+ pr_warn("Could not find cpu to offline with physical id 0x%x\n",
+ thread);
+ }
+ }
+ cpu_maps_update_done();
+
+out:
+ return rc;
+}
+
static int dlpar_online_cpu(struct device_node *dn)
{
int rc = 0;
@@ -364,8 +420,10 @@ static int dlpar_online_cpu(struct device_node *dn)
timed_topology_update(1);
find_and_online_cpu_nid(cpu);
rc = device_online(get_cpu_device(cpu));
- if (rc)
+ if (rc) {
+ dlpar_offline_cpu(dn);
goto out;
+ }
cpu_maps_update_begin();
break;
@@ -407,17 +465,67 @@ static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
return found;
}
+static bool drc_info_valid_index(struct device_node *parent, u32 drc_index)
+{
+ struct property *info;
+ struct of_drc_info drc;
+ const __be32 *value;
+ u32 index;
+ int count, i, j;
+
+ info = of_find_property(parent, "ibm,drc-info", NULL);
+ if (!info)
+ return false;
+
+ value = of_prop_next_u32(info, NULL, &count);
+
+ /* First value of ibm,drc-info is number of drc-info records */
+ if (value)
+ value++;
+ else
+ return false;
+
+ for (i = 0; i < count; i++) {
+ if (of_read_drc_info_cell(&info, &value, &drc))
+ return false;
+
+ if (strncmp(drc.drc_type, "CPU", 3))
+ break;
+
+ if (drc_index > drc.last_drc_index)
+ continue;
+
+ index = drc.drc_index_start;
+ for (j = 0; j < drc.num_sequential_elems; j++) {
+ if (drc_index == index)
+ return true;
+
+ index += drc.sequential_inc;
+ }
+ }
+
+ return false;
+}
+
static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
{
bool found = false;
int rc, index;
- index = 0;
+ if (of_find_property(parent, "ibm,drc-info", NULL))
+ return drc_info_valid_index(parent, drc_index);
+
+ /* Note that the format of the ibm,drc-indexes array is
+ * the number of entries in the array followed by the array
+ * of drc values so we start looking at index = 1.
+ */
+ index = 1;
while (!found) {
u32 drc;
rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
index++, &drc);
+
if (rc)
break;
@@ -505,63 +613,6 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
return rc;
}
-static int dlpar_offline_cpu(struct device_node *dn)
-{
- int rc = 0;
- unsigned int cpu;
- int len, nthreads, i;
- const __be32 *intserv;
- u32 thread;
-
- intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
- if (!intserv)
- return -EINVAL;
-
- nthreads = len / sizeof(u32);
-
- cpu_maps_update_begin();
- for (i = 0; i < nthreads; i++) {
- thread = be32_to_cpu(intserv[i]);
- for_each_present_cpu(cpu) {
- if (get_hard_smp_processor_id(cpu) != thread)
- continue;
-
- if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
- break;
-
- if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
- set_preferred_offline_state(cpu,
- CPU_STATE_OFFLINE);
- cpu_maps_update_done();
- timed_topology_update(1);
- rc = device_offline(get_cpu_device(cpu));
- if (rc)
- goto out;
- cpu_maps_update_begin();
- break;
-
- }
-
- /*
- * The cpu is in CPU_STATE_INACTIVE.
- * Upgrade it's state to CPU_STATE_OFFLINE.
- */
- set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
- BUG_ON(plpar_hcall_norets(H_PROD, thread)
- != H_SUCCESS);
- __cpu_die(cpu);
- break;
- }
- if (cpu == num_possible_cpus())
- printk(KERN_WARNING "Could not find cpu to offline with physical id 0x%x\n", thread);
- }
- cpu_maps_update_done();
-
-out:
- return rc;
-
-}
-
static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
{
int rc;
@@ -717,19 +768,52 @@ static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
return rc;
}
-static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add)
+static int find_drc_info_cpus_to_add(struct device_node *cpus,
+ struct property *info,
+ u32 *cpu_drcs, u32 cpus_to_add)
{
- struct device_node *parent;
+ struct of_drc_info drc;
+ const __be32 *value;
+ u32 count, drc_index;
int cpus_found = 0;
- int index, rc;
+ int i, j;
- parent = of_find_node_by_path("/cpus");
- if (!parent) {
- pr_warn("Could not find CPU root node in device tree\n");
- kfree(cpu_drcs);
+ if (!info)
return -1;
+
+ value = of_prop_next_u32(info, NULL, &count);
+ if (value)
+ value++;
+
+ for (i = 0; i < count; i++) {
+ of_read_drc_info_cell(&info, &value, &drc);
+ if (strncmp(drc.drc_type, "CPU", 3))
+ break;
+
+ drc_index = drc.drc_index_start;
+ for (j = 0; j < drc.num_sequential_elems; j++) {
+ if (dlpar_cpu_exists(cpus, drc_index))
+ continue;
+
+ cpu_drcs[cpus_found++] = drc_index;
+
+ if (cpus_found == cpus_to_add)
+ return cpus_found;
+
+ drc_index += drc.sequential_inc;
+ }
}
+ return cpus_found;
+}
+
+static int find_drc_index_cpus_to_add(struct device_node *cpus,
+ u32 *cpu_drcs, u32 cpus_to_add)
+{
+ int cpus_found = 0;
+ int index, rc;
+ u32 drc_index;
+
/* Search the ibm,drc-indexes array for possible CPU drcs to
* add. Note that the format of the ibm,drc-indexes array is
* the number of entries in the array followed by the array
@@ -737,25 +821,25 @@ static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add)
*/
index = 1;
while (cpus_found < cpus_to_add) {
- u32 drc;
+ rc = of_property_read_u32_index(cpus, "ibm,drc-indexes",
+ index++, &drc_index);
- rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
- index++, &drc);
if (rc)
break;
- if (dlpar_cpu_exists(parent, drc))
+ if (dlpar_cpu_exists(cpus, drc_index))
continue;
- cpu_drcs[cpus_found++] = drc;
+ cpu_drcs[cpus_found++] = drc_index;
}
- of_node_put(parent);
return cpus_found;
}
static int dlpar_cpu_add_by_count(u32 cpus_to_add)
{
+ struct device_node *parent;
+ struct property *info;
u32 *cpu_drcs;
int cpus_added = 0;
int cpus_found;
@@ -767,7 +851,21 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add)
if (!cpu_drcs)
return -EINVAL;
- cpus_found = find_dlpar_cpus_to_add(cpu_drcs, cpus_to_add);
+ parent = of_find_node_by_path("/cpus");
+ if (!parent) {
+ pr_warn("Could not find CPU root node in device tree\n");
+ kfree(cpu_drcs);
+ return -1;
+ }
+
+ info = of_find_property(parent, "ibm,drc-info", NULL);
+ if (info)
+ cpus_found = find_drc_info_cpus_to_add(parent, info, cpu_drcs, cpus_to_add);
+ else
+ cpus_found = find_drc_index_cpus_to_add(parent, cpu_drcs, cpus_to_add);
+
+ of_node_put(parent);
+
if (cpus_found < cpus_to_add) {
pr_warn("Failed to find enough CPUs (%d of %d) to add\n",
cpus_found, cpus_to_add);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 46d0d35b9ca4..a4d40a3ceea3 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -338,7 +338,7 @@ static int pseries_remove_mem_node(struct device_node *np)
static bool lmb_is_removable(struct drmem_lmb *lmb)
{
int i, scns_per_block;
- int rc = 1;
+ bool rc = true;
unsigned long pfn, block_sz;
u64 phys_addr;
@@ -360,14 +360,16 @@ static bool lmb_is_removable(struct drmem_lmb *lmb)
for (i = 0; i < scns_per_block; i++) {
pfn = PFN_DOWN(phys_addr);
- if (!pfn_present(pfn))
+ if (!pfn_present(pfn)) {
+ phys_addr += MIN_MEMORY_BLOCK_SIZE;
continue;
+ }
- rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
+ rc = rc && is_mem_section_removable(pfn, PAGES_PER_SECTION);
phys_addr += MIN_MEMORY_BLOCK_SIZE;
}
- return rc ? true : false;
+ return rc;
}
static int dlpar_add_lmb(struct drmem_lmb *);
@@ -880,34 +882,44 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
switch (hp_elog->action) {
case PSERIES_HP_ELOG_ACTION_ADD:
- if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) {
+ switch (hp_elog->id_type) {
+ case PSERIES_HP_ELOG_ID_DRC_COUNT:
count = hp_elog->_drc_u.drc_count;
rc = dlpar_memory_add_by_count(count);
- } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
+ break;
+ case PSERIES_HP_ELOG_ID_DRC_INDEX:
drc_index = hp_elog->_drc_u.drc_index;
rc = dlpar_memory_add_by_index(drc_index);
- } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) {
+ break;
+ case PSERIES_HP_ELOG_ID_DRC_IC:
count = hp_elog->_drc_u.ic.count;
drc_index = hp_elog->_drc_u.ic.index;
rc = dlpar_memory_add_by_ic(count, drc_index);
- } else {
+ break;
+ default:
rc = -EINVAL;
+ break;
}
break;
case PSERIES_HP_ELOG_ACTION_REMOVE:
- if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) {
+ switch (hp_elog->id_type) {
+ case PSERIES_HP_ELOG_ID_DRC_COUNT:
count = hp_elog->_drc_u.drc_count;
rc = dlpar_memory_remove_by_count(count);
- } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
+ break;
+ case PSERIES_HP_ELOG_ID_DRC_INDEX:
drc_index = hp_elog->_drc_u.drc_index;
rc = dlpar_memory_remove_by_index(drc_index);
- } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) {
+ break;
+ case PSERIES_HP_ELOG_ID_DRC_IC:
count = hp_elog->_drc_u.ic.count;
drc_index = hp_elog->_drc_u.ic.index;
rc = dlpar_memory_remove_by_ic(count, drc_index);
- } else {
+ break;
+ default:
rc = -EINVAL;
+ break;
}
break;
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index bcc1b67417a8..c40c62ec432e 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -129,7 +129,6 @@ static void probe_hcall_exit(void *ignored, unsigned long opcode, long retval,
static int __init hcall_inst_init(void)
{
struct dentry *hcall_root;
- struct dentry *hcall_file;
char cpu_name_buf[CPU_NAME_BUF_SIZE];
int cpu;
@@ -145,17 +144,12 @@ static int __init hcall_inst_init(void)
}
hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
- if (!hcall_root)
- return -ENOMEM;
for_each_possible_cpu(cpu) {
snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu);
- hcall_file = debugfs_create_file(cpu_name_buf, 0444,
- hcall_root,
- per_cpu(hcall_stats, cpu),
- &hcall_inst_seq_fops);
- if (!hcall_file)
- return -ENOMEM;
+ debugfs_create_file(cpu_name_buf, 0444, hcall_root,
+ per_cpu(hcall_stats, cpu),
+ &hcall_inst_seq_fops);
}
return 0;
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 889dc2e44b89..2e0a8eab5588 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -132,10 +132,10 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
return be64_to_cpu(*tcep);
}
-static void tce_free_pSeriesLP(struct iommu_table*, long, long);
+static void tce_free_pSeriesLP(unsigned long liobn, long, long);
static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
-static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
+static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
unsigned long attrs)
@@ -146,25 +146,25 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
int ret = 0;
long tcenum_start = tcenum, npages_start = npages;
- rpn = __pa(uaddr) >> TCE_SHIFT;
+ rpn = __pa(uaddr) >> tceshift;
proto_tce = TCE_PCI_READ;
if (direction != DMA_TO_DEVICE)
proto_tce |= TCE_PCI_WRITE;
while (npages--) {
- tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
- rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
+ tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift;
+ rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);
if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
ret = (int)rc;
- tce_free_pSeriesLP(tbl, tcenum_start,
+ tce_free_pSeriesLP(liobn, tcenum_start,
(npages_start - (npages + 1)));
break;
}
if (rc && printk_ratelimit()) {
printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
- printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
+ printk("\tindex = 0x%llx\n", (u64)liobn);
printk("\ttcenum = 0x%llx\n", (u64)tcenum);
printk("\ttce val = 0x%llx\n", tce );
dump_stack();
@@ -192,8 +192,9 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
int ret = 0;
unsigned long flags;
- if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) {
- return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
+ if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
+ return tce_build_pSeriesLP(tbl->it_index, tcenum,
+ tbl->it_page_shift, npages, uaddr,
direction, attrs);
}
@@ -209,8 +210,9 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
/* If allocation fails, fall back to the loop implementation */
if (!tcep) {
local_irq_restore(flags);
- return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
- direction, attrs);
+ return tce_build_pSeriesLP(tbl->it_index, tcenum,
+ tbl->it_page_shift,
+ npages, uaddr, direction, attrs);
}
__this_cpu_write(tce_page, tcep);
}
@@ -261,16 +263,16 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
return ret;
}
-static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
+static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages)
{
u64 rc;
while (npages--) {
- rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0);
+ rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0);
if (rc && printk_ratelimit()) {
printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
- printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
+ printk("\tindex = 0x%llx\n", (u64)liobn);
printk("\ttcenum = 0x%llx\n", (u64)tcenum);
dump_stack();
}
@@ -284,8 +286,8 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n
{
u64 rc;
- if (!firmware_has_feature(FW_FEATURE_MULTITCE))
- return tce_free_pSeriesLP(tbl, tcenum, npages);
+ if (!firmware_has_feature(FW_FEATURE_STUFF_TCE))
+ return tce_free_pSeriesLP(tbl->it_index, tcenum, npages);
rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
@@ -400,6 +402,19 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
u64 rc = 0;
long l, limit;
+ if (!firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
+ unsigned long tceshift = be32_to_cpu(maprange->tce_shift);
+ unsigned long dmastart = (start_pfn << PAGE_SHIFT) +
+ be64_to_cpu(maprange->dma_base);
+ unsigned long tcenum = dmastart >> tceshift;
+ unsigned long npages = num_pfn << PAGE_SHIFT >> tceshift;
+ void *uaddr = __va(start_pfn << PAGE_SHIFT);
+
+ return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn),
+ tcenum, tceshift, npages, (unsigned long) uaddr,
+ DMA_BIDIRECTIONAL, 0);
+ }
+
local_irq_disable(); /* to protect tcep and the page behind it */
tcep = __this_cpu_read(tce_page);
@@ -609,7 +624,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
iommu_table_setparms(pci->phb, dn, tbl);
tbl->it_ops = &iommu_table_pseries_ops;
- iommu_init_table(tbl, pci->phb->node);
+ iommu_init_table(tbl, pci->phb->node, 0, 0);
/* Divide the rest (1.75GB) among the children */
pci->phb->dma_window_size = 0x80000000ul;
@@ -621,7 +636,8 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
#ifdef CONFIG_IOMMU_API
static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
- long *tce, enum dma_data_direction *direction)
+ long *tce, enum dma_data_direction *direction,
+ bool realmode)
{
long rc;
unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
@@ -649,7 +665,7 @@ static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
struct iommu_table_ops iommu_table_lpar_multi_ops = {
.set = tce_buildmulti_pSeriesLP,
#ifdef CONFIG_IOMMU_API
- .exchange = tce_exchange_pseries,
+ .xchg_no_kill = tce_exchange_pseries,
#endif
.clear = tce_freemulti_pSeriesLP,
.get = tce_get_pSeriesLP
@@ -690,7 +706,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
ppci->table_group, dma_window);
tbl->it_ops = &iommu_table_lpar_multi_ops;
- iommu_init_table(tbl, ppci->phb->node);
+ iommu_init_table(tbl, ppci->phb->node, 0, 0);
iommu_register_group(ppci->table_group,
pci_domain_nr(bus), 0);
pr_debug(" created table: %p\n", ppci->table_group);
@@ -719,7 +735,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
tbl = PCI_DN(dn)->table_group->tables[0];
iommu_table_setparms(phb, dn, tbl);
tbl->it_ops = &iommu_table_pseries_ops;
- iommu_init_table(tbl, phb->node);
+ iommu_init_table(tbl, phb->node, 0, 0);
set_iommu_table_base(&dev->dev, tbl);
return;
}
@@ -1169,7 +1185,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
iommu_table_setparms_lpar(pci->phb, pdn, tbl,
pci->table_group, dma_window);
tbl->it_ops = &iommu_table_lpar_multi_ops;
- iommu_init_table(tbl, pci->phb->node);
+ iommu_init_table(tbl, pci->phb->node, 0, 0);
iommu_register_group(pci->table_group,
pci_domain_nr(pci->phb->bus), 0);
pr_debug(" created table: %p\n", pci->table_group);
@@ -1325,9 +1341,11 @@ static int __init disable_multitce(char *str)
{
if (strcmp(str, "off") == 0 &&
firmware_has_feature(FW_FEATURE_LPAR) &&
- firmware_has_feature(FW_FEATURE_MULTITCE)) {
+ (firmware_has_feature(FW_FEATURE_PUT_TCE_IND) ||
+ firmware_has_feature(FW_FEATURE_STUFF_TCE))) {
printk(KERN_INFO "Disabling MULTITCE firmware feature\n");
- powerpc_firmware_features &= ~FW_FEATURE_MULTITCE;
+ powerpc_firmware_features &=
+ ~(FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE);
}
return 1;
}
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 09bb878c21e0..3c3da25b445c 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -56,6 +56,22 @@ EXPORT_SYMBOL(plpar_hcall);
EXPORT_SYMBOL(plpar_hcall9);
EXPORT_SYMBOL(plpar_hcall_norets);
+/*
+ * H_BLOCK_REMOVE supported block size for this page size in segment who's base
+ * page size is that page size.
+ *
+ * The first index is the segment base page size, the second one is the actual
+ * page size.
+ */
+static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init;
+
+/*
+ * Due to the involved complexity, and that the current hypervisor is only
+ * returning this value or 0, we are limiting the support of the H_BLOCK_REMOVE
+ * buffer size to 8 size block.
+ */
+#define HBLKRM_SUPPORTED_BLOCK_SIZE 8
+
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static u8 dtl_mask = DTL_LOG_PREEMPT;
#else
@@ -566,12 +582,12 @@ static int vcpudispatch_stats_open(struct inode *inode, struct file *file)
return single_open(file, vcpudispatch_stats_display, NULL);
}
-static const struct file_operations vcpudispatch_stats_proc_ops = {
- .open = vcpudispatch_stats_open,
- .read = seq_read,
- .write = vcpudispatch_stats_write,
- .llseek = seq_lseek,
- .release = single_release,
+static const struct proc_ops vcpudispatch_stats_proc_ops = {
+ .proc_open = vcpudispatch_stats_open,
+ .proc_read = seq_read,
+ .proc_write = vcpudispatch_stats_write,
+ .proc_lseek = seq_lseek,
+ .proc_release = single_release,
};
static ssize_t vcpudispatch_stats_freq_write(struct file *file,
@@ -610,12 +626,12 @@ static int vcpudispatch_stats_freq_open(struct inode *inode, struct file *file)
return single_open(file, vcpudispatch_stats_freq_display, NULL);
}
-static const struct file_operations vcpudispatch_stats_freq_proc_ops = {
- .open = vcpudispatch_stats_freq_open,
- .read = seq_read,
- .write = vcpudispatch_stats_freq_write,
- .llseek = seq_lseek,
- .release = single_release,
+static const struct proc_ops vcpudispatch_stats_freq_proc_ops = {
+ .proc_open = vcpudispatch_stats_freq_open,
+ .proc_read = seq_read,
+ .proc_write = vcpudispatch_stats_freq_write,
+ .proc_lseek = seq_lseek,
+ .proc_release = single_release,
};
static int __init vcpudispatch_stats_procfs_init(void)
@@ -758,7 +774,7 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
/* don't remove a bolted entry */
lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
- (0x1UL << 4), &dummy1, &dummy2);
+ HPTE_V_BOLTED, &dummy1, &dummy2);
if (lpar_rc == H_SUCCESS)
return i;
@@ -922,11 +938,19 @@ static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize)
hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
want_v = hpte_encode_avpn(vpn, psize, ssize);
- /* Bolted entries are always in the primary group */
+ /*
+ * We try to keep bolted entries always in primary hash
+ * But in some case we can find them in secondary too.
+ */
hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot = __pSeries_lpar_hpte_find(want_v, hpte_group);
- if (slot < 0)
- return -1;
+ if (slot < 0) {
+ /* Try in secondary */
+ hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot = __pSeries_lpar_hpte_find(want_v, hpte_group);
+ if (slot < 0)
+ return -1;
+ }
return hpte_group + slot;
}
@@ -984,6 +1008,17 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
#define HBLKR_CTRL_ERRNOTFOUND 0x8800000000000000UL
#define HBLKR_CTRL_ERRBUSY 0xa000000000000000UL
+/*
+ * Returned true if we are supporting this block size for the specified segment
+ * base page size and actual page size.
+ *
+ * Currently, we only support 8 size block.
+ */
+static inline bool is_supported_hlbkrm(int bpsize, int psize)
+{
+ return (hblkrm_size[bpsize][psize] == HBLKRM_SUPPORTED_BLOCK_SIZE);
+}
+
/**
* H_BLOCK_REMOVE caller.
* @idx should point to the latest @param entry set with a PTEX.
@@ -1143,7 +1178,8 @@ static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
if (lock_tlbie)
spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
- if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
+ /* Assuming THP size is 16M */
+ if (is_supported_hlbkrm(psize, MMU_PAGE_16M))
hugepage_block_invalidate(slot, vpn, count, psize, ssize);
else
hugepage_bulk_invalidate(slot, vpn, count, psize, ssize);
@@ -1312,6 +1348,140 @@ static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
}
/*
+ * TLB Block Invalidate Characteristics
+ *
+ * These characteristics define the size of the block the hcall H_BLOCK_REMOVE
+ * is able to process for each couple segment base page size, actual page size.
+ *
+ * The ibm,get-system-parameter properties is returning a buffer with the
+ * following layout:
+ *
+ * [ 2 bytes size of the RTAS buffer (excluding these 2 bytes) ]
+ * -----------------
+ * TLB Block Invalidate Specifiers:
+ * [ 1 byte LOG base 2 of the TLB invalidate block size being specified ]
+ * [ 1 byte Number of page sizes (N) that are supported for the specified
+ * TLB invalidate block size ]
+ * [ 1 byte Encoded segment base page size and actual page size
+ * MSB=0 means 4k segment base page size and actual page size
+ * MSB=1 the penc value in mmu_psize_def ]
+ * ...
+ * -----------------
+ * Next TLB Block Invalidate Specifiers...
+ * -----------------
+ * [ 0 ]
+ */
+static inline void set_hblkrm_bloc_size(int bpsize, int psize,
+ unsigned int block_size)
+{
+ if (block_size > hblkrm_size[bpsize][psize])
+ hblkrm_size[bpsize][psize] = block_size;
+}
+
+/*
+ * Decode the Encoded segment base page size and actual page size.
+ * PAPR specifies:
+ * - bit 7 is the L bit
+ * - bits 0-5 are the penc value
+ * If the L bit is 0, this means 4K segment base page size and actual page size
+ * otherwise the penc value should be read.
+ */
+#define HBLKRM_L_MASK 0x80
+#define HBLKRM_PENC_MASK 0x3f
+static inline void __init check_lp_set_hblkrm(unsigned int lp,
+ unsigned int block_size)
+{
+ unsigned int bpsize, psize;
+
+ /* First, check the L bit, if not set, this means 4K */
+ if ((lp & HBLKRM_L_MASK) == 0) {
+ set_hblkrm_bloc_size(MMU_PAGE_4K, MMU_PAGE_4K, block_size);
+ return;
+ }
+
+ lp &= HBLKRM_PENC_MASK;
+ for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) {
+ struct mmu_psize_def *def = &mmu_psize_defs[bpsize];
+
+ for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+ if (def->penc[psize] == lp) {
+ set_hblkrm_bloc_size(bpsize, psize, block_size);
+ return;
+ }
+ }
+ }
+}
+
+#define SPLPAR_TLB_BIC_TOKEN 50
+
+/*
+ * The size of the TLB Block Invalidate Characteristics is variable. But at the
+ * maximum it will be the number of possible page sizes *2 + 10 bytes.
+ * Currently MMU_PAGE_COUNT is 16, which means 42 bytes. Use a cache line size
+ * (128 bytes) for the buffer to get plenty of space.
+ */
+#define SPLPAR_TLB_BIC_MAXLENGTH 128
+
+void __init pseries_lpar_read_hblkrm_characteristics(void)
+{
+ unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH];
+ int call_status, len, idx, bpsize;
+
+ if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
+ return;
+
+ spin_lock(&rtas_data_buf_lock);
+ memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
+ call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+ NULL,
+ SPLPAR_TLB_BIC_TOKEN,
+ __pa(rtas_data_buf),
+ RTAS_DATA_BUF_SIZE);
+ memcpy(local_buffer, rtas_data_buf, SPLPAR_TLB_BIC_MAXLENGTH);
+ local_buffer[SPLPAR_TLB_BIC_MAXLENGTH - 1] = '\0';
+ spin_unlock(&rtas_data_buf_lock);
+
+ if (call_status != 0) {
+ pr_warn("%s %s Error calling get-system-parameter (0x%x)\n",
+ __FILE__, __func__, call_status);
+ return;
+ }
+
+ /*
+ * The first two (2) bytes of the data in the buffer are the length of
+ * the returned data, not counting these first two (2) bytes.
+ */
+ len = be16_to_cpu(*((u16 *)local_buffer)) + 2;
+ if (len > SPLPAR_TLB_BIC_MAXLENGTH) {
+ pr_warn("%s too large returned buffer %d", __func__, len);
+ return;
+ }
+
+ idx = 2;
+ while (idx < len) {
+ u8 block_shift = local_buffer[idx++];
+ u32 block_size;
+ unsigned int npsize;
+
+ if (!block_shift)
+ break;
+
+ block_size = 1 << block_shift;
+
+ for (npsize = local_buffer[idx++];
+ npsize > 0 && idx < len; npsize--)
+ check_lp_set_hblkrm((unsigned int) local_buffer[idx++],
+ block_size);
+ }
+
+ for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
+ for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
+ if (hblkrm_size[bpsize][idx])
+ pr_info("H_BLOCK_REMOVE supports base psize:%d psize:%d block size:%d",
+ bpsize, idx, hblkrm_size[bpsize][idx]);
+}
+
+/*
* Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
* lock.
*/
@@ -1330,7 +1500,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
if (lock_tlbie)
spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
- if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
+ if (is_supported_hlbkrm(batch->psize, batch->psize)) {
do_block_remove(number, batch, param);
goto out;
}
@@ -1413,7 +1583,10 @@ static int pseries_lpar_resize_hpt_commit(void *data)
return 0;
}
-/* Must be called in user context */
+/*
+ * Must be called in process context. The caller must hold the
+ * cpus_lock.
+ */
static int pseries_lpar_resize_hpt(unsigned long shift)
{
struct hpt_resize_state state = {
@@ -1467,7 +1640,8 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
t1 = ktime_get();
- rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL);
+ rc = stop_machine_cpuslocked(pseries_lpar_resize_hpt_commit,
+ &state, NULL);
t2 = ktime_get();
@@ -1527,16 +1701,24 @@ void __init hpte_init_pseries(void)
mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range;
mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all;
mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
- register_process_table = pseries_lpar_register_process_table;
if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
+
+ /*
+ * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall
+ * to inform the hypervisor that we wish to use the HPT.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ pseries_lpar_register_process_table(0, 0, 0);
}
void radix_init_pseries(void)
{
pr_info("Using radix MMU under hypervisor\n");
- register_process_table = pseries_lpar_register_process_table;
+
+ pseries_lpar_register_process_table(__pa(process_tb),
+ 0, PRTB_SIZE_SHIFT - 12);
}
#ifdef CONFIG_PPC_SMLPAR
@@ -1818,30 +2000,17 @@ static int __init vpa_debugfs_init(void)
{
char name[16];
long i;
- static struct dentry *vpa_dir;
+ struct dentry *vpa_dir;
if (!firmware_has_feature(FW_FEATURE_SPLPAR))
return 0;
vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root);
- if (!vpa_dir) {
- pr_warn("%s: can't create vpa root dir\n", __func__);
- return -ENOMEM;
- }
/* set up the per-cpu vpa file*/
for_each_possible_cpu(i) {
- struct dentry *d;
-
sprintf(name, "cpu-%ld", i);
-
- d = debugfs_create_file(name, 0400, vpa_dir, (void *)i,
- &vpa_fops);
- if (!d) {
- pr_warn("%s: can't create per-cpu vpa file\n",
- __func__);
- return -ENOMEM;
- }
+ debugfs_create_file(name, 0400, vpa_dir, (void *)i, &vpa_fops);
}
return 0;
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index e33e8bc4b69b..b8d28ab88178 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -435,10 +435,10 @@ static void maxmem_data(struct seq_file *m)
{
unsigned long maxmem = 0;
- maxmem += drmem_info->n_lmbs * drmem_info->lmb_size;
+ maxmem += (unsigned long)drmem_info->n_lmbs * drmem_info->lmb_size;
maxmem += hugetlb_total_pages() * PAGE_SIZE;
- seq_printf(m, "MaxMem=%ld\n", maxmem);
+ seq_printf(m, "MaxMem=%lu\n", maxmem);
}
static int pseries_lparcfg_data(struct seq_file *m, void *v)
@@ -698,12 +698,12 @@ static int lparcfg_open(struct inode *inode, struct file *file)
return single_open(file, lparcfg_data, NULL);
}
-static const struct file_operations lparcfg_fops = {
- .read = seq_read,
- .write = lparcfg_write,
- .open = lparcfg_open,
- .release = single_release,
- .llseek = seq_lseek,
+static const struct proc_ops lparcfg_proc_ops = {
+ .proc_read = seq_read,
+ .proc_write = lparcfg_write,
+ .proc_open = lparcfg_open,
+ .proc_release = single_release,
+ .proc_lseek = seq_lseek,
};
static int __init lparcfg_init(void)
@@ -714,7 +714,7 @@ static int __init lparcfg_init(void)
if (firmware_has_feature(FW_FEATURE_SPLPAR))
mode |= 0200;
- if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops)) {
+ if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_proc_ops)) {
printk(KERN_ERR "Failed to create powerpc/lparcfg\n");
return -EIO;
}
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index fe812bebdf5e..b571285f6c14 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -9,6 +9,7 @@
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/kobject.h>
+#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/stat.h>
#include <linux/completion.h>
@@ -207,7 +208,11 @@ static int update_dt_node(__be32 phandle, s32 scope)
prop_data += vd;
}
+
+ cond_resched();
}
+
+ cond_resched();
} while (rtas_rc == 1);
of_node_put(dn);
@@ -310,8 +315,12 @@ int pseries_devicetree_update(s32 scope)
add_dt_node(phandle, drc_index);
break;
}
+
+ cond_resched();
}
}
+
+ cond_resched();
} while (rc == 1);
kfree(rtas_buf);
diff --git a/arch/powerpc/platforms/pseries/of_helpers.c b/arch/powerpc/platforms/pseries/of_helpers.c
index 6df192f38f80..66dfd8256712 100644
--- a/arch/powerpc/platforms/pseries/of_helpers.c
+++ b/arch/powerpc/platforms/pseries/of_helpers.c
@@ -45,14 +45,14 @@ struct device_node *pseries_of_derive_parent(const char *path)
int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
struct of_drc_info *data)
{
- const char *p;
+ const char *p = (char *)(*curval);
const __be32 *p2;
if (!data)
return -EINVAL;
/* Get drc-type:encode-string */
- p = data->drc_type = (char*) (*curval);
+ data->drc_type = (char *)p;
p = of_prop_next_string(*prop, p);
if (!p)
return -EINVAL;
@@ -65,9 +65,7 @@ int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
/* Get drc-index-start:encode-int */
p2 = (const __be32 *)p;
- p2 = of_prop_next_u32(*prop, p2, &data->drc_index_start);
- if (!p2)
- return -EINVAL;
+ data->drc_index_start = be32_to_cpu(*p2);
/* Get drc-name-suffix-start:encode-int */
p2 = of_prop_next_u32(*prop, p2, &data->drc_name_suffix_start);
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index a5ac371a3f06..0b4467e378e5 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -65,29 +65,22 @@ static int drc_pmem_bind(struct papr_scm_priv *p)
cond_resched();
} while (rc == H_BUSY);
- if (rc) {
- /* H_OVERLAP needs a separate error path */
- if (rc == H_OVERLAP)
- return -EBUSY;
-
- dev_err(&p->pdev->dev, "bind err: %lld\n", rc);
- return -ENXIO;
- }
+ if (rc)
+ return rc;
p->bound_addr = saved;
-
- dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res);
-
- return 0;
+ dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n",
+ p->drc_index, (unsigned long)saved);
+ return rc;
}
-static int drc_pmem_unbind(struct papr_scm_priv *p)
+static void drc_pmem_unbind(struct papr_scm_priv *p)
{
unsigned long ret[PLPAR_HCALL_BUFSIZE];
uint64_t token = 0;
int64_t rc;
- dev_dbg(&p->pdev->dev, "unbind drc %x\n", p->drc_index);
+ dev_dbg(&p->pdev->dev, "unbind drc 0x%x\n", p->drc_index);
/* NB: unbind has the same retry requirements as drc_pmem_bind() */
do {
@@ -110,12 +103,48 @@ static int drc_pmem_unbind(struct papr_scm_priv *p)
if (rc)
dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
else
- dev_dbg(&p->pdev->dev, "unbind drc %x complete\n",
+ dev_dbg(&p->pdev->dev, "unbind drc 0x%x complete\n",
p->drc_index);
- return rc == H_SUCCESS ? 0 : -ENXIO;
+ return;
+}
+
+static int drc_pmem_query_n_bind(struct papr_scm_priv *p)
+{
+ unsigned long start_addr;
+ unsigned long end_addr;
+ unsigned long ret[PLPAR_HCALL_BUFSIZE];
+ int64_t rc;
+
+
+ rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
+ p->drc_index, 0);
+ if (rc)
+ goto err_out;
+ start_addr = ret[0];
+
+ /* Make sure the full region is bound. */
+ rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
+ p->drc_index, p->blocks - 1);
+ if (rc)
+ goto err_out;
+ end_addr = ret[0];
+
+ if ((end_addr - start_addr) != ((p->blocks - 1) * p->block_size))
+ goto err_out;
+
+ p->bound_addr = start_addr;
+ dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n", p->drc_index, start_addr);
+ return rc;
+
+err_out:
+ dev_info(&p->pdev->dev,
+ "Failed to query, trying an unbind followed by bind");
+ drc_pmem_unbind(p);
+ return drc_pmem_bind(p);
}
+
static int papr_scm_meta_get(struct papr_scm_priv *p,
struct nd_cmd_get_config_data_hdr *hdr)
{
@@ -124,7 +153,7 @@ static int papr_scm_meta_get(struct papr_scm_priv *p,
int len, read;
int64_t ret;
- if ((hdr->in_offset + hdr->in_length) >= p->metadata_size)
+ if ((hdr->in_offset + hdr->in_length) > p->metadata_size)
return -EINVAL;
for (len = hdr->in_length; len; len -= read) {
@@ -178,7 +207,7 @@ static int papr_scm_meta_set(struct papr_scm_priv *p,
__be64 data_be;
int64_t ret;
- if ((hdr->in_offset + hdr->in_length) >= p->metadata_size)
+ if ((hdr->in_offset + hdr->in_length) > p->metadata_size)
return -EINVAL;
for (len = hdr->in_length; len; len -= wrote) {
@@ -256,25 +285,6 @@ int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
return 0;
}
-static const struct attribute_group *region_attr_groups[] = {
- &nd_region_attribute_group,
- &nd_device_attribute_group,
- &nd_mapping_attribute_group,
- &nd_numa_attribute_group,
- NULL,
-};
-
-static const struct attribute_group *bus_attr_groups[] = {
- &nvdimm_bus_attribute_group,
- NULL,
-};
-
-static const struct attribute_group *papr_scm_dimm_groups[] = {
- &nvdimm_attribute_group,
- &nd_device_attribute_group,
- NULL,
-};
-
static inline int papr_scm_node(int node)
{
int min_dist = INT_MAX, dist;
@@ -305,7 +315,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
p->bus_desc.ndctl = papr_scm_ndctl;
p->bus_desc.module = THIS_MODULE;
p->bus_desc.of_node = p->pdev->dev.of_node;
- p->bus_desc.attr_groups = bus_attr_groups;
p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL);
if (!p->bus_desc.provider_name)
@@ -314,14 +323,15 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
p->bus = nvdimm_bus_register(NULL, &p->bus_desc);
if (!p->bus) {
dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn);
+ kfree(p->bus_desc.provider_name);
return -ENXIO;
}
dimm_flags = 0;
set_bit(NDD_ALIASING, &dimm_flags);
- p->nvdimm = nvdimm_create(p->bus, p, papr_scm_dimm_groups,
- dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
+ p->nvdimm = nvdimm_create(p->bus, p, NULL, dimm_flags,
+ PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
if (!p->nvdimm) {
dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn);
goto err;
@@ -338,7 +348,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
mapping.size = p->blocks * p->block_size; // XXX: potential overflow?
memset(&ndr_desc, 0, sizeof(ndr_desc));
- ndr_desc.attr_groups = region_attr_groups;
target_nid = dev_to_node(&p->pdev->dev);
online_nid = papr_scm_node(target_nid);
ndr_desc.numa_node = online_nid;
@@ -349,7 +358,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
ndr_desc.mapping = &mapping;
ndr_desc.num_mappings = 1;
ndr_desc.nd_set = &p->nd_set;
- set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
if (p->is_volatile)
p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
@@ -436,14 +444,14 @@ static int papr_scm_probe(struct platform_device *pdev)
rc = drc_pmem_bind(p);
/* If phyp says drc memory still bound then force unbound and retry */
- if (rc == -EBUSY) {
- dev_warn(&pdev->dev, "Retrying bind after unbinding\n");
- drc_pmem_unbind(p);
- rc = drc_pmem_bind(p);
- }
+ if (rc == H_OVERLAP)
+ rc = drc_pmem_query_n_bind(p);
- if (rc)
+ if (rc != H_SUCCESS) {
+ dev_err(&p->pdev->dev, "bind err: %d\n", rc);
+ rc = -ENXIO;
goto err;
+ }
/* setup the resource for the newly bound range */
p->res.start = p->bound_addr;
@@ -470,6 +478,7 @@ static int papr_scm_remove(struct platform_device *pdev)
nvdimm_bus_unregister(p->bus);
drc_pmem_unbind(p);
+ kfree(p->bus_desc.provider_name);
kfree(p);
return 0;
@@ -485,7 +494,6 @@ static struct platform_driver papr_scm_driver = {
.remove = papr_scm_remove,
.driver = {
.name = "papr_scm",
- .owner = THIS_MODULE,
.of_match_table = papr_scm_match,
},
};
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 1eae1d09980c..911534b89c85 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -192,7 +192,7 @@ int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
{
/* Allocate PCI data */
- add_dev_pci_data(pdev);
+ add_sriov_vf_pdns(pdev);
return pseries_pci_sriov_enable(pdev, num_vfs);
}
@@ -204,7 +204,7 @@ int pseries_pcibios_sriov_disable(struct pci_dev *pdev)
/* Releasing pe_num_map */
kfree(pdn->pe_num_map);
/* Release PCI data */
- remove_dev_pci_data(pdev);
+ remove_sriov_vf_pdns(pdev);
pci_vf_drivers_autoprobe(pdev, true);
return 0;
}
@@ -229,8 +229,7 @@ void __init pSeries_final_fixup(void)
pSeries_request_regions();
- eeh_probe_devices();
- eeh_addr_cache_build();
+ eeh_show_enabled();
#ifdef CONFIG_PCI_IOV
ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable;
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 561917fa54a8..361986e4354e 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PCI Dynamic LPAR, PCI Hot Plug and PCI EEH recovery code
* for RPA-compliant PPC64 platform.
@@ -6,23 +7,6 @@
*
* Updates, 2005, John Rose <johnrose@austin.ibm.com>
* Updates, 2005, Linas Vepstas <linas@austin.ibm.com>
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/pci.h>
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index a6624d4bd9d0..13fa370a87e4 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -112,5 +112,6 @@ static inline unsigned long cmo_get_page_size(void)
int dlpar_workqueue_init(void);
void pseries_setup_rfi_flush(void);
+void pseries_lpar_read_hblkrm_characteristics(void);
#endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
index a96874f9492f..09e98d301db0 100644
--- a/arch/powerpc/platforms/pseries/pseries_energy.c
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -36,6 +36,7 @@ static int sysfs_entries;
static u32 cpu_to_drc_index(int cpu)
{
struct device_node *dn = NULL;
+ struct property *info;
int thread_index;
int rc = 1;
u32 ret = 0;
@@ -47,20 +48,18 @@ static u32 cpu_to_drc_index(int cpu)
/* Convert logical cpu number to core number */
thread_index = cpu_core_index_of_thread(cpu);
- if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
- struct property *info = NULL;
+ info = of_find_property(dn, "ibm,drc-info", NULL);
+ if (info) {
struct of_drc_info drc;
int j;
u32 num_set_entries;
const __be32 *value;
- info = of_find_property(dn, "ibm,drc-info", NULL);
- if (info == NULL)
- goto err_of_node_put;
-
value = of_prop_next_u32(info, NULL, &num_set_entries);
if (!value)
goto err_of_node_put;
+ else
+ value++;
for (j = 0; j < num_set_entries; j++) {
@@ -110,6 +109,7 @@ err:
static int drc_index_to_cpu(u32 drc_index)
{
struct device_node *dn = NULL;
+ struct property *info;
const int *indexes;
int thread_index = 0, cpu = 0;
int rc = 1;
@@ -117,21 +117,18 @@ static int drc_index_to_cpu(u32 drc_index)
dn = of_find_node_by_path("/cpus");
if (dn == NULL)
goto err;
-
- if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
- struct property *info = NULL;
+ info = of_find_property(dn, "ibm,drc-info", NULL);
+ if (info) {
struct of_drc_info drc;
int j;
u32 num_set_entries;
const __be32 *value;
- info = of_find_property(dn, "ibm,drc-info", NULL);
- if (info == NULL)
- goto err_of_node_put;
-
value = of_prop_next_u32(info, NULL, &num_set_entries);
if (!value)
goto err_of_node_put;
+ else
+ value++;
for (j = 0; j < num_set_entries; j++) {
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index f16fdd0f71f7..1d7f973c647b 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -76,6 +76,7 @@ struct pseries_mc_errorlog {
#define MC_ERROR_TYPE_UE 0x00
#define MC_ERROR_TYPE_SLB 0x01
#define MC_ERROR_TYPE_ERAT 0x02
+#define MC_ERROR_TYPE_UNKNOWN 0x03
#define MC_ERROR_TYPE_TLB 0x04
#define MC_ERROR_TYPE_D_CACHE 0x05
#define MC_ERROR_TYPE_I_CACHE 0x07
@@ -87,6 +88,9 @@ struct pseries_mc_errorlog {
#define MC_ERROR_UE_LOAD_STORE 3
#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+#define UE_EFFECTIVE_ADDR_PROVIDED 0x40
+#define UE_LOGICAL_ADDR_PROVIDED 0x20
+
#define MC_ERROR_SLB_PARITY 0
#define MC_ERROR_SLB_MULTIHIT 1
#define MC_ERROR_SLB_INDETERMINATE 2
@@ -113,27 +117,6 @@ static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
}
}
-static
-inline u64 rtas_mc_get_effective_addr(const struct pseries_mc_errorlog *mlog)
-{
- __be64 addr = 0;
-
- switch (mlog->error_type) {
- case MC_ERROR_TYPE_UE:
- if (mlog->sub_err_type & 0x40)
- addr = mlog->effective_address;
- break;
- case MC_ERROR_TYPE_SLB:
- case MC_ERROR_TYPE_ERAT:
- case MC_ERROR_TYPE_TLB:
- if (mlog->sub_err_type & 0x80)
- addr = mlog->effective_address;
- default:
- break;
- }
- return be64_to_cpu(addr);
-}
-
/*
* Enable the hotplug interrupt late because processing them may touch other
* devices or systems (e.g. hugepages) that have not been initialized at the
@@ -272,7 +255,7 @@ static void rtas_parse_epow_errlog(struct rtas_error_log *log)
break;
case EPOW_SYSTEM_SHUTDOWN:
- handle_system_shutdown(epow_log->event_modifier);
+ handle_system_shutdown(modifier);
break;
case EPOW_SYSTEM_HALT:
@@ -511,160 +494,165 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
return 0; /* need to perform reset */
}
-#define VAL_TO_STRING(ar, val) \
- (((val) < ARRAY_SIZE(ar)) ? ar[(val)] : "Unknown")
-static void pseries_print_mce_info(struct pt_regs *regs,
- struct rtas_error_log *errp)
+static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
{
- const char *level, *sevstr;
+ struct mce_error_info mce_err = { 0 };
+ unsigned long eaddr = 0, paddr = 0;
struct pseries_errorlog *pseries_log;
struct pseries_mc_errorlog *mce_log;
- u8 error_type, err_sub_type;
- u64 addr;
- u8 initiator = rtas_error_initiator(errp);
int disposition = rtas_error_disposition(errp);
+ int initiator = rtas_error_initiator(errp);
+ int severity = rtas_error_severity(errp);
+ u8 error_type, err_sub_type;
- static const char * const initiators[] = {
- [0] = "Unknown",
- [1] = "CPU",
- [2] = "PCI",
- [3] = "ISA",
- [4] = "Memory",
- [5] = "Power Mgmt",
- };
- static const char * const mc_err_types[] = {
- [0] = "UE",
- [1] = "SLB",
- [2] = "ERAT",
- [3] = "Unknown",
- [4] = "TLB",
- [5] = "D-Cache",
- [6] = "Unknown",
- [7] = "I-Cache",
- };
- static const char * const mc_ue_types[] = {
- [0] = "Indeterminate",
- [1] = "Instruction fetch",
- [2] = "Page table walk ifetch",
- [3] = "Load/Store",
- [4] = "Page table walk Load/Store",
- };
-
- /* SLB sub errors valid values are 0x0, 0x1, 0x2 */
- static const char * const mc_slb_types[] = {
- [0] = "Parity",
- [1] = "Multihit",
- [2] = "Indeterminate",
- };
-
- /* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
- static const char * const mc_soft_types[] = {
- [0] = "Unknown",
- [1] = "Parity",
- [2] = "Multihit",
- [3] = "Indeterminate",
- };
-
- if (!rtas_error_extended(errp)) {
- pr_err("Machine check interrupt: Missing extended error log\n");
- return;
- }
+ if (initiator == RTAS_INITIATOR_UNKNOWN)
+ mce_err.initiator = MCE_INITIATOR_UNKNOWN;
+ else if (initiator == RTAS_INITIATOR_CPU)
+ mce_err.initiator = MCE_INITIATOR_CPU;
+ else if (initiator == RTAS_INITIATOR_PCI)
+ mce_err.initiator = MCE_INITIATOR_PCI;
+ else if (initiator == RTAS_INITIATOR_ISA)
+ mce_err.initiator = MCE_INITIATOR_ISA;
+ else if (initiator == RTAS_INITIATOR_MEMORY)
+ mce_err.initiator = MCE_INITIATOR_MEMORY;
+ else if (initiator == RTAS_INITIATOR_POWERMGM)
+ mce_err.initiator = MCE_INITIATOR_POWERMGM;
+ else
+ mce_err.initiator = MCE_INITIATOR_UNKNOWN;
+
+ if (severity == RTAS_SEVERITY_NO_ERROR)
+ mce_err.severity = MCE_SEV_NO_ERROR;
+ else if (severity == RTAS_SEVERITY_EVENT)
+ mce_err.severity = MCE_SEV_WARNING;
+ else if (severity == RTAS_SEVERITY_WARNING)
+ mce_err.severity = MCE_SEV_WARNING;
+ else if (severity == RTAS_SEVERITY_ERROR_SYNC)
+ mce_err.severity = MCE_SEV_SEVERE;
+ else if (severity == RTAS_SEVERITY_ERROR)
+ mce_err.severity = MCE_SEV_SEVERE;
+ else if (severity == RTAS_SEVERITY_FATAL)
+ mce_err.severity = MCE_SEV_FATAL;
+ else
+ mce_err.severity = MCE_SEV_FATAL;
+
+ if (severity <= RTAS_SEVERITY_ERROR_SYNC)
+ mce_err.sync_error = true;
+ else
+ mce_err.sync_error = false;
+
+ mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
+ mce_err.error_class = MCE_ECLASS_UNKNOWN;
+
+ if (!rtas_error_extended(errp))
+ goto out;
pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
if (pseries_log == NULL)
- return;
+ goto out;
mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
-
error_type = mce_log->error_type;
err_sub_type = rtas_mc_error_sub_type(mce_log);
- switch (rtas_error_severity(errp)) {
- case RTAS_SEVERITY_NO_ERROR:
- level = KERN_INFO;
- sevstr = "Harmless";
- break;
- case RTAS_SEVERITY_WARNING:
- level = KERN_WARNING;
- sevstr = "";
- break;
- case RTAS_SEVERITY_ERROR:
- case RTAS_SEVERITY_ERROR_SYNC:
- level = KERN_ERR;
- sevstr = "Severe";
- break;
- case RTAS_SEVERITY_FATAL:
- default:
- level = KERN_ERR;
- sevstr = "Fatal";
- break;
- }
+ switch (mce_log->error_type) {
+ case MC_ERROR_TYPE_UE:
+ mce_err.error_type = MCE_ERROR_TYPE_UE;
+ switch (err_sub_type) {
+ case MC_ERROR_UE_IFETCH:
+ mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH;
+ break;
+ case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH:
+ mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
+ break;
+ case MC_ERROR_UE_LOAD_STORE:
+ mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
+ break;
+ case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE:
+ mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+ break;
+ case MC_ERROR_UE_INDETERMINATE:
+ default:
+ mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE;
+ break;
+ }
+ if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED)
+ eaddr = be64_to_cpu(mce_log->effective_address);
-#ifdef CONFIG_PPC_BOOK3S_64
- /* Display faulty slb contents for SLB errors. */
- if (error_type == MC_ERROR_TYPE_SLB)
- slb_dump_contents(local_paca->mce_faulty_slbs);
-#endif
+ if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
+ paddr = be64_to_cpu(mce_log->logical_address);
+ } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
+ unsigned long pfn;
- printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
- disposition == RTAS_DISP_FULLY_RECOVERED ?
- "Recovered" : "Not recovered");
- if (user_mode(regs)) {
- printk("%s NIP: [%016lx] PID: %d Comm: %s\n", level,
- regs->nip, current->pid, current->comm);
- } else {
- printk("%s NIP [%016lx]: %pS\n", level, regs->nip,
- (void *)regs->nip);
- }
- printk("%s Initiator: %s\n", level,
- VAL_TO_STRING(initiators, initiator));
+ pfn = addr_to_pfn(regs, eaddr);
+ if (pfn != ULONG_MAX)
+ paddr = pfn << PAGE_SHIFT;
+ }
- switch (error_type) {
- case MC_ERROR_TYPE_UE:
- printk("%s Error type: %s [%s]\n", level,
- VAL_TO_STRING(mc_err_types, error_type),
- VAL_TO_STRING(mc_ue_types, err_sub_type));
break;
case MC_ERROR_TYPE_SLB:
- printk("%s Error type: %s [%s]\n", level,
- VAL_TO_STRING(mc_err_types, error_type),
- VAL_TO_STRING(mc_slb_types, err_sub_type));
+ mce_err.error_type = MCE_ERROR_TYPE_SLB;
+ switch (err_sub_type) {
+ case MC_ERROR_SLB_PARITY:
+ mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY;
+ break;
+ case MC_ERROR_SLB_MULTIHIT:
+ mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+ break;
+ case MC_ERROR_SLB_INDETERMINATE:
+ default:
+ mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
+ break;
+ }
+ if (mce_log->sub_err_type & 0x80)
+ eaddr = be64_to_cpu(mce_log->effective_address);
break;
case MC_ERROR_TYPE_ERAT:
+ mce_err.error_type = MCE_ERROR_TYPE_ERAT;
+ switch (err_sub_type) {
+ case MC_ERROR_ERAT_PARITY:
+ mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY;
+ break;
+ case MC_ERROR_ERAT_MULTIHIT:
+ mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ break;
+ case MC_ERROR_ERAT_INDETERMINATE:
+ default:
+ mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
+ break;
+ }
+ if (mce_log->sub_err_type & 0x80)
+ eaddr = be64_to_cpu(mce_log->effective_address);
+ break;
case MC_ERROR_TYPE_TLB:
- printk("%s Error type: %s [%s]\n", level,
- VAL_TO_STRING(mc_err_types, error_type),
- VAL_TO_STRING(mc_soft_types, err_sub_type));
+ mce_err.error_type = MCE_ERROR_TYPE_TLB;
+ switch (err_sub_type) {
+ case MC_ERROR_TLB_PARITY:
+ mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY;
+ break;
+ case MC_ERROR_TLB_MULTIHIT:
+ mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+ break;
+ case MC_ERROR_TLB_INDETERMINATE:
+ default:
+ mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
+ break;
+ }
+ if (mce_log->sub_err_type & 0x80)
+ eaddr = be64_to_cpu(mce_log->effective_address);
+ break;
+ case MC_ERROR_TYPE_D_CACHE:
+ mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
break;
+ case MC_ERROR_TYPE_I_CACHE:
+ mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
+ break;
+ case MC_ERROR_TYPE_UNKNOWN:
default:
- printk("%s Error type: %s\n", level,
- VAL_TO_STRING(mc_err_types, error_type));
+ mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
break;
}
- addr = rtas_mc_get_effective_addr(mce_log);
- if (addr)
- printk("%s Effective address: %016llx\n", level, addr);
-}
-
-static int mce_handle_error(struct rtas_error_log *errp)
-{
- struct pseries_errorlog *pseries_log;
- struct pseries_mc_errorlog *mce_log;
- int disposition = rtas_error_disposition(errp);
- u8 error_type;
-
- if (!rtas_error_extended(errp))
- goto out;
-
- pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
- if (pseries_log == NULL)
- goto out;
-
- mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
- error_type = mce_log->error_type;
-
#ifdef CONFIG_PPC_BOOK3S_64
if (disposition == RTAS_DISP_NOT_RECOVERED) {
switch (error_type) {
@@ -682,98 +670,24 @@ static int mce_handle_error(struct rtas_error_log *errp)
slb_save_contents(local_paca->mce_faulty_slbs);
flush_and_reload_slb();
disposition = RTAS_DISP_FULLY_RECOVERED;
- rtas_set_disposition_recovered(errp);
break;
default:
break;
}
+ } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
+ /* Platform corrected itself but could be degraded */
+ printk(KERN_ERR "MCE: limited recovery, system may "
+ "be degraded\n");
+ disposition = RTAS_DISP_FULLY_RECOVERED;
}
#endif
out:
- return disposition;
-}
-
-#ifdef CONFIG_MEMORY_FAILURE
-
-static DEFINE_PER_CPU(int, rtas_ue_count);
-static DEFINE_PER_CPU(unsigned long, rtas_ue_paddr[MAX_MC_EVT]);
+ save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
+ &mce_err, regs->nip, eaddr, paddr);
-#define UE_EFFECTIVE_ADDR_PROVIDED 0x40
-#define UE_LOGICAL_ADDR_PROVIDED 0x20
-
-
-static void pseries_hwpoison_work_fn(struct work_struct *work)
-{
- unsigned long paddr;
- int index;
-
- while (__this_cpu_read(rtas_ue_count) > 0) {
- index = __this_cpu_read(rtas_ue_count) - 1;
- paddr = __this_cpu_read(rtas_ue_paddr[index]);
- memory_failure(paddr >> PAGE_SHIFT, 0);
- __this_cpu_dec(rtas_ue_count);
- }
-}
-
-static DECLARE_WORK(hwpoison_work, pseries_hwpoison_work_fn);
-
-static void queue_ue_paddr(unsigned long paddr)
-{
- int index;
-
- index = __this_cpu_inc_return(rtas_ue_count) - 1;
- if (index >= MAX_MC_EVT) {
- __this_cpu_dec(rtas_ue_count);
- return;
- }
- this_cpu_write(rtas_ue_paddr[index], paddr);
- schedule_work(&hwpoison_work);
-}
-
-static void pseries_do_memory_failure(struct pt_regs *regs,
- struct pseries_mc_errorlog *mce_log)
-{
- unsigned long paddr;
-
- if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
- paddr = be64_to_cpu(mce_log->logical_address);
- } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
- unsigned long pfn;
-
- pfn = addr_to_pfn(regs,
- be64_to_cpu(mce_log->effective_address));
- if (pfn == ULONG_MAX)
- return;
- paddr = pfn << PAGE_SHIFT;
- } else {
- return;
- }
- queue_ue_paddr(paddr);
-}
-
-static void pseries_process_ue(struct pt_regs *regs,
- struct rtas_error_log *errp)
-{
- struct pseries_errorlog *pseries_log;
- struct pseries_mc_errorlog *mce_log;
-
- if (!rtas_error_extended(errp))
- return;
-
- pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
- if (!pseries_log)
- return;
-
- mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
-
- if (mce_log->error_type == MC_ERROR_TYPE_UE)
- pseries_do_memory_failure(regs, mce_log);
+ return disposition;
}
-#else
-static inline void pseries_process_ue(struct pt_regs *regs,
- struct rtas_error_log *errp) { }
-#endif /*CONFIG_MEMORY_FAILURE */
/*
* Process MCE rtas errlog event.
@@ -795,49 +709,51 @@ static void mce_process_errlog_event(struct irq_work *work)
* Return 1 if corrected (or delivered a signal).
* Return 0 if there is nothing we can do.
*/
-static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
+static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt)
{
int recovered = 0;
- int disposition = rtas_error_disposition(err);
-
- pseries_print_mce_info(regs, err);
if (!(regs->msr & MSR_RI)) {
/* If MSR_RI isn't set, we cannot recover */
pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
recovered = 0;
-
- } else if (disposition == RTAS_DISP_FULLY_RECOVERED) {
+ } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
/* Platform corrected itself */
recovered = 1;
+ } else if (evt->severity == MCE_SEV_FATAL) {
+ /* Fatal machine check */
+ pr_err("Machine check interrupt is fatal\n");
+ recovered = 0;
+ }
- } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
- /* Platform corrected itself but could be degraded */
- printk(KERN_ERR "MCE: limited recovery, system may "
- "be degraded\n");
- recovered = 1;
-
- } else if (user_mode(regs) && !is_global_init(current) &&
- rtas_error_severity(err) == RTAS_SEVERITY_ERROR_SYNC) {
-
+ if (!recovered && evt->sync_error) {
/*
- * If we received a synchronous error when in userspace
- * kill the task. Firmware may report details of the fail
- * asynchronously, so we can't rely on the target and type
- * fields being valid here.
+ * Try to kill processes if we get a synchronous machine check
+ * (e.g., one caused by execution of this instruction). This
+ * will devolve into a panic if we try to kill init or are in
+ * an interrupt etc.
+ *
+ * TODO: Queue up this address for hwpoisioning later.
+ * TODO: This is not quite right for d-side machine
+ * checks ->nip is not necessarily the important
+ * address.
*/
- printk(KERN_ERR "MCE: uncorrectable error, killing task "
- "%s:%d\n", current->comm, current->pid);
-
- _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
- recovered = 1;
+ if ((user_mode(regs))) {
+ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+ recovered = 1;
+ } else if (die_will_crash()) {
+ /*
+ * die() would kill the kernel, so better to go via
+ * the platform reboot code that will log the
+ * machine check.
+ */
+ recovered = 0;
+ } else {
+ die("Machine check", regs, SIGBUS);
+ recovered = 1;
+ }
}
- pseries_process_ue(regs, err);
-
- /* Queue irq work to log this rtas event later. */
- irq_work_queue(&mce_errlog_process_work);
-
return recovered;
}
@@ -853,14 +769,21 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
*/
int pSeries_machine_check_exception(struct pt_regs *regs)
{
- struct rtas_error_log *errp;
+ struct machine_check_event evt;
- if (fwnmi_active) {
- fwnmi_release_errinfo();
- errp = fwnmi_get_errlog();
- if (errp && recover_mce(regs, errp))
- return 1;
+ if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
+ return 0;
+
+ /* Print things out */
+ if (evt.version != MCE_V1) {
+ pr_err("Machine Check Exception, Unknown event version %d !\n",
+ evt.version);
+ return 0;
}
+ machine_check_print_event_info(&evt, user_mode(regs), false);
+
+ if (recover_mce(regs, &evt))
+ return 1;
return 0;
}
@@ -877,7 +800,12 @@ long pseries_machine_check_realmode(struct pt_regs *regs)
* to panic. Hence we will call it as soon as we go into
* virtual mode.
*/
- disposition = mce_handle_error(errp);
+ disposition = mce_handle_error(regs, errp);
+ fwnmi_release_errinfo();
+
+ /* Queue irq work to log this rtas event later. */
+ irq_work_queue(&mce_errlog_process_work);
+
if (disposition == RTAS_DISP_FULLY_RECOVERED)
return 1;
}
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 8a9c4fb95b8b..7f7369fec46b 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -391,9 +391,9 @@ out:
return rv ? rv : count;
}
-static const struct file_operations ofdt_fops = {
- .write = ofdt_write,
- .llseek = noop_llseek,
+static const struct proc_ops ofdt_proc_ops = {
+ .proc_write = ofdt_write,
+ .proc_lseek = noop_llseek,
};
/* create /proc/powerpc/ofdt write-only by root */
@@ -401,7 +401,7 @@ static int proc_ppc64_create_ofdt(void)
{
struct proc_dir_entry *ent;
- ent = proc_create("powerpc/ofdt", 0200, NULL, &ofdt_fops);
+ ent = proc_create("powerpc/ofdt", 0200, NULL, &ofdt_proc_ops);
if (ent)
proc_set_size(ent, 0);
diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c
new file mode 100644
index 000000000000..70c3013fdd07
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/rtas-fadump.c
@@ -0,0 +1,550 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Firmware-Assisted Dump support on POWERVM platform.
+ *
+ * Copyright 2011, Mahesh Salgaonkar, IBM Corporation.
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "rtas fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/crash_dump.h>
+
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/fadump.h>
+#include <asm/fadump-internal.h>
+
+#include "rtas-fadump.h"
+
+static struct rtas_fadump_mem_struct fdm;
+static const struct rtas_fadump_mem_struct *fdm_active;
+
+static void rtas_fadump_update_config(struct fw_dump *fadump_conf,
+ const struct rtas_fadump_mem_struct *fdm)
+{
+ fadump_conf->boot_mem_dest_addr =
+ be64_to_cpu(fdm->rmr_region.destination_address);
+
+ fadump_conf->fadumphdr_addr = (fadump_conf->boot_mem_dest_addr +
+ fadump_conf->boot_memory_size);
+}
+
+/*
+ * This function is called in the capture kernel to get configuration details
+ * setup in the first kernel and passed to the f/w.
+ */
+static void rtas_fadump_get_config(struct fw_dump *fadump_conf,
+ const struct rtas_fadump_mem_struct *fdm)
+{
+ fadump_conf->boot_mem_addr[0] =
+ be64_to_cpu(fdm->rmr_region.source_address);
+ fadump_conf->boot_mem_sz[0] = be64_to_cpu(fdm->rmr_region.source_len);
+ fadump_conf->boot_memory_size = fadump_conf->boot_mem_sz[0];
+
+ fadump_conf->boot_mem_top = fadump_conf->boot_memory_size;
+ fadump_conf->boot_mem_regs_cnt = 1;
+
+ /*
+ * Start address of reserve dump area (permanent reservation) for
+ * re-registering FADump after dump capture.
+ */
+ fadump_conf->reserve_dump_area_start =
+ be64_to_cpu(fdm->cpu_state_data.destination_address);
+
+ rtas_fadump_update_config(fadump_conf, fdm);
+}
+
+static u64 rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf)
+{
+ u64 addr = fadump_conf->reserve_dump_area_start;
+
+ memset(&fdm, 0, sizeof(struct rtas_fadump_mem_struct));
+ addr = addr & PAGE_MASK;
+
+ fdm.header.dump_format_version = cpu_to_be32(0x00000001);
+ fdm.header.dump_num_sections = cpu_to_be16(3);
+ fdm.header.dump_status_flag = 0;
+ fdm.header.offset_first_dump_section =
+ cpu_to_be32((u32)offsetof(struct rtas_fadump_mem_struct,
+ cpu_state_data));
+
+ /*
+ * Fields for disk dump option.
+ * We are not using disk dump option, hence set these fields to 0.
+ */
+ fdm.header.dd_block_size = 0;
+ fdm.header.dd_block_offset = 0;
+ fdm.header.dd_num_blocks = 0;
+ fdm.header.dd_offset_disk_path = 0;
+
+ /* set 0 to disable an automatic dump-reboot. */
+ fdm.header.max_time_auto = 0;
+
+ /* Kernel dump sections */
+ /* cpu state data section. */
+ fdm.cpu_state_data.request_flag =
+ cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
+ fdm.cpu_state_data.source_data_type =
+ cpu_to_be16(RTAS_FADUMP_CPU_STATE_DATA);
+ fdm.cpu_state_data.source_address = 0;
+ fdm.cpu_state_data.source_len =
+ cpu_to_be64(fadump_conf->cpu_state_data_size);
+ fdm.cpu_state_data.destination_address = cpu_to_be64(addr);
+ addr += fadump_conf->cpu_state_data_size;
+
+ /* hpte region section */
+ fdm.hpte_region.request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
+ fdm.hpte_region.source_data_type =
+ cpu_to_be16(RTAS_FADUMP_HPTE_REGION);
+ fdm.hpte_region.source_address = 0;
+ fdm.hpte_region.source_len =
+ cpu_to_be64(fadump_conf->hpte_region_size);
+ fdm.hpte_region.destination_address = cpu_to_be64(addr);
+ addr += fadump_conf->hpte_region_size;
+
+ /* RMA region section */
+ fdm.rmr_region.request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
+ fdm.rmr_region.source_data_type =
+ cpu_to_be16(RTAS_FADUMP_REAL_MODE_REGION);
+ fdm.rmr_region.source_address = cpu_to_be64(0);
+ fdm.rmr_region.source_len = cpu_to_be64(fadump_conf->boot_memory_size);
+ fdm.rmr_region.destination_address = cpu_to_be64(addr);
+ addr += fadump_conf->boot_memory_size;
+
+ rtas_fadump_update_config(fadump_conf, &fdm);
+
+ return addr;
+}
+
+static u64 rtas_fadump_get_bootmem_min(void)
+{
+ return RTAS_FADUMP_MIN_BOOT_MEM;
+}
+
+static int rtas_fadump_register(struct fw_dump *fadump_conf)
+{
+ unsigned int wait_time;
+ int rc, err = -EIO;
+
+ /* TODO: Add upper time limit for the delay */
+ do {
+ rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
+ NULL, FADUMP_REGISTER, &fdm,
+ sizeof(struct rtas_fadump_mem_struct));
+
+ wait_time = rtas_busy_delay_time(rc);
+ if (wait_time)
+ mdelay(wait_time);
+
+ } while (wait_time);
+
+ switch (rc) {
+ case 0:
+ pr_info("Registration is successful!\n");
+ fadump_conf->dump_registered = 1;
+ err = 0;
+ break;
+ case -1:
+ pr_err("Failed to register. Hardware Error(%d).\n", rc);
+ break;
+ case -3:
+ if (!is_fadump_boot_mem_contiguous())
+ pr_err("Can't have holes in boot memory area.\n");
+ else if (!is_fadump_reserved_mem_contiguous())
+ pr_err("Can't have holes in reserved memory area.\n");
+
+ pr_err("Failed to register. Parameter Error(%d).\n", rc);
+ err = -EINVAL;
+ break;
+ case -9:
+ pr_err("Already registered!\n");
+ fadump_conf->dump_registered = 1;
+ err = -EEXIST;
+ break;
+ default:
+ pr_err("Failed to register. Unknown Error(%d).\n", rc);
+ break;
+ }
+
+ return err;
+}
+
+static int rtas_fadump_unregister(struct fw_dump *fadump_conf)
+{
+ unsigned int wait_time;
+ int rc;
+
+ /* TODO: Add upper time limit for the delay */
+ do {
+ rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
+ NULL, FADUMP_UNREGISTER, &fdm,
+ sizeof(struct rtas_fadump_mem_struct));
+
+ wait_time = rtas_busy_delay_time(rc);
+ if (wait_time)
+ mdelay(wait_time);
+ } while (wait_time);
+
+ if (rc) {
+ pr_err("Failed to un-register - unexpected error(%d).\n", rc);
+ return -EIO;
+ }
+
+ fadump_conf->dump_registered = 0;
+ return 0;
+}
+
+static int rtas_fadump_invalidate(struct fw_dump *fadump_conf)
+{
+ unsigned int wait_time;
+ int rc;
+
+ /* TODO: Add upper time limit for the delay */
+ do {
+ rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
+ NULL, FADUMP_INVALIDATE, fdm_active,
+ sizeof(struct rtas_fadump_mem_struct));
+
+ wait_time = rtas_busy_delay_time(rc);
+ if (wait_time)
+ mdelay(wait_time);
+ } while (wait_time);
+
+ if (rc) {
+ pr_err("Failed to invalidate - unexpected error (%d).\n", rc);
+ return -EIO;
+ }
+
+ fadump_conf->dump_active = 0;
+ fdm_active = NULL;
+ return 0;
+}
+
+#define RTAS_FADUMP_GPR_MASK 0xffffff0000000000
+static inline int rtas_fadump_gpr_index(u64 id)
+{
+ char str[3];
+ int i = -1;
+
+ if ((id & RTAS_FADUMP_GPR_MASK) == fadump_str_to_u64("GPR")) {
+ /* get the digits at the end */
+ id &= ~RTAS_FADUMP_GPR_MASK;
+ id >>= 24;
+ str[2] = '\0';
+ str[1] = id & 0xff;
+ str[0] = (id >> 8) & 0xff;
+ if (kstrtoint(str, 10, &i))
+ i = -EINVAL;
+ if (i > 31)
+ i = -1;
+ }
+ return i;
+}
+
+void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val)
+{
+ int i;
+
+ i = rtas_fadump_gpr_index(reg_id);
+ if (i >= 0)
+ regs->gpr[i] = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("NIA"))
+ regs->nip = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("MSR"))
+ regs->msr = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("CTR"))
+ regs->ctr = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("LR"))
+ regs->link = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("XER"))
+ regs->xer = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("CR"))
+ regs->ccr = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("DAR"))
+ regs->dar = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("DSISR"))
+ regs->dsisr = (unsigned long)reg_val;
+}
+
+static struct rtas_fadump_reg_entry*
+rtas_fadump_read_regs(struct rtas_fadump_reg_entry *reg_entry,
+ struct pt_regs *regs)
+{
+ memset(regs, 0, sizeof(struct pt_regs));
+
+ while (be64_to_cpu(reg_entry->reg_id) != fadump_str_to_u64("CPUEND")) {
+ rtas_fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id),
+ be64_to_cpu(reg_entry->reg_value));
+ reg_entry++;
+ }
+ reg_entry++;
+ return reg_entry;
+}
+
+/*
+ * Read CPU state dump data and convert it into ELF notes.
+ * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
+ * used to access the data to allow for additional fields to be added without
+ * affecting compatibility. Each list of registers for a CPU starts with
+ * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
+ * 8 Byte ASCII identifier and 8 Byte register value. The register entry
+ * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
+ * of register value. For more details refer to PAPR document.
+ *
+ * Only for the crashing cpu we ignore the CPU dump data and get exact
+ * state from fadump crash info structure populated by first kernel at the
+ * time of crash.
+ */
+static int __init rtas_fadump_build_cpu_notes(struct fw_dump *fadump_conf)
+{
+ struct rtas_fadump_reg_save_area_header *reg_header;
+ struct fadump_crash_info_header *fdh = NULL;
+ struct rtas_fadump_reg_entry *reg_entry;
+ u32 num_cpus, *note_buf;
+ int i, rc = 0, cpu = 0;
+ struct pt_regs regs;
+ unsigned long addr;
+ void *vaddr;
+
+ addr = be64_to_cpu(fdm_active->cpu_state_data.destination_address);
+ vaddr = __va(addr);
+
+ reg_header = vaddr;
+ if (be64_to_cpu(reg_header->magic_number) !=
+ fadump_str_to_u64("REGSAVE")) {
+ pr_err("Unable to read register save area.\n");
+ return -ENOENT;
+ }
+
+ pr_debug("--------CPU State Data------------\n");
+ pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number));
+ pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset));
+
+ vaddr += be32_to_cpu(reg_header->num_cpu_offset);
+ num_cpus = be32_to_cpu(*((__be32 *)(vaddr)));
+ pr_debug("NumCpus : %u\n", num_cpus);
+ vaddr += sizeof(u32);
+ reg_entry = (struct rtas_fadump_reg_entry *)vaddr;
+
+ rc = fadump_setup_cpu_notes_buf(num_cpus);
+ if (rc != 0)
+ return rc;
+
+ note_buf = (u32 *)fadump_conf->cpu_notes_buf_vaddr;
+
+ if (fadump_conf->fadumphdr_addr)
+ fdh = __va(fadump_conf->fadumphdr_addr);
+
+ for (i = 0; i < num_cpus; i++) {
+ if (be64_to_cpu(reg_entry->reg_id) !=
+ fadump_str_to_u64("CPUSTRT")) {
+ pr_err("Unable to read CPU state data\n");
+ rc = -ENOENT;
+ goto error_out;
+ }
+ /* Lower 4 bytes of reg_value contains logical cpu id */
+ cpu = (be64_to_cpu(reg_entry->reg_value) &
+ RTAS_FADUMP_CPU_ID_MASK);
+ if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) {
+ RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry);
+ continue;
+ }
+ pr_debug("Reading register data for cpu %d...\n", cpu);
+ if (fdh && fdh->crashing_cpu == cpu) {
+ regs = fdh->regs;
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry);
+ } else {
+ reg_entry++;
+ reg_entry = rtas_fadump_read_regs(reg_entry, &regs);
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ }
+ }
+ final_note(note_buf);
+
+ if (fdh) {
+ pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+ fdh->elfcorehdr_addr);
+ fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr));
+ }
+ return 0;
+
+error_out:
+ fadump_free_cpu_notes_buf();
+ return rc;
+
+}
+
+/*
+ * Validate and process the dump data stored by firmware before exporting
+ * it through '/proc/vmcore'.
+ */
+static int __init rtas_fadump_process(struct fw_dump *fadump_conf)
+{
+ struct fadump_crash_info_header *fdh;
+ int rc = 0;
+
+ if (!fdm_active || !fadump_conf->fadumphdr_addr)
+ return -EINVAL;
+
+ /* Check if the dump data is valid. */
+ if ((be16_to_cpu(fdm_active->header.dump_status_flag) ==
+ RTAS_FADUMP_ERROR_FLAG) ||
+ (fdm_active->cpu_state_data.error_flags != 0) ||
+ (fdm_active->rmr_region.error_flags != 0)) {
+ pr_err("Dump taken by platform is not valid\n");
+ return -EINVAL;
+ }
+ if ((fdm_active->rmr_region.bytes_dumped !=
+ fdm_active->rmr_region.source_len) ||
+ !fdm_active->cpu_state_data.bytes_dumped) {
+ pr_err("Dump taken by platform is incomplete\n");
+ return -EINVAL;
+ }
+
+ /* Validate the fadump crash info header */
+ fdh = __va(fadump_conf->fadumphdr_addr);
+ if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+ pr_err("Crash info header is not valid.\n");
+ return -EINVAL;
+ }
+
+ rc = rtas_fadump_build_cpu_notes(fadump_conf);
+ if (rc)
+ return rc;
+
+ /*
+ * We are done validating dump info and elfcore header is now ready
+ * to be exported. set elfcorehdr_addr so that vmcore module will
+ * export the elfcore header through '/proc/vmcore'.
+ */
+ elfcorehdr_addr = fdh->elfcorehdr_addr;
+
+ return 0;
+}
+
+static void rtas_fadump_region_show(struct fw_dump *fadump_conf,
+ struct seq_file *m)
+{
+ const struct rtas_fadump_section *cpu_data_section;
+ const struct rtas_fadump_mem_struct *fdm_ptr;
+
+ if (fdm_active)
+ fdm_ptr = fdm_active;
+ else
+ fdm_ptr = &fdm;
+
+ cpu_data_section = &(fdm_ptr->cpu_state_data);
+ seq_printf(m, "CPU :[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
+ be64_to_cpu(cpu_data_section->destination_address),
+ be64_to_cpu(cpu_data_section->destination_address) +
+ be64_to_cpu(cpu_data_section->source_len) - 1,
+ be64_to_cpu(cpu_data_section->source_len),
+ be64_to_cpu(cpu_data_section->bytes_dumped));
+
+ seq_printf(m, "HPTE:[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
+ be64_to_cpu(fdm_ptr->hpte_region.destination_address),
+ be64_to_cpu(fdm_ptr->hpte_region.destination_address) +
+ be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1,
+ be64_to_cpu(fdm_ptr->hpte_region.source_len),
+ be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped));
+
+ seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ",
+ be64_to_cpu(fdm_ptr->rmr_region.source_address),
+ be64_to_cpu(fdm_ptr->rmr_region.destination_address));
+ seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n",
+ be64_to_cpu(fdm_ptr->rmr_region.source_len),
+ be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped));
+
+ /* Dump is active. Show reserved area start address. */
+ if (fdm_active) {
+ seq_printf(m, "\nMemory above %#016lx is reserved for saving crash dump\n",
+ fadump_conf->reserve_dump_area_start);
+ }
+}
+
+static void rtas_fadump_trigger(struct fadump_crash_info_header *fdh,
+ const char *msg)
+{
+ /* Call ibm,os-term rtas call to trigger firmware assisted dump */
+ rtas_os_term((char *)msg);
+}
+
+static struct fadump_ops rtas_fadump_ops = {
+ .fadump_init_mem_struct = rtas_fadump_init_mem_struct,
+ .fadump_get_bootmem_min = rtas_fadump_get_bootmem_min,
+ .fadump_register = rtas_fadump_register,
+ .fadump_unregister = rtas_fadump_unregister,
+ .fadump_invalidate = rtas_fadump_invalidate,
+ .fadump_process = rtas_fadump_process,
+ .fadump_region_show = rtas_fadump_region_show,
+ .fadump_trigger = rtas_fadump_trigger,
+};
+
+void __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
+{
+ int i, size, num_sections;
+ const __be32 *sections;
+ const __be32 *token;
+
+ /*
+ * Check if Firmware Assisted dump is supported. if yes, check
+ * if dump has been initiated on last reboot.
+ */
+ token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
+ if (!token)
+ return;
+
+ fadump_conf->ibm_configure_kernel_dump = be32_to_cpu(*token);
+ fadump_conf->ops = &rtas_fadump_ops;
+ fadump_conf->fadump_supported = 1;
+
+ /* Firmware supports 64-bit value for size, align it to pagesize. */
+ fadump_conf->max_copy_size = _ALIGN_DOWN(U64_MAX, PAGE_SIZE);
+
+ /*
+ * The 'ibm,kernel-dump' rtas node is present only if there is
+ * dump data waiting for us.
+ */
+ fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
+ if (fdm_active) {
+ pr_info("Firmware-assisted dump is active.\n");
+ fadump_conf->dump_active = 1;
+ rtas_fadump_get_config(fadump_conf, (void *)__pa(fdm_active));
+ }
+
+ /* Get the sizes required to store dump data for the firmware provided
+ * dump sections.
+ * For each dump section type supported, a 32bit cell which defines
+ * the ID of a supported section followed by two 32 bit cells which
+ * gives the size of the section in bytes.
+ */
+ sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
+ &size);
+
+ if (!sections)
+ return;
+
+ num_sections = size / (3 * sizeof(u32));
+
+ for (i = 0; i < num_sections; i++, sections += 3) {
+ u32 type = (u32)of_read_number(sections, 1);
+
+ switch (type) {
+ case RTAS_FADUMP_CPU_STATE_DATA:
+ fadump_conf->cpu_state_data_size =
+ of_read_ulong(&sections[1], 2);
+ break;
+ case RTAS_FADUMP_HPTE_REGION:
+ fadump_conf->hpte_region_size =
+ of_read_ulong(&sections[1], 2);
+ break;
+ }
+ }
+}
diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.h b/arch/powerpc/platforms/pseries/rtas-fadump.h
new file mode 100644
index 000000000000..fd59bd7ca9c3
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/rtas-fadump.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Firmware-Assisted Dump support on POWERVM platform.
+ *
+ * Copyright 2011, Mahesh Salgaonkar, IBM Corporation.
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#ifndef _PSERIES_RTAS_FADUMP_H
+#define _PSERIES_RTAS_FADUMP_H
+
+/*
+ * On some Power systems where RMO is 128MB, it still requires minimum of
+ * 256MB for kernel to boot successfully. When kdump infrastructure is
+ * configured to save vmcore over network, we run into OOM issue while
+ * loading modules related to network setup. Hence we need additional 64M
+ * of memory to avoid OOM issue.
+ */
+#define RTAS_FADUMP_MIN_BOOT_MEM ((0x1UL << 28) + (0x1UL << 26))
+
+/* Firmware provided dump sections */
+#define RTAS_FADUMP_CPU_STATE_DATA 0x0001
+#define RTAS_FADUMP_HPTE_REGION 0x0002
+#define RTAS_FADUMP_REAL_MODE_REGION 0x0011
+
+/* Dump request flag */
+#define RTAS_FADUMP_REQUEST_FLAG 0x00000001
+
+/* Dump status flag */
+#define RTAS_FADUMP_ERROR_FLAG 0x2000
+
+/* Kernel Dump section info */
+struct rtas_fadump_section {
+ __be32 request_flag;
+ __be16 source_data_type;
+ __be16 error_flags;
+ __be64 source_address;
+ __be64 source_len;
+ __be64 bytes_dumped;
+ __be64 destination_address;
+};
+
+/* ibm,configure-kernel-dump header. */
+struct rtas_fadump_section_header {
+ __be32 dump_format_version;
+ __be16 dump_num_sections;
+ __be16 dump_status_flag;
+ __be32 offset_first_dump_section;
+
+ /* Fields for disk dump option. */
+ __be32 dd_block_size;
+ __be64 dd_block_offset;
+ __be64 dd_num_blocks;
+ __be32 dd_offset_disk_path;
+
+ /* Maximum time allowed to prevent an automatic dump-reboot. */
+ __be32 max_time_auto;
+};
+
+/*
+ * Firmware Assisted dump memory structure. This structure is required for
+ * registering future kernel dump with power firmware through rtas call.
+ *
+ * No disk dump option. Hence disk dump path string section is not included.
+ */
+struct rtas_fadump_mem_struct {
+ struct rtas_fadump_section_header header;
+
+ /* Kernel dump sections */
+ struct rtas_fadump_section cpu_state_data;
+ struct rtas_fadump_section hpte_region;
+
+ /*
+ * TODO: Extend multiple boot memory regions support in the kernel
+ * for this platform.
+ */
+ struct rtas_fadump_section rmr_region;
+};
+
+/*
+ * The firmware-assisted dump format.
+ *
+ * The register save area is an area in the partition's memory used to preserve
+ * the register contents (CPU state data) for the active CPUs during a firmware
+ * assisted dump. The dump format contains register save area header followed
+ * by register entries. Each list of registers for a CPU starts with "CPUSTRT"
+ * and ends with "CPUEND".
+ */
+
+/* Register save area header. */
+struct rtas_fadump_reg_save_area_header {
+ __be64 magic_number;
+ __be32 version;
+ __be32 num_cpu_offset;
+};
+
+/* Register entry. */
+struct rtas_fadump_reg_entry {
+ __be64 reg_id;
+ __be64 reg_value;
+};
+
+/* Utility macros */
+#define RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry) \
+({ \
+ while (be64_to_cpu(reg_entry->reg_id) != \
+ fadump_str_to_u64("CPUEND")) \
+ reg_entry++; \
+ reg_entry++; \
+})
+
+#define RTAS_FADUMP_CPU_ID_MASK ((1UL << 32) - 1)
+
+#endif /* _PSERIES_RTAS_FADUMP_H */
diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c
index a0001280503c..2879c4f0ceb7 100644
--- a/arch/powerpc/platforms/pseries/scanlog.c
+++ b/arch/powerpc/platforms/pseries/scanlog.c
@@ -152,13 +152,12 @@ static int scanlog_release(struct inode * inode, struct file * file)
return 0;
}
-static const struct file_operations scanlog_fops = {
- .owner = THIS_MODULE,
- .read = scanlog_read,
- .write = scanlog_write,
- .open = scanlog_open,
- .release = scanlog_release,
- .llseek = noop_llseek,
+static const struct proc_ops scanlog_proc_ops = {
+ .proc_read = scanlog_read,
+ .proc_write = scanlog_write,
+ .proc_open = scanlog_open,
+ .proc_release = scanlog_release,
+ .proc_lseek = noop_llseek,
};
static int __init scanlog_init(void)
@@ -176,7 +175,7 @@ static int __init scanlog_init(void)
goto err;
ent = proc_create("powerpc/rtas/scan-log-dump", 0400, NULL,
- &scanlog_fops);
+ &scanlog_proc_ops);
if (!ent)
goto err;
return 0;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index f5940cc71c37..0c8421dd01ab 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -69,10 +69,14 @@
#include <asm/security_features.h>
#include <asm/asm-const.h>
#include <asm/swiotlb.h>
+#include <asm/svm.h>
#include "pseries.h"
#include "../../../../drivers/pci/pci.h"
+DEFINE_STATIC_KEY_FALSE(shared_processor);
+EXPORT_SYMBOL_GPL(shared_processor);
+
int CMO_PrPSP = -1;
int CMO_SecPSP = -1;
unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
@@ -141,17 +145,19 @@ static void __init fwnmi_init(void)
}
#ifdef CONFIG_PPC_BOOK3S_64
- /* Allocate per cpu slb area to save old slb contents during MCE */
- size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
- slb_ptr = memblock_alloc_try_nid_raw(size, sizeof(struct slb_entry),
- MEMBLOCK_LOW_LIMIT, ppc64_rma_size,
- NUMA_NO_NODE);
- if (!slb_ptr)
- panic("Failed to allocate %zu bytes below %pa for slb area\n",
- size, &ppc64_rma_size);
-
- for_each_possible_cpu(i)
- paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i);
+ if (!radix_enabled()) {
+ /* Allocate per cpu area to save old slb contents during MCE */
+ size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
+ slb_ptr = memblock_alloc_try_nid_raw(size,
+ sizeof(struct slb_entry), MEMBLOCK_LOW_LIMIT,
+ ppc64_rma_size, NUMA_NO_NODE);
+ if (!slb_ptr)
+ panic("Failed to allocate %zu bytes below %pa for slb area\n",
+ size, &ppc64_rma_size);
+
+ for_each_possible_cpu(i)
+ paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i);
+ }
#endif
}
@@ -297,8 +303,10 @@ static inline int alloc_dispatch_logs(void)
static int alloc_dispatch_log_kmem_cache(void)
{
+ void (*ctor)(void *) = get_dtl_cache_ctor();
+
dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
- DISPATCH_LOG_BYTES, 0, NULL);
+ DISPATCH_LOG_BYTES, 0, ctor);
if (!dtl_cache) {
pr_warn("Failed to create dispatch trace log buffer cache\n");
pr_warn("Stolen time statistics will be unreliable\n");
@@ -316,6 +324,9 @@ static void pseries_lpar_idle(void)
* low power mode by ceding processor to hypervisor
*/
+ if (!prep_irq_for_idle())
+ return;
+
/* Indicate to hypervisor that we are idle. */
get_lppaca()->idle = 1;
@@ -736,6 +747,7 @@ static void __init pSeries_setup_arch(void)
pseries_setup_rfi_flush();
setup_stf_barrier();
+ pseries_lpar_read_hblkrm_characteristics();
/* By default, only probe PCI (can be overridden by rtas_pci) */
pci_add_flags(PCI_PROBE_ONLY);
@@ -749,6 +761,10 @@ static void __init pSeries_setup_arch(void)
if (firmware_has_feature(FW_FEATURE_LPAR)) {
vpa_init(boot_cpuid);
+
+ if (lppaca_shared_proc(get_lppaca()))
+ static_branch_enable(&shared_processor);
+
ppc_md.power_save = pseries_lpar_idle;
ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
#ifdef CONFIG_PCI_IOV
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 4b3ef8d9c63f..ad61e90032da 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -41,6 +41,7 @@
#include <asm/dbell.h>
#include <asm/plpar_wrappers.h>
#include <asm/code-patching.h>
+#include <asm/svm.h>
#include "pseries.h"
#include "offline_states.h"
@@ -221,7 +222,7 @@ static __init void pSeries_smp_probe_xics(void)
{
xics_smp_probe();
- if (cpu_has_feature(CPU_FTR_DBELL))
+ if (cpu_has_feature(CPU_FTR_DBELL) && !is_secure_guest())
smp_ops->cause_ipi = smp_pseries_cause_ipi;
else
smp_ops->cause_ipi = icp_ops->cause_ipi;
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
new file mode 100644
index 000000000000..40c0637203d5
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Secure VM platform
+ *
+ * Copyright 2018 IBM Corporation
+ * Author: Anshuman Khandual <khandual@linux.vnet.ibm.com>
+ */
+
+#include <linux/mm.h>
+#include <asm/machdep.h>
+#include <asm/svm.h>
+#include <asm/swiotlb.h>
+#include <asm/ultravisor.h>
+
+static int __init init_svm(void)
+{
+ if (!is_secure_guest())
+ return 0;
+
+ /* Don't release the SWIOTLB buffer. */
+ ppc_swiotlb_enable = 1;
+
+ /*
+ * Since the guest memory is inaccessible to the host, devices always
+ * need to use the SWIOTLB buffer for DMA even if dma_capable() says
+ * otherwise.
+ */
+ swiotlb_force = SWIOTLB_FORCE;
+
+ /* Share the SWIOTLB buffer with the host. */
+ swiotlb_update_mem_attributes();
+
+ return 0;
+}
+machine_early_initcall(pseries, init_svm);
+
+int set_memory_encrypted(unsigned long addr, int numpages)
+{
+ if (!PAGE_ALIGNED(addr))
+ return -EINVAL;
+
+ uv_unshare_page(PHYS_PFN(__pa(addr)), numpages);
+
+ return 0;
+}
+
+int set_memory_decrypted(unsigned long addr, int numpages)
+{
+ if (!PAGE_ALIGNED(addr))
+ return -EINVAL;
+
+ uv_share_page(PHYS_PFN(__pa(addr)), numpages);
+
+ return 0;
+}
+
+/* There's one dispatch log per CPU. */
+#define NR_DTL_PAGE (DISPATCH_LOG_BYTES * CONFIG_NR_CPUS / PAGE_SIZE)
+
+static struct page *dtl_page_store[NR_DTL_PAGE];
+static long dtl_nr_pages;
+
+static bool is_dtl_page_shared(struct page *page)
+{
+ long i;
+
+ for (i = 0; i < dtl_nr_pages; i++)
+ if (dtl_page_store[i] == page)
+ return true;
+
+ return false;
+}
+
+void dtl_cache_ctor(void *addr)
+{
+ unsigned long pfn = PHYS_PFN(__pa(addr));
+ struct page *page = pfn_to_page(pfn);
+
+ if (!is_dtl_page_shared(page)) {
+ dtl_page_store[dtl_nr_pages] = page;
+ dtl_nr_pages++;
+ WARN_ON(dtl_nr_pages >= NR_DTL_PAGE);
+ uv_share_page(pfn, 1);
+ }
+}
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 6601b9d404dc..f682b7babc09 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -605,6 +605,8 @@ static const struct dma_map_ops vio_dma_mapping_ops = {
.unmap_page = vio_dma_iommu_unmap_page,
.dma_supported = dma_iommu_dma_supported,
.get_required_mask = dma_iommu_get_required_mask,
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
};
/**
@@ -1174,6 +1176,8 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
if (tbl == NULL)
return NULL;
+ kref_init(&tbl->it_kref);
+
of_parse_dma_window(dev->dev.of_node, dma_window,
&tbl->it_index, &offset, &size);
@@ -1191,7 +1195,7 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
else
tbl->it_ops = &iommu_table_pseries_ops;
- return iommu_init_table(tbl, -1);
+ return iommu_init_table(tbl, -1, 0, 0);
}
/**
diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig
index d23288c4abf6..9ebcc1337560 100644
--- a/arch/powerpc/sysdev/Kconfig
+++ b/arch/powerpc/sysdev/Kconfig
@@ -28,13 +28,6 @@ config PPC_MSI_BITMAP
source "arch/powerpc/sysdev/xics/Kconfig"
source "arch/powerpc/sysdev/xive/Kconfig"
-config PPC_SCOM
- bool
-
-config SCOM_DEBUGFS
- bool "Expose SCOM controllers via debugfs"
- depends on PPC_SCOM && DEBUG_FS
-
config GE_FPGA
bool
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 9d73dfddf060..cb5a5bd2cef5 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -24,7 +24,6 @@ obj-$(CONFIG_FSL_CORENET_RCPM) += fsl_rcpm.o
obj-$(CONFIG_FSL_LBC) += fsl_lbc.o
obj-$(CONFIG_FSL_GTM) += fsl_gtm.o
obj-$(CONFIG_FSL_85XX_CACHE_SRAM) += fsl_85xx_l2ctlr.o fsl_85xx_cache_sram.o
-obj-$(CONFIG_SIMPLE_GPIO) += simple_gpio.o
obj-$(CONFIG_FSL_RIO) += fsl_rio.o fsl_rmu.o
obj-$(CONFIG_TSI108_BRIDGE) += tsi108_pci.o tsi108_dev.o
obj-$(CONFIG_RTC_DRV_CMOS) += rtc_cmos_setup.o
@@ -49,8 +48,6 @@ ifdef CONFIG_SUSPEND
obj-$(CONFIG_PPC_BOOK3S_32) += 6xx-suspend.o
endif
-obj-$(CONFIG_PPC_SCOM) += scom.o
-
obj-$(CONFIG_PPC_EARLY_DEBUG_MEMCONS) += udbg_memcons.o
obj-$(CONFIG_PPC_XICS) += xics/
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index 21a1fae0714e..6b4a34b36d98 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -344,7 +344,7 @@ static void iommu_table_dart_setup(void)
iommu_table_dart.it_index = 0;
iommu_table_dart.it_blocksize = 1;
iommu_table_dart.it_ops = &iommu_dart_ops;
- iommu_init_table(&iommu_table_dart, -1);
+ iommu_init_table(&iommu_table_dart, -1, 0, 0);
/* Reserve the last page of the DART to avoid possible prefetch
* past the DART mapped area
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index ff0e2b156cb5..4a8874bc1057 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -115,8 +115,8 @@ static void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
- pdev->dev.bus_dma_mask =
- hose->dma_window_base_cur + hose->dma_window_size;
+ pdev->dev.bus_dma_limit =
+ hose->dma_window_base_cur + hose->dma_window_size - 1;
}
static void setup_swiotlb_ops(struct pci_controller *hose)
@@ -135,7 +135,7 @@ static void fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask)
* mapping that allows addressing any RAM address from across PCI.
*/
if (dev_is_pci(dev) && dma_mask >= pci64_dma_offset * 2 - 1) {
- dev->bus_dma_mask = 0;
+ dev->bus_dma_limit = 0;
dev->archdata.dma_offset = pci64_dma_offset;
}
}
@@ -1065,13 +1065,11 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs)
addr += mfspr(SPRN_MCAR);
if (is_in_pci_mem_space(addr)) {
- if (user_mode(regs)) {
- pagefault_disable();
- ret = get_user(inst, (__u32 __user *)regs->nip);
- pagefault_enable();
- } else {
+ if (user_mode(regs))
+ ret = probe_user_read(&inst, (void __user *)regs->nip,
+ sizeof(inst));
+ else
ret = probe_kernel_address((void *)regs->nip, inst);
- }
if (!ret && mcheck_handle_load(regs, inst)) {
regs->nip += 4;
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 934a77324f6b..a3a72b780e67 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -964,7 +964,7 @@ static struct irq_chip mpic_irq_chip = {
};
#ifdef CONFIG_SMP
-static struct irq_chip mpic_ipi_chip = {
+static const struct irq_chip mpic_ipi_chip = {
.irq_mask = mpic_mask_ipi,
.irq_unmask = mpic_unmask_ipi,
.irq_eoi = mpic_end_ipi,
@@ -978,7 +978,7 @@ static struct irq_chip mpic_tm_chip = {
};
#ifdef CONFIG_MPIC_U3_HT_IRQS
-static struct irq_chip mpic_irq_ht_chip = {
+static const struct irq_chip mpic_irq_ht_chip = {
.irq_startup = mpic_startup_ht_irq,
.irq_shutdown = mpic_shutdown_ht_irq,
.irq_mask = mpic_mask_irq,
diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c
deleted file mode 100644
index 94e885bf3aee..000000000000
--- a/arch/powerpc/sysdev/scom.c
+++ /dev/null
@@ -1,223 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright 2010 Benjamin Herrenschmidt, IBM Corp
- * <benh@kernel.crashing.org>
- * and David Gibson, IBM Corporation.
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <asm/debugfs.h>
-#include <asm/prom.h>
-#include <asm/scom.h>
-#include <linux/uaccess.h>
-
-const struct scom_controller *scom_controller;
-EXPORT_SYMBOL_GPL(scom_controller);
-
-struct device_node *scom_find_parent(struct device_node *node)
-{
- struct device_node *par, *tmp;
- const u32 *p;
-
- for (par = of_node_get(node); par;) {
- if (of_get_property(par, "scom-controller", NULL))
- break;
- p = of_get_property(par, "scom-parent", NULL);
- tmp = par;
- if (p == NULL)
- par = of_get_parent(par);
- else
- par = of_find_node_by_phandle(*p);
- of_node_put(tmp);
- }
- return par;
-}
-EXPORT_SYMBOL_GPL(scom_find_parent);
-
-scom_map_t scom_map_device(struct device_node *dev, int index)
-{
- struct device_node *parent;
- unsigned int cells, size;
- const __be32 *prop, *sprop;
- u64 reg, cnt;
- scom_map_t ret;
-
- parent = scom_find_parent(dev);
-
- if (parent == NULL)
- return NULL;
-
- /*
- * We support "scom-reg" properties for adding scom registers
- * to a random device-tree node with an explicit scom-parent
- *
- * We also support the simple "reg" property if the device is
- * a direct child of a scom controller.
- *
- * In case both exist, "scom-reg" takes precedence.
- */
- prop = of_get_property(dev, "scom-reg", &size);
- sprop = of_get_property(parent, "#scom-cells", NULL);
- if (!prop && parent == dev->parent) {
- prop = of_get_property(dev, "reg", &size);
- sprop = of_get_property(parent, "#address-cells", NULL);
- }
- if (!prop)
- return NULL;
- cells = sprop ? be32_to_cpup(sprop) : 1;
- size >>= 2;
-
- if (index >= (size / (2*cells)))
- return NULL;
-
- reg = of_read_number(&prop[index * cells * 2], cells);
- cnt = of_read_number(&prop[index * cells * 2 + cells], cells);
-
- ret = scom_map(parent, reg, cnt);
- of_node_put(parent);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(scom_map_device);
-
-#ifdef CONFIG_SCOM_DEBUGFS
-struct scom_debug_entry {
- struct device_node *dn;
- struct debugfs_blob_wrapper path;
- char name[16];
-};
-
-static ssize_t scom_debug_read(struct file *filp, char __user *ubuf,
- size_t count, loff_t *ppos)
-{
- struct scom_debug_entry *ent = filp->private_data;
- u64 __user *ubuf64 = (u64 __user *)ubuf;
- loff_t off = *ppos;
- ssize_t done = 0;
- u64 reg, reg_cnt, val;
- scom_map_t map;
- int rc;
-
- if (off < 0 || (off & 7) || (count & 7))
- return -EINVAL;
- reg = off >> 3;
- reg_cnt = count >> 3;
-
- map = scom_map(ent->dn, reg, reg_cnt);
- if (!scom_map_ok(map))
- return -ENXIO;
-
- for (reg = 0; reg < reg_cnt; reg++) {
- rc = scom_read(map, reg, &val);
- if (!rc)
- rc = put_user(val, ubuf64);
- if (rc) {
- if (!done)
- done = rc;
- break;
- }
- ubuf64++;
- *ppos += 8;
- done += 8;
- }
- scom_unmap(map);
- return done;
-}
-
-static ssize_t scom_debug_write(struct file* filp, const char __user *ubuf,
- size_t count, loff_t *ppos)
-{
- struct scom_debug_entry *ent = filp->private_data;
- u64 __user *ubuf64 = (u64 __user *)ubuf;
- loff_t off = *ppos;
- ssize_t done = 0;
- u64 reg, reg_cnt, val;
- scom_map_t map;
- int rc;
-
- if (off < 0 || (off & 7) || (count & 7))
- return -EINVAL;
- reg = off >> 3;
- reg_cnt = count >> 3;
-
- map = scom_map(ent->dn, reg, reg_cnt);
- if (!scom_map_ok(map))
- return -ENXIO;
-
- for (reg = 0; reg < reg_cnt; reg++) {
- rc = get_user(val, ubuf64);
- if (!rc)
- rc = scom_write(map, reg, val);
- if (rc) {
- if (!done)
- done = rc;
- break;
- }
- ubuf64++;
- done += 8;
- }
- scom_unmap(map);
- return done;
-}
-
-static const struct file_operations scom_debug_fops = {
- .read = scom_debug_read,
- .write = scom_debug_write,
- .open = simple_open,
- .llseek = default_llseek,
-};
-
-static int scom_debug_init_one(struct dentry *root, struct device_node *dn,
- int i)
-{
- struct scom_debug_entry *ent;
- struct dentry *dir;
-
- ent = kzalloc(sizeof(*ent), GFP_KERNEL);
- if (!ent)
- return -ENOMEM;
-
- ent->dn = of_node_get(dn);
- snprintf(ent->name, 16, "%08x", i);
- ent->path.data = (void*)kasprintf(GFP_KERNEL, "%pOF", dn);
- ent->path.size = strlen((char *)ent->path.data);
-
- dir = debugfs_create_dir(ent->name, root);
- if (!dir) {
- of_node_put(dn);
- kfree(ent->path.data);
- kfree(ent);
- return -1;
- }
-
- debugfs_create_blob("devspec", 0400, dir, &ent->path);
- debugfs_create_file("access", 0600, dir, ent, &scom_debug_fops);
-
- return 0;
-}
-
-static int scom_debug_init(void)
-{
- struct device_node *dn;
- struct dentry *root;
- int i, rc;
-
- root = debugfs_create_dir("scom", powerpc_debugfs_root);
- if (!root)
- return -1;
-
- i = rc = 0;
- for_each_node_with_property(dn, "scom-controller") {
- int id = of_get_ibm_chip_id(dn);
- if (id == -1)
- id = i;
- rc |= scom_debug_init_one(root, dn, id);
- i++;
- }
-
- return rc;
-}
-device_initcall(scom_debug_init);
-#endif /* CONFIG_SCOM_DEBUGFS */
diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c
deleted file mode 100644
index dc1740cd9e42..000000000000
--- a/arch/powerpc/sysdev/simple_gpio.c
+++ /dev/null
@@ -1,143 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Simple Memory-Mapped GPIOs
- *
- * Copyright (c) MontaVista Software, Inc. 2008.
- *
- * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/ioport.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_gpio.h>
-#include <linux/gpio/driver.h>
-#include <linux/slab.h>
-#include <asm/prom.h>
-#include "simple_gpio.h"
-
-struct u8_gpio_chip {
- struct of_mm_gpio_chip mm_gc;
- spinlock_t lock;
-
- /* shadowed data register to clear/set bits safely */
- u8 data;
-};
-
-static u8 u8_pin2mask(unsigned int pin)
-{
- return 1 << (8 - 1 - pin);
-}
-
-static int u8_gpio_get(struct gpio_chip *gc, unsigned int gpio)
-{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
-
- return !!(in_8(mm_gc->regs) & u8_pin2mask(gpio));
-}
-
-static void u8_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
-{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- struct u8_gpio_chip *u8_gc = gpiochip_get_data(gc);
- unsigned long flags;
-
- spin_lock_irqsave(&u8_gc->lock, flags);
-
- if (val)
- u8_gc->data |= u8_pin2mask(gpio);
- else
- u8_gc->data &= ~u8_pin2mask(gpio);
-
- out_8(mm_gc->regs, u8_gc->data);
-
- spin_unlock_irqrestore(&u8_gc->lock, flags);
-}
-
-static int u8_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
-{
- return 0;
-}
-
-static int u8_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
-{
- u8_gpio_set(gc, gpio, val);
- return 0;
-}
-
-static void u8_gpio_save_regs(struct of_mm_gpio_chip *mm_gc)
-{
- struct u8_gpio_chip *u8_gc =
- container_of(mm_gc, struct u8_gpio_chip, mm_gc);
-
- u8_gc->data = in_8(mm_gc->regs);
-}
-
-static int __init u8_simple_gpiochip_add(struct device_node *np)
-{
- int ret;
- struct u8_gpio_chip *u8_gc;
- struct of_mm_gpio_chip *mm_gc;
- struct gpio_chip *gc;
-
- u8_gc = kzalloc(sizeof(*u8_gc), GFP_KERNEL);
- if (!u8_gc)
- return -ENOMEM;
-
- spin_lock_init(&u8_gc->lock);
-
- mm_gc = &u8_gc->mm_gc;
- gc = &mm_gc->gc;
-
- mm_gc->save_regs = u8_gpio_save_regs;
- gc->ngpio = 8;
- gc->direction_input = u8_gpio_dir_in;
- gc->direction_output = u8_gpio_dir_out;
- gc->get = u8_gpio_get;
- gc->set = u8_gpio_set;
-
- ret = of_mm_gpiochip_add_data(np, mm_gc, u8_gc);
- if (ret)
- goto err;
- return 0;
-err:
- kfree(u8_gc);
- return ret;
-}
-
-void __init simple_gpiochip_init(const char *compatible)
-{
- struct device_node *np;
-
- for_each_compatible_node(np, NULL, compatible) {
- int ret;
- struct resource r;
-
- ret = of_address_to_resource(np, 0, &r);
- if (ret)
- goto err;
-
- switch (resource_size(&r)) {
- case 1:
- ret = u8_simple_gpiochip_add(np);
- if (ret)
- goto err;
- break;
- default:
- /*
- * Whenever you need support for GPIO bank width > 1,
- * please just turn u8_ code into huge macros, and
- * construct needed uX_ code with it.
- */
- ret = -ENOSYS;
- goto err;
- }
- continue;
-err:
- pr_err("%pOF: registration failed, status %d\n", np, ret);
- }
-}
diff --git a/arch/powerpc/sysdev/simple_gpio.h b/arch/powerpc/sysdev/simple_gpio.h
deleted file mode 100644
index f3f3a20d39e2..000000000000
--- a/arch/powerpc/sysdev/simple_gpio.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __SYSDEV_SIMPLE_GPIO_H
-#define __SYSDEV_SIMPLE_GPIO_H
-
-#include <linux/errno.h>
-
-#ifdef CONFIG_SIMPLE_GPIO
-extern void simple_gpiochip_init(const char *compatible);
-#else
-static inline void simple_gpiochip_init(const char *compatible) {}
-#endif /* CONFIG_SIMPLE_GPIO */
-
-#endif /* __SYSDEV_SIMPLE_GPIO_H */
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 485569ff7ef1..7d13d2ef5a90 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -140,7 +140,7 @@ static unsigned int icp_native_get_irq(void)
static void icp_native_cause_ipi(int cpu)
{
- kvmppc_set_host_ipi(cpu, 1);
+ kvmppc_set_host_ipi(cpu);
icp_native_set_qirr(cpu, IPI_PRIORITY);
}
@@ -179,7 +179,7 @@ void icp_native_flush_interrupt(void)
if (vec == XICS_IPI) {
/* Clear pending IPI */
int cpu = smp_processor_id();
- kvmppc_set_host_ipi(cpu, 0);
+ kvmppc_clear_host_ipi(cpu);
icp_native_set_qirr(cpu, 0xff);
} else {
pr_err("XICS: hw interrupt 0x%x to offline cpu, disabling\n",
@@ -200,7 +200,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
{
int cpu = smp_processor_id();
- kvmppc_set_host_ipi(cpu, 0);
+ kvmppc_clear_host_ipi(cpu);
icp_native_set_qirr(cpu, 0xff);
return smp_ipi_demux();
diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
index 8bb8dd7dd6ad..68fd2540b093 100644
--- a/arch/powerpc/sysdev/xics/icp-opal.c
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -126,7 +126,7 @@ static void icp_opal_cause_ipi(int cpu)
{
int hw_cpu = get_hard_smp_processor_id(cpu);
- kvmppc_set_host_ipi(cpu, 1);
+ kvmppc_set_host_ipi(cpu);
opal_int_set_mfrr(hw_cpu, IPI_PRIORITY);
}
@@ -134,7 +134,7 @@ static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id)
{
int cpu = smp_processor_id();
- kvmppc_set_host_ipi(cpu, 0);
+ kvmppc_clear_host_ipi(cpu);
opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
return smp_ipi_demux();
@@ -157,7 +157,7 @@ void icp_opal_flush_interrupt(void)
if (vec == XICS_IPI) {
/* Clear pending IPI */
int cpu = smp_processor_id();
- kvmppc_set_host_ipi(cpu, 0);
+ kvmppc_clear_host_ipi(cpu);
opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
} else {
pr_err("XICS: hw interrupt 0x%x to offline cpu, "
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 1cdb39575eae..9651ca061828 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -135,7 +135,7 @@ static u32 xive_read_eq(struct xive_q *q, bool just_peek)
static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
{
u32 irq = 0;
- u8 prio;
+ u8 prio = 0;
/* Find highest pending priority */
while (xc->pending_prio != 0) {
@@ -148,8 +148,19 @@ static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
irq = xive_read_eq(&xc->queue[prio], just_peek);
/* Found something ? That's it */
- if (irq)
- break;
+ if (irq) {
+ if (just_peek || irq_to_desc(irq))
+ break;
+ /*
+ * We should never get here; if we do then we must
+ * have failed to synchronize the interrupt properly
+ * when shutting it down.
+ */
+ pr_crit("xive: got interrupt %d without descriptor, dropping\n",
+ irq);
+ WARN_ON(1);
+ continue;
+ }
/* Clear pending bits */
xc->pending_prio &= ~(1 << prio);
@@ -185,7 +196,7 @@ static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
/*
* This is used to perform the magic loads from an ESB
- * described in xive.h
+ * described in xive-regs.h
*/
static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset)
{
@@ -226,26 +237,61 @@ static notrace void xive_dump_eq(const char *name, struct xive_q *q)
i0 = be32_to_cpup(q->qpage + idx);
idx = (idx + 1) & q->msk;
i1 = be32_to_cpup(q->qpage + idx);
- xmon_printf(" %s Q T=%d %08x %08x ...\n", name,
- q->toggle, i0, i1);
+ xmon_printf("%s idx=%d T=%d %08x %08x ...", name,
+ q->idx, q->toggle, i0, i1);
}
notrace void xmon_xive_do_dump(int cpu)
{
struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
- xmon_printf("XIVE state for CPU %d:\n", cpu);
- xmon_printf(" pp=%02x cppr=%02x\n", xc->pending_prio, xc->cppr);
- xive_dump_eq("IRQ", &xc->queue[xive_irq_priority]);
+ xmon_printf("CPU %d:", cpu);
+ if (xc) {
+ xmon_printf("pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr);
+
#ifdef CONFIG_SMP
- {
- u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);
- xmon_printf(" IPI state: %x:%c%c\n", xc->hw_ipi,
- val & XIVE_ESB_VAL_P ? 'P' : 'p',
- val & XIVE_ESB_VAL_Q ? 'Q' : 'q');
- }
+ {
+ u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);
+
+ xmon_printf("IPI=0x%08x PQ=%c%c ", xc->hw_ipi,
+ val & XIVE_ESB_VAL_P ? 'P' : '-',
+ val & XIVE_ESB_VAL_Q ? 'Q' : '-');
+ }
#endif
+ xive_dump_eq("EQ", &xc->queue[xive_irq_priority]);
+ }
+ xmon_printf("\n");
}
+
+int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d)
+{
+ int rc;
+ u32 target;
+ u8 prio;
+ u32 lirq;
+
+ rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
+ if (rc) {
+ xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc);
+ return rc;
+ }
+
+ xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
+ hw_irq, target, prio, lirq);
+
+ if (d) {
+ struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+ u64 val = xive_esb_read(xd, XIVE_ESB_GET);
+
+ xmon_printf("PQ=%c%c",
+ val & XIVE_ESB_VAL_P ? 'P' : '-',
+ val & XIVE_ESB_VAL_Q ? 'Q' : '-');
+ }
+
+ xmon_printf("\n");
+ return 0;
+}
+
#endif /* CONFIG_XMON */
static unsigned int xive_get_irq(void)
@@ -307,6 +353,7 @@ static void xive_do_queue_eoi(struct xive_cpu *xc)
*/
static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
{
+ xd->stale_p = false;
/* If the XIVE supports the new "store EOI facility, use it */
if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
@@ -350,7 +397,7 @@ static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
}
}
-/* irq_chip eoi callback */
+/* irq_chip eoi callback, called with irq descriptor lock held */
static void xive_irq_eoi(struct irq_data *d)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
@@ -366,6 +413,8 @@ static void xive_irq_eoi(struct irq_data *d)
if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
!(xd->flags & XIVE_IRQ_NO_EOI))
xive_do_source_eoi(irqd_to_hwirq(d), xd);
+ else
+ xd->stale_p = true;
/*
* Clear saved_p to indicate that it's no longer occupying
@@ -397,11 +446,16 @@ static void xive_do_source_set_mask(struct xive_irq_data *xd,
*/
if (mask) {
val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
- xd->saved_p = !!(val & XIVE_ESB_VAL_P);
- } else if (xd->saved_p)
+ if (!xd->stale_p && !!(val & XIVE_ESB_VAL_P))
+ xd->saved_p = true;
+ xd->stale_p = false;
+ } else if (xd->saved_p) {
xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
- else
+ xd->saved_p = false;
+ } else {
xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
+ xd->stale_p = false;
+ }
}
/*
@@ -541,6 +595,8 @@ static unsigned int xive_irq_startup(struct irq_data *d)
unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
int target, rc;
+ xd->saved_p = false;
+ xd->stale_p = false;
pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n",
d->irq, hw_irq, d);
@@ -587,6 +643,7 @@ static unsigned int xive_irq_startup(struct irq_data *d)
return 0;
}
+/* called with irq descriptor lock held */
static void xive_irq_shutdown(struct irq_data *d)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
@@ -602,16 +659,6 @@ static void xive_irq_shutdown(struct irq_data *d)
xive_do_source_set_mask(xd, true);
/*
- * The above may have set saved_p. We clear it otherwise it
- * will prevent re-enabling later on. It is ok to forget the
- * fact that the interrupt might be in a queue because we are
- * accounting that already in xive_dec_target_count() and will
- * be re-routing it to a new queue with proper accounting when
- * it's started up again
- */
- xd->saved_p = false;
-
- /*
* Mask the interrupt in HW in the IVT/EAS and set the number
* to be the "bad" IRQ number
*/
@@ -797,6 +844,10 @@ static int xive_irq_retrigger(struct irq_data *d)
return 1;
}
+/*
+ * Caller holds the irq descriptor lock, so this won't be called
+ * concurrently with xive_get_irqchip_state on the same interrupt.
+ */
static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
@@ -820,6 +871,10 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
/* Set it to PQ=10 state to prevent further sends */
pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
+ if (!xd->stale_p) {
+ xd->saved_p = !!(pq & XIVE_ESB_VAL_P);
+ xd->stale_p = !xd->saved_p;
+ }
/* No target ? nothing to do */
if (xd->target == XIVE_INVALID_TARGET) {
@@ -827,7 +882,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
* An untargetted interrupt should have been
* also masked at the source
*/
- WARN_ON(pq & 2);
+ WARN_ON(xd->saved_p);
return 0;
}
@@ -847,9 +902,8 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
* This saved_p is cleared by the host EOI, when we know
* for sure the queue slot is no longer in use.
*/
- if (pq & 2) {
- pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
- xd->saved_p = true;
+ if (xd->saved_p) {
+ xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
/*
* Sync the XIVE source HW to ensure the interrupt
@@ -862,8 +916,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
*/
if (xive_ops->sync_source)
xive_ops->sync_source(hw_irq);
- } else
- xd->saved_p = false;
+ }
} else {
irqd_clr_forwarded_to_vcpu(d);
@@ -914,6 +967,32 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
return 0;
}
+/* Called with irq descriptor lock held. */
+static int xive_get_irqchip_state(struct irq_data *data,
+ enum irqchip_irq_state which, bool *state)
+{
+ struct xive_irq_data *xd = irq_data_get_irq_handler_data(data);
+ u8 pq;
+
+ switch (which) {
+ case IRQCHIP_STATE_ACTIVE:
+ pq = xive_esb_read(xd, XIVE_ESB_GET);
+
+ /*
+ * The esb value being all 1's means we couldn't get
+ * the PQ state of the interrupt through mmio. It may
+ * happen, for example when querying a PHB interrupt
+ * while the PHB is in an error state. We consider the
+ * interrupt to be inactive in that case.
+ */
+ *state = (pq != XIVE_ESB_INVALID) && !xd->stale_p &&
+ (xd->saved_p || !!(pq & XIVE_ESB_VAL_P));
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static struct irq_chip xive_irq_chip = {
.name = "XIVE-IRQ",
.irq_startup = xive_irq_startup,
@@ -925,6 +1004,7 @@ static struct irq_chip xive_irq_chip = {
.irq_set_type = xive_irq_set_type,
.irq_retrigger = xive_irq_retrigger,
.irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
+ .irq_get_irqchip_state = xive_get_irqchip_state,
};
bool is_xive_irq(struct irq_chip *chip)
@@ -964,6 +1044,15 @@ static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw)
xd->target = XIVE_INVALID_TARGET;
irq_set_handler_data(virq, xd);
+ /*
+ * Turn OFF by default the interrupt being mapped. A side
+ * effect of this check is the mapping the ESB page of the
+ * interrupt in the Linux address space. This prevents page
+ * fault issues in the crash handler which masks all
+ * interrupts.
+ */
+ xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
+
return 0;
}
@@ -1338,6 +1427,11 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc)
xd = irq_desc_get_handler_data(desc);
/*
+ * Clear saved_p to indicate that it's no longer pending
+ */
+ xd->saved_p = false;
+
+ /*
* For LSIs, we EOI, this will cause a resend if it's
* still asserted. Otherwise do an MSI retrigger.
*/
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 2f26b74f6cfa..0ff6b739052c 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -111,6 +111,20 @@ int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
}
EXPORT_SYMBOL_GPL(xive_native_configure_irq);
+static int xive_native_get_irq_config(u32 hw_irq, u32 *target, u8 *prio,
+ u32 *sw_irq)
+{
+ s64 rc;
+ __be64 vp;
+ __be32 lirq;
+
+ rc = opal_xive_get_irq_config(hw_irq, &vp, prio, &lirq);
+
+ *target = be64_to_cpu(vp);
+ *sw_irq = be32_to_cpu(lirq);
+
+ return rc == 0 ? 0 : -ENXIO;
+}
/* This can be called multiple time to change a queue configuration */
int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
@@ -231,6 +245,17 @@ static bool xive_native_match(struct device_node *node)
return of_device_is_compatible(node, "ibm,opal-xive-vc");
}
+static s64 opal_xive_allocate_irq(u32 chip_id)
+{
+ s64 irq = opal_xive_allocate_irq_raw(chip_id);
+
+ /*
+ * Old versions of skiboot can incorrectly return 0xffffffff to
+ * indicate no space, fix it up here.
+ */
+ return irq == 0xffffffff ? OPAL_RESOURCE : irq;
+}
+
#ifdef CONFIG_SMP
static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
{
@@ -442,6 +467,7 @@ EXPORT_SYMBOL_GPL(xive_native_sync_queue);
static const struct xive_ops xive_native_ops = {
.populate_irq_data = xive_native_populate_irq_data,
.configure_irq = xive_native_configure_irq,
+ .get_irq_config = xive_native_get_irq_config,
.setup_queue = xive_native_setup_queue,
.cleanup_queue = xive_native_cleanup_queue,
.match = xive_native_match,
@@ -800,6 +826,13 @@ int xive_native_set_queue_state(u32 vp_id, u32 prio, u32 qtoggle, u32 qindex)
}
EXPORT_SYMBOL_GPL(xive_native_set_queue_state);
+bool xive_native_has_queue_state_support(void)
+{
+ return opal_check_token(OPAL_XIVE_GET_QUEUE_STATE) &&
+ opal_check_token(OPAL_XIVE_SET_QUEUE_STATE);
+}
+EXPORT_SYMBOL_GPL(xive_native_has_queue_state_support);
+
int xive_native_get_vp_state(u32 vp_id, u64 *out_state)
{
__be64 state;
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index 8ef9cf4ebb1c..55dc61cb4867 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -45,7 +45,7 @@ static int xive_irq_bitmap_add(int base, int count)
{
struct xive_irq_bitmap *xibm;
- xibm = kzalloc(sizeof(*xibm), GFP_ATOMIC);
+ xibm = kzalloc(sizeof(*xibm), GFP_KERNEL);
if (!xibm)
return -ENOMEM;
@@ -53,6 +53,10 @@ static int xive_irq_bitmap_add(int base, int count)
xibm->base = base;
xibm->count = count;
xibm->bitmap = kzalloc(xibm->count, GFP_KERNEL);
+ if (!xibm->bitmap) {
+ kfree(xibm);
+ return -ENOMEM;
+ }
list_add(&xibm->list, &xive_irq_bitmaps);
pr_info("Using IRQ range [%x-%x]", xibm->base,
@@ -211,6 +215,38 @@ static long plpar_int_set_source_config(unsigned long flags,
return 0;
}
+static long plpar_int_get_source_config(unsigned long flags,
+ unsigned long lisn,
+ unsigned long *target,
+ unsigned long *prio,
+ unsigned long *sw_irq)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+ long rc;
+
+ pr_devel("H_INT_GET_SOURCE_CONFIG flags=%lx lisn=%lx\n", flags, lisn);
+
+ do {
+ rc = plpar_hcall(H_INT_GET_SOURCE_CONFIG, retbuf, flags, lisn,
+ target, prio, sw_irq);
+ } while (plpar_busy_delay(rc));
+
+ if (rc) {
+ pr_err("H_INT_GET_SOURCE_CONFIG lisn=%ld failed %ld\n",
+ lisn, rc);
+ return rc;
+ }
+
+ *target = retbuf[0];
+ *prio = retbuf[1];
+ *sw_irq = retbuf[2];
+
+ pr_devel("H_INT_GET_SOURCE_CONFIG target=%lx prio=%lx sw_irq=%lx\n",
+ retbuf[0], retbuf[1], retbuf[2]);
+
+ return 0;
+}
+
static long plpar_int_get_queue_info(unsigned long flags,
unsigned long target,
unsigned long priority,
@@ -356,20 +392,28 @@ static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
data->esb_shift = esb_shift;
data->trig_page = trig_page;
+ data->hw_irq = hw_irq;
+
/*
* No chip-id for the sPAPR backend. This has an impact how we
* pick a target. See xive_pick_irq_target().
*/
data->src_chip = XIVE_INVALID_CHIP_ID;
+ /*
+ * When the H_INT_ESB flag is set, the H_INT_ESB hcall should
+ * be used for interrupt management. Skip the remapping of the
+ * ESB pages which are not available.
+ */
+ if (data->flags & XIVE_IRQ_FLAG_H_INT_ESB)
+ return 0;
+
data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift);
if (!data->eoi_mmio) {
pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq);
return -ENOMEM;
}
- data->hw_irq = hw_irq;
-
/* Full function page supports trigger */
if (flags & XIVE_SRC_TRIGGER) {
data->trig_mmio = data->eoi_mmio;
@@ -394,6 +438,24 @@ static int xive_spapr_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
return rc == 0 ? 0 : -ENXIO;
}
+static int xive_spapr_get_irq_config(u32 hw_irq, u32 *target, u8 *prio,
+ u32 *sw_irq)
+{
+ long rc;
+ unsigned long h_target;
+ unsigned long h_prio;
+ unsigned long h_sw_irq;
+
+ rc = plpar_int_get_source_config(0, hw_irq, &h_target, &h_prio,
+ &h_sw_irq);
+
+ *target = h_target;
+ *prio = h_prio;
+ *sw_irq = h_sw_irq;
+
+ return rc == 0 ? 0 : -ENXIO;
+}
+
/* This can be called multiple time to change a queue configuration */
static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
__be32 *qpage, u32 order)
@@ -586,6 +648,7 @@ static void xive_spapr_sync_source(u32 hw_irq)
static const struct xive_ops xive_spapr_ops = {
.populate_irq_data = xive_spapr_populate_irq_data,
.configure_irq = xive_spapr_configure_irq,
+ .get_irq_config = xive_spapr_get_irq_config,
.setup_queue = xive_spapr_setup_queue,
.cleanup_queue = xive_spapr_cleanup_queue,
.match = xive_spapr_match,
diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h
index 211725dbf364..59cd366e7933 100644
--- a/arch/powerpc/sysdev/xive/xive-internal.h
+++ b/arch/powerpc/sysdev/xive/xive-internal.h
@@ -33,6 +33,8 @@ struct xive_cpu {
struct xive_ops {
int (*populate_irq_data)(u32 hw_irq, struct xive_irq_data *data);
int (*configure_irq)(u32 hw_irq, u32 target, u8 prio, u32 sw_irq);
+ int (*get_irq_config)(u32 hw_irq, u32 *target, u8 *prio,
+ u32 *sw_irq);
int (*setup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio);
void (*cleanup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio);
void (*setup_cpu)(unsigned int cpu, struct xive_cpu *xc);
diff --git a/arch/powerpc/tools/relocs_check.sh b/arch/powerpc/tools/relocs_check.sh
index 2b4e959caa36..014e00e74d2b 100755
--- a/arch/powerpc/tools/relocs_check.sh
+++ b/arch/powerpc/tools/relocs_check.sh
@@ -10,24 +10,29 @@
# based on relocs_check.pl
# Copyright © 2009 IBM Corporation
-if [ $# -lt 2 ]; then
- echo "$0 [path to objdump] [path to vmlinux]" 1>&2
+if [ $# -lt 3 ]; then
+ echo "$0 [path to objdump] [path to nm] [path to vmlinux]" 1>&2
exit 1
fi
-# Have Kbuild supply the path to objdump so we handle cross compilation.
+# Have Kbuild supply the path to objdump and nm so we handle cross compilation.
objdump="$1"
-vmlinux="$2"
+nm="$2"
+vmlinux="$3"
+
+# Remove from the bad relocations those that match an undefined weak symbol
+# which will result in an absolute relocation to 0.
+# Weak unresolved symbols are of that form in nm output:
+# " w _binary__btf_vmlinux_bin_end"
+undef_weak_symbols=$($nm "$vmlinux" | awk '$1 ~ /w/ { print $2 }')
bad_relocs=$(
-"$objdump" -R "$vmlinux" |
+$objdump -R "$vmlinux" |
# Only look at relocation lines.
grep -E '\<R_' |
# These relocations are okay
# On PPC64:
# R_PPC64_RELATIVE, R_PPC64_NONE
- # R_PPC64_ADDR64 mach_<name>
- # R_PPC64_ADDR64 __crc_<name>
# On PPC:
# R_PPC_RELATIVE, R_PPC_ADDR16_HI,
# R_PPC_ADDR16_HA,R_PPC_ADDR16_LO,
@@ -39,8 +44,7 @@ R_PPC_ADDR16_HI
R_PPC_ADDR16_HA
R_PPC_RELATIVE
R_PPC_NONE' |
- grep -E -v '\<R_PPC64_ADDR64[[:space:]]+mach_' |
- grep -E -v '\<R_PPC64_ADDR64[[:space:]]+__crc_'
+ ([ "$undef_weak_symbols" ] && grep -F -w -v "$undef_weak_symbols" || cat)
)
if [ -z "$bad_relocs" ]; then
diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh
index 1e972df3107e..77114755dc6f 100755
--- a/arch/powerpc/tools/unrel_branch_check.sh
+++ b/arch/powerpc/tools/unrel_branch_check.sh
@@ -18,14 +18,14 @@ vmlinux="$2"
#__end_interrupts should be located within the first 64K
end_intr=0x$(
-"$objdump" -R "$vmlinux" -d --start-address=0xc000000000000000 \
+$objdump -R "$vmlinux" -d --start-address=0xc000000000000000 \
--stop-address=0xc000000000010000 |
grep '\<__end_interrupts>:' |
awk '{print $1}'
)
BRANCHES=$(
-"$objdump" -R "$vmlinux" -D --start-address=0xc000000000000000 \
+$objdump -R "$vmlinux" -D --start-address=0xc000000000000000 \
--stop-address=${end_intr} |
grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]*b" |
grep -v '\<__start_initialization_multiplatform>' |
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index f142570ad860..c3842dbeb1b7 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for xmon
-# Disable clang warning for using setjmp without setjmp.h header
-subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header)
+# Avoid clang warnings around longjmp/setjmp declarations
+subdir-ccflags-y := -ffreestanding
GCOV_PROFILE := n
KCOV_INSTRUMENT := n
diff --git a/arch/powerpc/xmon/dis-asm.h b/arch/powerpc/xmon/dis-asm.h
index c4d246ebca37..c4c982d6402e 100644
--- a/arch/powerpc/xmon/dis-asm.h
+++ b/arch/powerpc/xmon/dis-asm.h
@@ -13,13 +13,13 @@ extern int print_insn_spu(unsigned long insn, unsigned long memaddr);
#else
static inline int print_insn_powerpc(unsigned long insn, unsigned long memaddr)
{
- printf("%.8x", insn);
+ printf("%.8lx", insn);
return 0;
}
static inline int print_insn_spu(unsigned long insn, unsigned long memaddr)
{
- printf("%.8x", insn);
+ printf("%.8lx", insn);
return 0;
}
#endif
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 14e56c25879f..e8c84d265602 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -25,6 +25,7 @@
#include <linux/nmi.h>
#include <linux/ctype.h>
#include <linux/highmem.h>
+#include <linux/security.h>
#include <asm/debugfs.h>
#include <asm/ptrace.h>
@@ -187,6 +188,8 @@ static void dump_tlb_44x(void);
static void dump_tlb_book3e(void);
#endif
+static void clear_all_bpt(void);
+
#ifdef CONFIG_PPC64
#define REG "%.16lx"
#else
@@ -283,10 +286,38 @@ Commands:\n\
" U show uptime information\n"
" ? help\n"
" # n limit output to n lines per page (for dp, dpa, dl)\n"
-" zr reboot\n\
- zh halt\n"
+" zr reboot\n"
+" zh halt\n"
;
+#ifdef CONFIG_SECURITY
+static bool xmon_is_locked_down(void)
+{
+ static bool lockdown;
+
+ if (!lockdown) {
+ lockdown = !!security_locked_down(LOCKDOWN_XMON_RW);
+ if (lockdown) {
+ printf("xmon: Disabled due to kernel lockdown\n");
+ xmon_is_ro = true;
+ }
+ }
+
+ if (!xmon_is_ro) {
+ xmon_is_ro = !!security_locked_down(LOCKDOWN_XMON_WR);
+ if (xmon_is_ro)
+ printf("xmon: Read-only due to kernel lockdown\n");
+ }
+
+ return lockdown;
+}
+#else /* CONFIG_SECURITY */
+static inline bool xmon_is_locked_down(void)
+{
+ return false;
+}
+#endif
+
static struct pt_regs *xmon_regs;
static inline void sync(void)
@@ -438,7 +469,10 @@ static bool wait_for_other_cpus(int ncpus)
return false;
}
-#endif /* CONFIG_SMP */
+#else /* CONFIG_SMP */
+static inline void get_output_lock(void) {}
+static inline void release_output_lock(void) {}
+#endif
static inline int unrecoverable_excp(struct pt_regs *regs)
{
@@ -455,6 +489,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
int cmd = 0;
struct bpt *bp;
long recurse_jmp[JMP_BUF_LEN];
+ bool locked_down;
unsigned long offset;
unsigned long flags;
#ifdef CONFIG_SMP
@@ -465,6 +500,8 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
local_irq_save(flags);
hard_irq_disable();
+ locked_down = xmon_is_locked_down();
+
if (!fromipi) {
tracing_enabled = tracing_is_on();
tracing_off();
@@ -518,7 +555,8 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
if (!fromipi) {
get_output_lock();
- excprint(regs);
+ if (!locked_down)
+ excprint(regs);
if (bp) {
printf("cpu 0x%x stopped at breakpoint 0x%tx (",
cpu, BP_NUM(bp));
@@ -570,10 +608,14 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
}
remove_bpts();
disable_surveillance();
- /* for breakpoint or single step, print the current instr. */
- if (bp || TRAP(regs) == 0xd00)
- ppc_inst_dump(regs->nip, 1, 0);
- printf("enter ? for help\n");
+
+ if (!locked_down) {
+ /* for breakpoint or single step, print curr insn */
+ if (bp || TRAP(regs) == 0xd00)
+ ppc_inst_dump(regs->nip, 1, 0);
+ printf("enter ? for help\n");
+ }
+
mb();
xmon_gate = 1;
barrier();
@@ -597,8 +639,9 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
spin_cpu_relax();
touch_nmi_watchdog();
} else {
- cmd = cmds(regs);
- if (cmd != 0) {
+ if (!locked_down)
+ cmd = cmds(regs);
+ if (locked_down || cmd != 0) {
/* exiting xmon */
insert_bpts();
xmon_gate = 0;
@@ -635,13 +678,16 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
"can't continue\n");
remove_bpts();
disable_surveillance();
- /* for breakpoint or single step, print the current instr. */
- if (bp || TRAP(regs) == 0xd00)
- ppc_inst_dump(regs->nip, 1, 0);
- printf("enter ? for help\n");
+ if (!locked_down) {
+ /* for breakpoint or single step, print current insn */
+ if (bp || TRAP(regs) == 0xd00)
+ ppc_inst_dump(regs->nip, 1, 0);
+ printf("enter ? for help\n");
+ }
}
- cmd = cmds(regs);
+ if (!locked_down)
+ cmd = cmds(regs);
insert_bpts();
in_xmon = 0;
@@ -670,7 +716,10 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
}
}
#endif
- insert_cpu_bpts();
+ if (locked_down)
+ clear_all_bpt();
+ else
+ insert_cpu_bpts();
touch_nmi_watchdog();
local_irq_restore(flags);
@@ -884,7 +933,7 @@ static void insert_cpu_bpts(void)
if (dabr.enabled) {
brk.address = dabr.address;
brk.type = (dabr.enabled & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
- brk.len = 8;
+ brk.len = DABR_MAX_LEN;
__set_breakpoint(&brk);
}
@@ -1047,10 +1096,6 @@ cmds(struct pt_regs *excp)
set_lpp_cmd();
break;
case 'b':
- if (xmon_is_ro) {
- printf(xmon_ro_msg);
- break;
- }
bpt_cmds();
break;
case 'C':
@@ -1147,16 +1192,19 @@ static int do_step(struct pt_regs *regs)
static void bootcmds(void)
{
+ char tmp[64];
int cmd;
cmd = inchar();
- if (cmd == 'r')
- ppc_md.restart(NULL);
- else if (cmd == 'h')
+ if (cmd == 'r') {
+ getstring(tmp, 64);
+ ppc_md.restart(tmp);
+ } else if (cmd == 'h') {
ppc_md.halt();
- else if (cmd == 'p')
+ } else if (cmd == 'p') {
if (pm_power_off)
pm_power_off();
+ }
}
static int cpu_cmd(void)
@@ -1319,11 +1367,16 @@ bpt_cmds(void)
struct bpt *bp;
cmd = inchar();
+
switch (cmd) {
#ifndef CONFIG_PPC_8xx
static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n";
int mode;
case 'd': /* bd - hardware data breakpoint */
+ if (xmon_is_ro) {
+ printf(xmon_ro_msg);
+ break;
+ }
if (!ppc_breakpoint_available()) {
printf("Hardware data breakpoint not supported on this cpu\n");
break;
@@ -1351,6 +1404,10 @@ bpt_cmds(void)
break;
case 'i': /* bi - hardware instr breakpoint */
+ if (xmon_is_ro) {
+ printf(xmon_ro_msg);
+ break;
+ }
if (!cpu_has_feature(CPU_FTR_ARCH_207S)) {
printf("Hardware instruction breakpoint "
"not supported on this cpu\n");
@@ -1409,7 +1466,8 @@ bpt_cmds(void)
break;
}
termch = cmd;
- if (!scanhex(&a)) {
+
+ if (xmon_is_ro || !scanhex(&a)) {
/* print all breakpoints */
printf(" type address\n");
if (dabr.enabled) {
@@ -1894,15 +1952,14 @@ static void dump_300_sprs(void)
printf("pidr = %.16lx tidr = %.16lx\n",
mfspr(SPRN_PID), mfspr(SPRN_TIDR));
- printf("asdr = %.16lx psscr = %.16lx\n",
- mfspr(SPRN_ASDR), hv ? mfspr(SPRN_PSSCR)
- : mfspr(SPRN_PSSCR_PR));
+ printf("psscr = %.16lx\n",
+ hv ? mfspr(SPRN_PSSCR) : mfspr(SPRN_PSSCR_PR));
if (!hv)
return;
- printf("ptcr = %.16lx\n",
- mfspr(SPRN_PTCR));
+ printf("ptcr = %.16lx asdr = %.16lx\n",
+ mfspr(SPRN_PTCR), mfspr(SPRN_ASDR));
#endif
}
@@ -2534,13 +2591,16 @@ static void dump_pacas(void)
static void dump_one_xive(int cpu)
{
unsigned int hwid = get_hard_smp_processor_id(cpu);
-
- opal_xive_dump(XIVE_DUMP_TM_HYP, hwid);
- opal_xive_dump(XIVE_DUMP_TM_POOL, hwid);
- opal_xive_dump(XIVE_DUMP_TM_OS, hwid);
- opal_xive_dump(XIVE_DUMP_TM_USER, hwid);
- opal_xive_dump(XIVE_DUMP_VP, hwid);
- opal_xive_dump(XIVE_DUMP_EMU_STATE, hwid);
+ bool hv = cpu_has_feature(CPU_FTR_HVMODE);
+
+ if (hv) {
+ opal_xive_dump(XIVE_DUMP_TM_HYP, hwid);
+ opal_xive_dump(XIVE_DUMP_TM_POOL, hwid);
+ opal_xive_dump(XIVE_DUMP_TM_OS, hwid);
+ opal_xive_dump(XIVE_DUMP_TM_USER, hwid);
+ opal_xive_dump(XIVE_DUMP_VP, hwid);
+ opal_xive_dump(XIVE_DUMP_EMU_STATE, hwid);
+ }
if (setjmp(bus_error_jmp) != 0) {
catch_memory_errors = 0;
@@ -2569,16 +2629,28 @@ static void dump_all_xives(void)
dump_one_xive(cpu);
}
-static void dump_one_xive_irq(u32 num)
+static void dump_one_xive_irq(u32 num, struct irq_data *d)
{
- s64 rc;
- __be64 vp;
- u8 prio;
- __be32 lirq;
-
- rc = opal_xive_get_irq_config(num, &vp, &prio, &lirq);
- xmon_printf("IRQ 0x%x config: vp=0x%llx prio=%d lirq=0x%x (rc=%lld)\n",
- num, be64_to_cpu(vp), prio, be32_to_cpu(lirq), rc);
+ xmon_xive_get_irq_config(num, d);
+}
+
+static void dump_all_xive_irq(void)
+{
+ unsigned int i;
+ struct irq_desc *desc;
+
+ for_each_irq_desc(i, desc) {
+ struct irq_data *d = irq_desc_get_irq_data(desc);
+ unsigned int hwirq;
+
+ if (!d)
+ continue;
+
+ hwirq = (unsigned int)irqd_to_hwirq(d);
+ /* IPIs are special (HW number 0) */
+ if (hwirq)
+ dump_one_xive_irq(hwirq, d);
+ }
}
static void dump_xives(void)
@@ -2597,7 +2669,9 @@ static void dump_xives(void)
return;
} else if (c == 'i') {
if (scanhex(&num))
- dump_one_xive_irq(num);
+ dump_one_xive_irq(num, NULL);
+ else
+ dump_all_xive_irq();
return;
}
@@ -3745,6 +3819,11 @@ static void xmon_init(int enable)
#ifdef CONFIG_MAGIC_SYSRQ
static void sysrq_handle_xmon(int key)
{
+ if (xmon_is_locked_down()) {
+ clear_all_bpt();
+ xmon_init(0);
+ return;
+ }
/* ensure xmon is enabled */
xmon_init(1);
debugger(get_irq_regs());
@@ -3766,7 +3845,6 @@ static int __init setup_xmon_sysrq(void)
device_initcall(setup_xmon_sysrq);
#endif /* CONFIG_MAGIC_SYSRQ */
-#ifdef CONFIG_DEBUG_FS
static void clear_all_bpt(void)
{
int i;
@@ -3784,18 +3862,22 @@ static void clear_all_bpt(void)
iabr = NULL;
dabr.enabled = 0;
}
-
- printf("xmon: All breakpoints cleared\n");
}
+#ifdef CONFIG_DEBUG_FS
static int xmon_dbgfs_set(void *data, u64 val)
{
xmon_on = !!val;
xmon_init(xmon_on);
/* make sure all breakpoints removed when disabling */
- if (!xmon_on)
+ if (!xmon_on) {
clear_all_bpt();
+ get_output_lock();
+ printf("xmon: All breakpoints cleared\n");
+ release_output_lock();
+ }
+
return 0;
}
@@ -3821,7 +3903,11 @@ static int xmon_early __initdata;
static int __init early_parse_xmon(char *p)
{
- if (!p || strncmp(p, "early", 5) == 0) {
+ if (xmon_is_locked_down()) {
+ xmon_init(0);
+ xmon_early = 0;
+ xmon_on = 0;
+ } else if (!p || strncmp(p, "early", 5) == 0) {
/* just "xmon" is equivalent to "xmon=early" */
xmon_init(1);
xmon_early = 1;
OpenPOWER on IntegriCloud