Diffstat (limited to 'arch/powerpc')
225 files changed, 5691 insertions, 2570 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 65fba4c34cd7..3da87e198878 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -80,6 +80,7 @@ config ARCH_HAS_DMA_SET_COHERENT_MASK
 config PPC
 	bool
 	default y
+	select BUILDTIME_EXTABLE_SORT
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
 	select BINFMT_ELF
@@ -160,8 +161,10 @@ config PPC
 	select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select GENERIC_CPU_AUTOPROBE
 	select HAVE_VIRT_CPU_ACCOUNTING
+	select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
 	select HAVE_ARCH_HARDENED_USERCOPY
 	select HAVE_KERNEL_GZIP
+	select HAVE_CC_STACKPROTECTOR

 config GENERIC_CSUM
 	def_bool CPU_LITTLE_ENDIAN
@@ -395,6 +398,14 @@ config MPROFILE_KERNEL
 	depends on PPC64 && CPU_LITTLE_ENDIAN
 	def_bool !DISABLE_MPROFILE_KERNEL

+config USE_THIN_ARCHIVES
+	bool "Build the kernel using thin archives"
+	default n
+	select THIN_ARCHIVES
+	help
+	  Build the kernel using thin archives.
+	  If you're unsure, say N.
+
 config IOMMU_HELPER
 	def_bool PPC64

@@ -455,6 +466,19 @@ config KEXEC
 	  interface is strongly in flux, so no good recommendation can be
 	  made.

+config KEXEC_FILE
+	bool "kexec file based system call"
+	select KEXEC_CORE
+	select BUILD_BIN2C
+	depends on PPC64
+	depends on CRYPTO=y
+	depends on CRYPTO_SHA256=y
+	help
+	  This is a new version of the kexec system call. This call is
+	  file based and takes in file descriptors as system call arguments
+	  for kernel and initramfs as opposed to a list of segments as is the
+	  case for the older kexec call.
+
 config RELOCATABLE
 	bool "Build a relocatable kernel"
 	depends on (PPC64 && !COMPILE_TEST) || (FLATMEM && (44x || FSL_BOOKE))
@@ -478,6 +502,15 @@ config RELOCATABLE
 	  setting can still be useful to bootwrappers that need to know the
 	  load address of the kernel (eg. u-boot/mkimage).

+config RELOCATABLE_TEST
+	bool "Test relocatable kernel"
+	depends on (PPC64 && RELOCATABLE)
+	default n
+	help
+	  This runs the relocatable kernel at the address it was initially
+	  loaded at, which tends to be non-zero and therefore tests the
+	  relocation code.
+
 config CRASH_DUMP
 	bool "Build a kdump crash kernel"
 	depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP)
@@ -489,7 +522,7 @@ config CRASH_DUMP

 config FA_DUMP
 	bool "Firmware-assisted dump"
-	depends on PPC64 && PPC_RTAS && CRASH_DUMP && KEXEC
+	depends on PPC64 && PPC_RTAS && CRASH_DUMP && KEXEC_CORE
 	help
 	  A robust mechanism to get reliable kernel crash dump with
 	  assistance from firmware. This approach does not use kexec,
@@ -548,6 +581,13 @@ config ARCH_SPARSEMEM_DEFAULT
 config SYS_SUPPORTS_HUGETLBFS
 	bool

+config ILLEGAL_POINTER_VALUE
+	hex
+	# This is roughly half way between the top of user space and the bottom
+	# of kernel space, which seems about as good as we can get.
+	default 0x5deadbeef0000000 if PPC64
+	default 0
+
 source "mm/Kconfig"

 config ARCH_MEMORY_PROBE
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 63292f64b25a..949258d412d0 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -354,4 +354,20 @@ config FAIL_IOMMU
 	  If you are unsure, say N.

+config PPC_PTDUMP
+	bool "Export kernel pagetable layout to userspace via debugfs"
+	depends on DEBUG_KERNEL
+	select DEBUG_FS
+	help
+	  This option exports the state of the kernel pagetables to a
+	  debugfs file. This is only useful for kernel developers who are
+	  working in architecture-specific areas of the kernel - probably
+	  not a good idea to enable this feature in a production kernel.
+
+	  If you are unsure, say N.
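The ILLEGAL_POINTER_VALUE added above is consumed (in mainline) by include/linux/poison.h, where it becomes POISON_POINTER_DELTA and offsets the list poison values so that a use-after-free dereference faults instead of hitting a mapped address. A standalone C sketch of that pattern, for illustration only (the 0x100/0x200 offsets mirror poison.h; nothing here is kernel code):

#include <stdio.h>

#define ILLEGAL_POINTER_VALUE	0x5deadbeef0000000UL
#define POISON_POINTER_DELTA	ILLEGAL_POINTER_VALUE

/* Mirrors the LIST_POISON1/2 pattern from include/linux/poison.h. */
#define LIST_POISON1 ((void *)(0x100 + POISON_POINTER_DELTA))
#define LIST_POISON2 ((void *)(0x200 + POISON_POINTER_DELTA))

int main(void)
{
	/*
	 * The poison values land well above any PPC64 user mapping and
	 * below the kernel's own mappings, so dereferencing a stale
	 * list pointer oopses immediately rather than corrupting memory.
	 */
	printf("LIST_POISON1 = %p\n", LIST_POISON1);
	printf("LIST_POISON2 = %p\n", LIST_POISON2);
	return 0;
}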
+ +config PPC_HTDUMP + def_bool y + depends on PPC_PTDUMP && PPC_BOOK3S + endmenu diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 617dece67924..31286fa7873c 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -23,7 +23,7 @@ CROSS32AR := $(CROSS32_COMPILE)ar ifeq ($(HAS_BIARCH),y) ifeq ($(CROSS32_COMPILE),) CROSS32CC := $(CC) -m32 -CROSS32AR := GNUTARGET=elf32-powerpc $(AR) +KBUILD_ARFLAGS += --target=elf32-powerpc endif endif @@ -85,7 +85,7 @@ ifeq ($(HAS_BIARCH),y) override AS += -a$(BITS) override LD += -m elf$(BITS)$(LDEMULATION) override CC += -m$(BITS) -override AR := GNUTARGET=elf$(BITS)-$(GNUTARGET) $(AR) +KBUILD_ARFLAGS += --target=elf$(BITS)-$(GNUTARGET) endif LDFLAGS_vmlinux-y := -Bstatic @@ -121,6 +121,7 @@ CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD) ifeq ($(CONFIG_PPC_BOOK3S_64),y) CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,-mtune=power4) +CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power4 else CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 endif @@ -249,6 +250,7 @@ core-y += arch/powerpc/kernel/ \ core-$(CONFIG_XMON) += arch/powerpc/xmon/ core-$(CONFIG_KVM) += arch/powerpc/kvm/ core-$(CONFIG_PERF_EVENTS) += arch/powerpc/perf/ +core-$(CONFIG_KEXEC_FILE) += arch/powerpc/purgatory/ drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ @@ -275,16 +277,16 @@ zImage: relocs_check endif $(BOOT_TARGETS1): vmlinux - $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@) + $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@) $(BOOT_TARGETS2): vmlinux - $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@) + $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@) bootwrapper_install: - $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@) + $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@) %.dtb: scripts - $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@) + $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@) # Used to create 'merged defconfigs' # To use it $(call) it with the first argument as the base defconfig diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index eae2dc8bc218..e82f333cc84a 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -100,7 +100,8 @@ src-wlib-y := string.S crt0.S crtsavres.S stdio.c decompress.c main.c \ ns16550.c serial.c simple_alloc.c div64.S util.S \ elf_util.c $(zlib-y) devtree.c stdlib.c \ oflib.c ofconsole.c cuboot.c mpsc.c cpm-serial.c \ - uartlite.c mpc52xx-psc.c opal.c opal-calls.S + uartlite.c mpc52xx-psc.c opal.c +src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c fsl-soc.c @@ -171,10 +172,6 @@ $(addprefix $(obj)/,$(libfdt) $(libfdtheader)): $(obj)/%: $(srctree)/scripts/dtc $(obj)/empty.c: $(Q)touch $@ -$(obj)/zImage.lds: $(obj)/%: $(srctree)/$(src)/%.S - $(CROSS32CC) $(cpp_flags) -E -Wp,-MD,$(depfile) -P -Upowerpc \ - -D__ASSEMBLY__ -DLINKER_SCRIPT -o $@ $< - $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S $(Q)cp $< $@ @@ -356,17 +353,17 @@ $(addprefix $(obj)/, $(initrd-y)): $(obj)/ramdisk.image.gz # Don't put the ramdisk on the pattern rule; when its missing make will try # the pattern rule with less dependencies that also matches (even with the # hard dependency listed). 
-$(obj)/zImage.initrd.%: vmlinux $(wrapperbits) +$(obj)/zImage.initrd.%: vmlinux $(wrapperbits) FORCE $(call if_changed,wrap,$*,,,$(obj)/ramdisk.image.gz) -$(addprefix $(obj)/, $(sort $(filter zImage.%, $(image-y)))): vmlinux $(wrapperbits) +$(addprefix $(obj)/, $(sort $(filter zImage.%, $(image-y)))): vmlinux $(wrapperbits) FORCE $(call if_changed,wrap,$(subst $(obj)/zImage.,,$@)) # dtbImage% - a dtbImage is a zImage with an embedded device tree blob -$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/%.dtb +$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/%.dtb FORCE $(call if_changed,wrap,$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz) -$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/%.dtb +$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/%.dtb FORCE $(call if_changed,wrap,$*,,$(obj)/$*.dtb) # This cannot be in the root of $(src) as the zImage rule always adds a $(obj) @@ -374,31 +371,31 @@ $(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/%.dtb $(obj)/vmlinux.strip: vmlinux $(STRIP) -s -R .comment $< -o $@ -$(obj)/uImage: vmlinux $(wrapperbits) +$(obj)/uImage: vmlinux $(wrapperbits) FORCE $(call if_changed,wrap,uboot) -$(obj)/uImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) +$(obj)/uImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE $(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz) -$(obj)/uImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) +$(obj)/uImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE $(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb) -$(obj)/cuImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) +$(obj)/cuImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE $(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz) -$(obj)/cuImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) +$(obj)/cuImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE $(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb) -$(obj)/simpleImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) +$(obj)/simpleImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE $(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz) -$(obj)/simpleImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) +$(obj)/simpleImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE $(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb) -$(obj)/treeImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) +$(obj)/treeImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE $(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz) -$(obj)/treeImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) +$(obj)/treeImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE $(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb) # Rule to build device tree blobs diff --git a/arch/powerpc/boot/dts/fsl/t1023rdb.dts b/arch/powerpc/boot/dts/fsl/t1023rdb.dts index 29757623e5ba..5ba6fbfca274 100644 --- a/arch/powerpc/boot/dts/fsl/t1023rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1023rdb.dts @@ -41,6 +41,27 @@ #size-cells = <2>; interrupt-parent = <&mpic>; + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + bman_fbpr: bman-fbpr { + size = <0 0x1000000>; + alignment = <0 0x1000000>; + }; + + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; + }; + ifc: localbus@ffe124000 { reg = <0xf 0xfe124000 0 0x2000>; ranges = <0 0 0xf 0xe8000000 0x08000000 @@ -72,6 +93,14 @@ ranges = <0x00000000 0xf 0x00000000 0x01072000>; }; + bportals: bman-portals@ff4000000 { + ranges = <0x0 0xf 0xf4000000 0x2000000>; 
+ }; + + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi index 6e0b4892a740..da2894c59479 100644 --- a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi @@ -34,6 +34,21 @@ #include <dt-bindings/thermal/thermal.h> +&bman_fbpr { + compatible = "fsl,bman-fbpr"; + alloc-ranges = <0 0 0x10000 0>; +}; + +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10000 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10000 0>; +}; + &ifc { #address-cells = <2>; #size-cells = <1>; @@ -180,6 +195,92 @@ }; }; +&bportals { + #address-cells = <0x1>; + #size-cells = <0x1>; + compatible = "simple-bus"; + + bman-portal@0 { + cell-index = <0x0>; + compatible = "fsl,bman-portal"; + reg = <0x0 0x4000>, <0x1000000 0x1000>; + interrupts = <105 2 0 0>; + }; + bman-portal@4000 { + cell-index = <0x1>; + compatible = "fsl,bman-portal"; + reg = <0x4000 0x4000>, <0x1001000 0x1000>; + interrupts = <107 2 0 0>; + }; + bman-portal@8000 { + cell-index = <2>; + compatible = "fsl,bman-portal"; + reg = <0x8000 0x4000>, <0x1002000 0x1000>; + interrupts = <109 2 0 0>; + }; + bman-portal@c000 { + cell-index = <0x3>; + compatible = "fsl,bman-portal"; + reg = <0xc000 0x4000>, <0x1003000 0x1000>; + interrupts = <111 2 0 0>; + }; + bman-portal@10000 { + cell-index = <0x4>; + compatible = "fsl,bman-portal"; + reg = <0x10000 0x4000>, <0x1004000 0x1000>; + interrupts = <113 2 0 0>; + }; + bman-portal@14000 { + cell-index = <0x5>; + compatible = "fsl,bman-portal"; + reg = <0x14000 0x4000>, <0x1005000 0x1000>; + interrupts = <115 2 0 0>; + }; +}; + +&qportals { + #address-cells = <0x1>; + #size-cells = <0x1>; + compatible = "simple-bus"; + + qportal0: qman-portal@0 { + compatible = "fsl,qman-portal"; + reg = <0x0 0x4000>, <0x1000000 0x1000>; + interrupts = <104 0x2 0 0>; + cell-index = <0x0>; + }; + qportal1: qman-portal@4000 { + compatible = "fsl,qman-portal"; + reg = <0x4000 0x4000>, <0x1001000 0x1000>; + interrupts = <106 0x2 0 0>; + cell-index = <0x1>; + }; + qportal2: qman-portal@8000 { + compatible = "fsl,qman-portal"; + reg = <0x8000 0x4000>, <0x1002000 0x1000>; + interrupts = <108 0x2 0 0>; + cell-index = <0x2>; + }; + qportal3: qman-portal@c000 { + compatible = "fsl,qman-portal"; + reg = <0xc000 0x4000>, <0x1003000 0x1000>; + interrupts = <110 0x2 0 0>; + cell-index = <0x3>; + }; + qportal4: qman-portal@10000 { + compatible = "fsl,qman-portal"; + reg = <0x10000 0x4000>, <0x1004000 0x1000>; + interrupts = <112 0x2 0 0>; + cell-index = <0x4>; + }; + qportal5: qman-portal@14000 { + compatible = "fsl,qman-portal"; + reg = <0x14000 0x4000>, <0x1005000 0x1000>; + interrupts = <114 0x2 0 0>; + cell-index = <0x5>; + }; +}; + &soc { #address-cells = <1>; #size-cells = <1>; @@ -413,6 +514,8 @@ }; /include/ "qoriq-sec5.0-0.dtsi" +/include/ "qoriq-qman3.dtsi" +/include/ "qoriq-bman1.dtsi" /include/ "qoriq-fman3l-0.dtsi" /include/ "qoriq-fman3-0-10g-0-best-effort.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/t1024qds.dts b/arch/powerpc/boot/dts/fsl/t1024qds.dts index 772143da367f..d6858b7cd93f 100644 --- a/arch/powerpc/boot/dts/fsl/t1024qds.dts +++ b/arch/powerpc/boot/dts/fsl/t1024qds.dts @@ -41,6 +41,27 @@ #size-cells = <2>; interrupt-parent = <&mpic>; + reserved-memory { + #address-cells = <2>; + #size-cells = 
<2>; + ranges; + + bman_fbpr: bman-fbpr { + size = <0 0x1000000>; + alignment = <0 0x1000000>; + }; + + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; + }; + ifc: localbus@ffe124000 { reg = <0xf 0xfe124000 0 0x2000>; ranges = <0 0 0xf 0xe8000000 0x08000000 @@ -80,6 +101,14 @@ ranges = <0x00000000 0xf 0x00000000 0x01072000>; }; + bportals: bman-portals@ff4000000 { + ranges = <0x0 0xf 0xf4000000 0x2000000>; + }; + + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts b/arch/powerpc/boot/dts/fsl/t1024rdb.dts index 302cdd22b4bb..73a645324bc1 100644 --- a/arch/powerpc/boot/dts/fsl/t1024rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts @@ -41,6 +41,31 @@ #size-cells = <2>; interrupt-parent = <&mpic>; + aliases { + sg_2500_aqr105_phy4 = &sg_2500_aqr105_phy4; + }; + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + bman_fbpr: bman-fbpr { + size = <0 0x1000000>; + alignment = <0 0x1000000>; + }; + + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; + }; + ifc: localbus@ffe124000 { reg = <0xf 0xfe124000 0 0x2000>; ranges = <0 0 0xf 0xe8000000 0x08000000 @@ -82,6 +107,14 @@ ranges = <0x00000000 0xf 0x00000000 0x01072000>; }; + bportals: bman-portals@ff4000000 { + ranges = <0x0 0xf 0xf4000000 0x2000000>; + }; + + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts index 2a5a90dd272e..fcd2aeb5b8ac 100644 --- a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts @@ -48,6 +48,58 @@ "fsl,deepsleep-cpld"; }; }; + + soc: soc@ffe000000 { + fman0: fman@400000 { + ethernet@e0000 { + phy-handle = <&phy_sgmii_0>; + phy-connection-type = "sgmii"; + }; + + ethernet@e2000 { + phy-handle = <&phy_sgmii_1>; + phy-connection-type = "sgmii"; + }; + + ethernet@e4000 { + phy-handle = <&phy_sgmii_2>; + phy-connection-type = "sgmii"; + }; + + ethernet@e6000 { + phy-handle = <&phy_rgmii_0>; + phy-connection-type = "rgmii"; + }; + + ethernet@e8000 { + phy-handle = <&phy_rgmii_1>; + phy-connection-type = "rgmii"; + }; + + mdio0: mdio@fc000 { + phy_sgmii_0: ethernet-phy@02 { + reg = <0x02>; + }; + + phy_sgmii_1: ethernet-phy@03 { + reg = <0x03>; + }; + + phy_sgmii_2: ethernet-phy@01 { + reg = <0x01>; + }; + + phy_rgmii_0: ethernet-phy@04 { + reg = <0x04>; + }; + + phy_rgmii_1: ethernet-phy@05 { + reg = <0x05>; + }; + }; + }; + }; + }; #include "t1042si-post.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/t4240rdb.dts b/arch/powerpc/boot/dts/fsl/t4240rdb.dts index cc0a264b8acb..8166c660712a 100644 --- a/arch/powerpc/boot/dts/fsl/t4240rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t4240rdb.dts @@ -125,6 +125,10 @@ }; i2c@118000 { + hwmon@2f { + compatible = "winbond,w83793"; + reg = <0x2f>; + }; eeprom@52 { compatible = "at24,24c256"; reg = <0x52>; diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c index 57d42d129033..78aaf4ffd7ab 100644 --- a/arch/powerpc/boot/main.c +++ b/arch/powerpc/boot/main.c @@ -232,8 +232,12 
@@ void start(void)
 	console_ops.close();

 	kentry = (kernel_entry_t) vmlinux.addr;
-	if (ft_addr)
-		kentry(ft_addr, 0, NULL);
+	if (ft_addr) {
+		if (platform_ops.kentry)
+			platform_ops.kentry(ft_addr, vmlinux.addr);
+		else
+			kentry(ft_addr, 0, NULL);
+	}
 	else
 		kentry((unsigned long)initrd.addr, initrd.size,
 			loader_info.promptr);
diff --git a/arch/powerpc/boot/opal-calls.S b/arch/powerpc/boot/opal-calls.S
index ff2f1b97bc53..2a99fc9a3ccf 100644
--- a/arch/powerpc/boot/opal-calls.S
+++ b/arch/powerpc/boot/opal-calls.S
@@ -12,6 +12,19 @@

 	.text

+	.globl opal_kentry
+opal_kentry:
+	/* r3 is the fdt ptr */
+	mtctr	r4
+	li	r4, 0
+	li	r5, 0
+	li	r6, 0
+	li	r7, 0
+	ld	r11,opal@got(r2)
+	ld	r8,0(r11)
+	ld	r9,8(r11)
+	bctr
+
 #define OPAL_CALL(name, token)				\
 	.globl name;					\
 name:							\
diff --git a/arch/powerpc/boot/opal.c b/arch/powerpc/boot/opal.c
index 1f37e1c1d6d8..0272570d02de 100644
--- a/arch/powerpc/boot/opal.c
+++ b/arch/powerpc/boot/opal.c
@@ -13,7 +13,7 @@
 #include <libfdt.h>
 #include "../include/asm/opal-api.h"

-#ifdef __powerpc64__
+#ifdef CONFIG_PPC64_BOOT_WRAPPER

 /* Global OPAL struct used by opal-call.S */
 struct opal {
@@ -23,14 +23,25 @@ struct opal {

 static u32 opal_con_id;

+/* see opal-wrappers.S */
 int64_t opal_console_write(int64_t term_number, u64 *length, const u8 *buffer);
 int64_t opal_console_read(int64_t term_number, uint64_t *length, u8 *buffer);
 int64_t opal_console_write_buffer_space(uint64_t term_number, uint64_t *length);
 int64_t opal_console_flush(uint64_t term_number);
 int64_t opal_poll_events(uint64_t *outstanding_event_mask);

+void opal_kentry(unsigned long fdt_addr, void *vmlinux_addr);
+
 static int opal_con_open(void)
 {
+	/*
+	 * When OPAL loads the boot kernel it stashes the OPAL base and entry
+	 * address in r8 and r9, so the kernel can use the OPAL console
+	 * before unflattening the device tree. While executing, the wrapper
+	 * will probably trash r8 and r9, so this kentry hook restores them
+	 * before entering the decompressed kernel.
+	 */
+	platform_ops.kentry = opal_kentry;
 	return 0;
 }
diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h
index 309d1b127e96..fad1862f4b2d 100644
--- a/arch/powerpc/boot/ops.h
+++ b/arch/powerpc/boot/ops.h
@@ -30,6 +30,7 @@ struct platform_ops {
 	void *	(*realloc)(void *ptr, unsigned long size);
 	void	(*exit)(void);
 	void *	(*vmlinux_alloc)(unsigned long size);
+	void	(*kentry)(unsigned long fdt_addr, void *vmlinux_addr);
 };
 extern struct platform_ops platform_ops;

diff --git a/arch/powerpc/boot/ps3-head.S b/arch/powerpc/boot/ps3-head.S
index b6fcbaf5027b..3dc44b05fb97 100644
--- a/arch/powerpc/boot/ps3-head.S
+++ b/arch/powerpc/boot/ps3-head.S
@@ -57,11 +57,6 @@ __system_reset_overlay:
 	bctr

 1:
-	/* Save the value at addr zero for a null pointer write check later. */
-
-	li	r4, 0
-	lwz	r3, 0(r4)
-
 	/* Primary delays then goes to _zimage_start in wrapper. */
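A minimal C model of the kentry dispatch added above, to show why the hook takes both the flat-tree address and vmlinux address: the OPAL path must re-materialize the firmware base/entry (r8/r9 on real hardware) before jumping, so it performs the final branch itself. All addresses and the two-field opal struct are stand-ins, purely illustrative:

#include <stdio.h>

struct platform_ops {
	void (*kentry)(unsigned long fdt_addr, void *vmlinux_addr);
};

static struct platform_ops platform_ops;

/* Stand-ins for the OPAL base/entry the firmware passed in r8/r9. */
static struct { unsigned long base, entry; } opal = { 0x30000000UL, 0x30001000UL };

static void enter_kernel(void *addr, unsigned long fdt_addr)
{
	/* On real hardware this is the mtctr/bctr at the end of the wrapper. */
	printf("branch to %p with r3=%#lx\n", addr, fdt_addr);
}

/* Models opal_kentry from opal-calls.S: reload r8/r9, then branch. */
static void opal_kentry(unsigned long fdt_addr, void *vmlinux_addr)
{
	printf("restore r8=%#lx r9=%#lx\n", opal.base, opal.entry);
	enter_kernel(vmlinux_addr, fdt_addr);
}

int main(void)
{
	unsigned long ft_addr = 0x1000;		/* made-up addresses */
	void *vmlinux_addr = (void *)0x2000000;

	platform_ops.kentry = opal_kentry;	/* done by opal_con_open() */

	/* The dispatch start() now performs in boot/main.c: */
	if (platform_ops.kentry)
		platform_ops.kentry(ft_addr, vmlinux_addr);
	else
		enter_kernel(vmlinux_addr, ft_addr);
	return 0;
}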
 	or	31, 31, 31 /* db16cyc */
diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c
index 4ec2d86d3c50..a05558a7e51a 100644
--- a/arch/powerpc/boot/ps3.c
+++ b/arch/powerpc/boot/ps3.c
@@ -119,13 +119,12 @@ void ps3_copy_vectors(void)
 	flush_cache((void *)0x100, 512);
 }

-void platform_init(unsigned long null_check)
+void platform_init(void)
 {
 	const u32 heapsize = 0x1000000 - (u32)_end; /* 16MiB */
 	void *chosen;
 	unsigned long ft_addr;
 	u64 rm_size;
-	unsigned long val;

 	console_ops.write = ps3_console_write;
 	platform_ops.exit = ps3_exit;
@@ -153,11 +152,6 @@ void platform_init(unsigned long null_check)

 	printf(" flat tree at 0x%lx\n\r", ft_addr);

-	val = *(unsigned long *)0;
-
-	if (val != null_check)
-		printf("null check failed: %lx != %lx\n\r", val, null_check);
-
 	((kernel_entry_t)0)(ft_addr, 0, NULL);

 	ps3_exit();
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 404b3aabdb4d..76fe3ccfd381 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -181,6 +181,28 @@ case "$elfformat" in
     elf32-powerpc)	format=elf32ppc	;;
 esac

+ld_version()
+{
+    # Poached from scripts/ld-version.sh, but we don't want to call that
+    # because this script (wrapper) is distributed separately from the
+    # kernel source.
+    # Extract linker version number from stdin and turn into single number.
+    awk '{
+	gsub(".*\\)", "");
+	gsub(".*version ", "");
+	gsub("-.*", "");
+	split($1,a, ".");
+	print a[1]*100000000 + a[2]*1000000 + a[3]*10000;
+	exit
+    }'
+}
+
+# Do not include PT_INTERP segment when linking pie. Non-pie linking
+# just ignores this option.
+LD_VERSION=$(${CROSS}ld --version | ld_version)
+LD_NO_DL_MIN_VERSION=$(echo 2.26 | ld_version)
+if [ "$LD_VERSION" -ge "$LD_NO_DL_MIN_VERSION" ] ; then
+	nodl="--no-dynamic-linker"
+fi

 platformo=$object/"$platform".o
 lds=$object/zImage.lds
@@ -446,7 +468,7 @@ if [ "$platform" != "miboot" ]; then
         text_start="-Ttext $link_address"
     fi
 #link everything
-    ${CROSS}ld -m $format -T $lds $text_start $pie -o "$ofile" \
+    ${CROSS}ld -m $format -T $lds $text_start $pie $nodl -o "$ofile" \
 	$platformo $tmp $object/wrapper.a
     rm $tmp
 fi
diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig
index 8b83ce8a01e7..8d3e3c41258d 100644
--- a/arch/powerpc/configs/amigaone_defconfig
+++ b/arch/powerpc/configs/amigaone_defconfig
@@ -45,12 +45,6 @@ CONFIG_PARPORT_PC_FIFO=y
 CONFIG_BLK_DEV_FD=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-# CONFIG_IDEPCI_PCIBUS_ORDER is not set
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_SIIMAGE=y
-CONFIG_BLK_DEV_VIA82CXXX=y
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_CHR_DEV_ST=y
@@ -61,6 +55,10 @@ CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_SYM53C8XX_2=y
 CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
 # CONFIG_SCSI_SYM53C8XX_MMIO is not set
+CONFIG_ATA=y
+CONFIG_PATA_SIL680=y
+CONFIG_PATA_VIA=y
+CONFIG_ATA_GENERIC=y
 CONFIG_NETDEVICES=y
 CONFIG_VORTEX=y
 CONFIG_8139CP=y
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index 7b6f30dece34..2d7fcbe047ac 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -108,16 +108,15 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=131072
-CONFIG_IDE=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_AEC62XX=y
-CONFIG_BLK_DEV_SIIMAGE=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=m
 CONFIG_CHR_DEV_SG=y
 CONFIG_ATA=y
 CONFIG_SATA_PROMISE=y
+CONFIG_PATA_ARTOP=y
 CONFIG_PATA_PDC2027X=m
+CONFIG_PATA_SIL680=y
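The ld_version helper in the wrapper change above folds a "major.minor.patch" linker version into a single integer so it can be compared against 2.26, the binutils release that introduced --no-dynamic-linker. A hedged re-implementation of the same encoding in C, for illustration only:

#include <stdio.h>

/* Mirrors the awk: a1*100000000 + a2*1000000 + a3*10000 */
static long ld_version(int major, int minor, int patch)
{
	return major * 100000000L + minor * 1000000L + patch * 10000L;
}

int main(void)
{
	long min_nodl = ld_version(2, 26, 0);	/* "echo 2.26 | ld_version" */

	printf("2.25.1 -> %ld (%s)\n", ld_version(2, 25, 1),
	       ld_version(2, 25, 1) >= min_nodl ? "pass --no-dynamic-linker"
						: "omit flag");
	printf("2.27.0 -> %ld (%s)\n", ld_version(2, 27, 0),
	       ld_version(2, 27, 0) >= min_nodl ? "pass --no-dynamic-linker"
						: "omit flag");
	return 0;
}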
+CONFIG_ATA_GENERIC=y CONFIG_MD=y CONFIG_BLK_DEV_MD=m CONFIG_MD_LINEAR=m diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig index ac9a50da2dc6..1f6f90cd8aff 100644 --- a/arch/powerpc/configs/chrp32_defconfig +++ b/arch/powerpc/configs/chrp32_defconfig @@ -42,12 +42,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_FD=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_SL82C105=y -CONFIG_BLK_DEV_VIA82CXXX=y -CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y @@ -56,6 +50,10 @@ CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 +CONFIG_ATA=y +CONFIG_PATA_VIA=y +CONFIG_PATA_WINBOND=y +CONFIG_ATA_GENERIC=y CONFIG_NETDEVICES=y CONFIG_PCNET32=y CONFIG_NET_TULIP=y diff --git a/arch/powerpc/configs/dpaa.config b/arch/powerpc/configs/dpaa.config index efa99c048543..2fe76f5e938a 100644 --- a/arch/powerpc/configs/dpaa.config +++ b/arch/powerpc/configs/dpaa.config @@ -1 +1,4 @@ CONFIG_FSL_DPAA=y +CONFIG_FSL_PAMU=y +CONFIG_FSL_FMAN=y +CONFIG_FSL_DPAA_ETH=y diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config index 1a61e81ab0cd..cc49c95494da 100644 --- a/arch/powerpc/configs/fsl-emb-nonhw.config +++ b/arch/powerpc/configs/fsl-emb-nonhw.config @@ -44,6 +44,7 @@ CONFIG_FORCE_MAX_ZONEORDER=13 CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAME_WARN=1024 CONFIG_FTL=y +CONFIG_GPIO_GENERIC_PLATFORM=y CONFIG_HFS_FS=m CONFIG_HFSPLUS_FS=m CONFIG_HIGH_RES_TIMERS=y @@ -104,8 +105,13 @@ CONFIG_PACKET=y CONFIG_PARTITION_ADVANCED=y CONFIG_PERF_EVENTS=y CONFIG_POSIX_MQUEUE=y +CONFIG_POWER_SUPPLY=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_GPIO=y +CONFIG_POWER_RESET_GPIO_RESTART=y CONFIG_QNX4FS_FS=m CONFIG_RCU_TRACE=y +CONFIG_RESET_CONTROLLER=y CONFIG_ROOT_NFS=y CONFIG_SYSV_FS=m CONFIG_SYSVIPC=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 3b2511c090d8..e18f2e06553f 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -60,10 +60,6 @@ CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_CDROM_PKTCDVD=m -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_IDE_PMAC=y -CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y @@ -73,6 +69,7 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SPI_ATTRS=y CONFIG_ATA=y CONFIG_SATA_SVW=y +CONFIG_PATA_MACIO=y CONFIG_MD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=y diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index 27abfab31219..c4018179e219 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -39,16 +39,15 @@ CONFIG_IP_PNP_DHCP=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_IDE_TASK_IOCTL=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_AMD74XX=y # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_IPR=y CONFIG_ATA=y +CONFIG_PATA_AMD=y +CONFIG_ATA_GENERIC=y CONFIG_NETDEVICES=y CONFIG_AMD8111_ETH=y CONFIG_TIGON3=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 76f4edd441d3..5553c5ce4274 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -58,9 +58,6 @@ 
CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_EEPROM_LEGACY=y -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_IDE_TASK_IOCTL=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_CHR_DEV_OSST=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index e5a674d4a716..fc1e7a7388b8 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -117,15 +117,6 @@ CONFIG_CONNECTOR=y CONFIG_MAC_FLOPPY=m CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECS=m -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_PDC202XX_NEW=y -CONFIG_BLK_DEV_SL82C105=y -CONFIG_BLK_DEV_IDE_PMAC=y -CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y -CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y @@ -140,6 +131,12 @@ CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 CONFIG_SCSI_MESH=y CONFIG_SCSI_MAC53C94=y +CONFIG_ATA=y +CONFIG_PATA_MACIO=y +CONFIG_PATA_PDC2027X=y +CONFIG_PATA_WINBOND=y +CONFIG_PATA_PCMCIA=m +CONFIG_ATA_GENERIC=y CONFIG_MD=y CONFIG_BLK_DEV_MD=m CONFIG_MD_LINEAR=m diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index d98b6eb3254f..e4d53fe5976a 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -49,6 +49,7 @@ CONFIG_BINFMT_MISC=m CONFIG_PPC_TRANSACTIONAL_MEM=y CONFIG_HOTPLUG_CPU=y CONFIG_KEXEC=y +CONFIG_KEXEC_FILE=y CONFIG_IRQ_ALL_CPUS=y CONFIG_NUMA=y CONFIG_MEMORY_HOTPLUG=y @@ -241,10 +242,6 @@ CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y @@ -300,7 +297,10 @@ CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPT_CRC32C_VPMSUM=m +CONFIG_CRYPTO_MD5_PPC=m CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m @@ -308,6 +308,7 @@ CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SHA1_PPC=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 58a98d40086f..0396126ba6a8 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -46,6 +46,7 @@ CONFIG_HZ_100=y CONFIG_BINFMT_MISC=m CONFIG_PPC_TRANSACTIONAL_MEM=y CONFIG_KEXEC=y +CONFIG_KEXEC_FILE=y CONFIG_CRASH_DUMP=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTREMOVE=y @@ -85,12 +86,6 @@ CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_VIRTIO_BLK=m -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_AMD74XX=y -CONFIG_BLK_DEV_IDE_PMAC=y -CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y @@ -120,6 +115,9 @@ CONFIG_SATA_AHCI=y CONFIG_SATA_SIL24=y CONFIG_SATA_MV=y CONFIG_SATA_SVW=y +CONFIG_PATA_AMD=y +CONFIG_PATA_MACIO=y +CONFIG_ATA_GENERIC=y CONFIG_MD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=y @@ -335,7 +333,10 @@ CONFIG_PPC_EARLY_DEBUG=y CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPT_CRC32C_VPMSUM=m +CONFIG_CRYPTO_MD5_PPC=m CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m @@ 
-343,6 +344,7 @@ CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SHA1_PPC=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index fd2edd650c20..11a3473f9e2e 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -59,10 +59,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65536 -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_AMD74XX=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y @@ -79,6 +75,8 @@ CONFIG_SCSI_DEBUG=m CONFIG_ATA=y CONFIG_SATA_SIL24=y CONFIG_SATA_SVW=y +CONFIG_PATA_AMD=y +CONFIG_ATA_GENERIC=y CONFIG_MD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 8fbf49801233..3ce91a3df27f 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -378,13 +378,6 @@ CONFIG_EEPROM_AT24=m CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m CONFIG_EEPROM_93CX6=m -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=m -CONFIG_IDE_TASK_IOCTL=y -# CONFIG_IDEPCI_PCIBUS_ORDER is not set -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_IDE_PMAC=y -CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y CONFIG_RAID_ATTRS=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m @@ -411,13 +404,14 @@ CONFIG_ATA=y CONFIG_SATA_FSL=m CONFIG_PDC_ADMA=m CONFIG_ATA_PIIX=m +CONFIG_PATA_MACIO=y CONFIG_PATA_MPC52xx=m CONFIG_PATA_OPTIDMA=m CONFIG_PATA_SCH=m CONFIG_PATA_VIA=m CONFIG_PATA_PLATFORM=m CONFIG_PATA_OF_PLATFORM=m -CONFIG_ATA_GENERIC=m +CONFIG_ATA_GENERIC=y CONFIG_MD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=m diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 8a3bc016b732..5a06bdde1674 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -52,6 +52,7 @@ CONFIG_HZ_100=y CONFIG_BINFMT_MISC=m CONFIG_PPC_TRANSACTIONAL_MEM=y CONFIG_KEXEC=y +CONFIG_KEXEC_FILE=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y @@ -92,10 +93,6 @@ CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_VIRTIO_BLK=m -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_AMD74XX=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y @@ -122,7 +119,8 @@ CONFIG_SCSI_DH_RDAC=m CONFIG_SCSI_DH_ALUA=m CONFIG_ATA=y CONFIG_SATA_AHCI=y -# CONFIG_ATA_SFF is not set +CONFIG_PATA_AMD=y +CONFIG_ATA_GENERIC=y CONFIG_MD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=y @@ -244,10 +242,6 @@ CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y @@ -302,7 +296,10 @@ CONFIG_XMON=y CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPT_CRC32C_VPMSUM=m +CONFIG_CRYPTO_MD5_PPC=m CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m @@ -310,6 +307,7 @@ CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SHA1_PPC=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig 
index e9122b15e5fd..74bca2eccd0f 100644
--- a/arch/powerpc/configs/storcenter_defconfig
+++ b/arch/powerpc/configs/storcenter_defconfig
@@ -36,12 +36,11 @@ CONFIG_NFTL_RW=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_PHYSMAP=y
-CONFIG_IDE=y
-CONFIG_BLK_DEV_VIA82CXXX=y
-CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=y
 CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_ATA=y
+CONFIG_PATA_VIA=y
 CONFIG_MD=y
 CONFIG_BLK_DEV_MD=y
 CONFIG_MD_LINEAR=y
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile
index 7998c177f0a2..87f40454bad3 100644
--- a/arch/powerpc/crypto/Makefile
+++ b/arch/powerpc/crypto/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o
 obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o
 obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o
 obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
-obj-$(CONFIG_CRYPT_CRC32C_VPMSUM) += crc32c-vpmsum.o
+obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o

 aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
 md5-ppc-y := md5-asm.o md5-glue.o
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index d1492736d852..81592562e0f8 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -13,7 +13,10 @@
  */

 #include <linux/threads.h>
-#include <linux/kprobes.h>
+#include <asm/cacheflush.h>
+#include <asm/checksum.h>
+#include <asm/uaccess.h>
+#include <asm/epapr_hcalls.h>

 #include <uapi/asm/ucontext.h>

@@ -109,4 +112,12 @@ void early_setup_secondary(void);
 /* time */
 void accumulate_stolen_time(void);

+/* misc runtime */
+extern u64 __bswapdi2(u64);
+extern s64 __lshrdi3(s64, int);
+extern s64 __ashldi3(s64, int);
+extern s64 __ashrdi3(s64, int);
+extern int __cmpdi2(s64, s64);
+extern int __ucmpdi2(u64, u64);
+
 #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 8e21bb492dca..d310546e5d9d 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -2,14 +2,42 @@
 #define _ASM_POWERPC_BOOK3S_32_PGALLOC_H

 #include <linux/threads.h>
+#include <linux/slab.h>

-/* For 32-bit, all levels of page tables are just drawn from get_free_page() */
-#define MAX_PGTABLE_INDEX_SIZE 0
+/*
+ * Functions that deal with pagetables that could be at any level of
+ * the table need to be passed an "index_size" so they know how to
+ * handle allocation.  For PTE pages (which are linked to a struct
+ * page for now, and drawn from the main get_free_pages() pool), the
+ * allocation size will be (2^index_size * sizeof(pointer)) and
+ * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
+ *
+ * The maximum index size needs to be big enough to allow any
+ * pagetable sizes we need, but small enough to fit in the low bits of
+ * any page table pointer.  In other words all pagetables, even tiny
+ * ones, must be aligned to allow at least enough low 0 bits to
+ * contain this value.  This value is also used as a mask, so it must
+ * be one less than a power of two.
+ */
+#define MAX_PGTABLE_INDEX_SIZE	0xf
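A standalone sketch of the pointer-packing trick this comment describes: because every page table is aligned to at least (MAX_PGTABLE_INDEX_SIZE + 1) pointers, the low bits of a table pointer are guaranteed zero and can carry the table's own index_size until the deferred free runs. Hedged illustration only; in the kernel the equivalent packing sits in the pgtable_free_tlb() paths:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_PGTABLE_INDEX_SIZE	0xf	/* mask: one less than a power of two */

static void *pack(void *table, unsigned int index_size)
{
	assert(index_size <= MAX_PGTABLE_INDEX_SIZE);
	/* The alignment guarantee means these low bits are currently zero. */
	assert(((uintptr_t)table & MAX_PGTABLE_INDEX_SIZE) == 0);
	return (void *)((uintptr_t)table | index_size);
}

static void unpack(void *pgf, void **table, unsigned int *index_size)
{
	*index_size = (uintptr_t)pgf & MAX_PGTABLE_INDEX_SIZE;
	*table = (void *)((uintptr_t)pgf & ~(uintptr_t)MAX_PGTABLE_INDEX_SIZE);
}

int main(void)
{
	void *table, *out;
	unsigned int shift;

	/* A 2^4-entry table: 16-pointer alignment gives enough low zero bits. */
	table = aligned_alloc(16 * sizeof(void *), 16 * sizeof(void *));
	if (!table)
		return 1;

	out = pack(table, 4);
	unpack(out, &out, &shift);
	printf("table=%p recovered=%p index_size=%u\n", table, out, shift);
	free(table);
	return 0;
}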

 extern void __bad_pte(pmd_t *pmd);

-extern pgd_t *pgd_alloc(struct mm_struct *mm);
-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+extern struct kmem_cache *pgtable_cache[];
+#define PGT_CACHE(shift) ({				\
+			BUG_ON(!(shift));		\
+			pgtable_cache[(shift) - 1];	\
+		})
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
+}

 /*
  * We don't have any real pmd's, and this code never triggers because
@@ -68,8 +96,12 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)

 static inline void pgtable_free(void *table, unsigned index_size)
 {
-	BUG_ON(index_size); /* 32-bit doesn't use this */
-	free_page((unsigned long)table);
+	if (!index_size) {
+		free_page((unsigned long)table);
+	} else {
+		BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
+		kmem_cache_free(PGT_CACHE(index_size), table);
+	}
 }

 #define check_pgt_cache()	do { } while (0)
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 6b8b2d57fdc8..012223638815 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -8,6 +8,23 @@
 /* And here we include common definitions */
 #include <asm/pte-common.h>

+#define PTE_INDEX_SIZE	PTE_SHIFT
+#define PMD_INDEX_SIZE	0
+#define PUD_INDEX_SIZE	0
+#define PGD_INDEX_SIZE	(32 - PGDIR_SHIFT)
+
+#define PMD_CACHE_INDEX	PMD_INDEX_SIZE
+
+#ifndef __ASSEMBLY__
+#define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_INDEX_SIZE)
+#define PMD_TABLE_SIZE	0
+#define PUD_TABLE_SIZE	0
+#define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
+#endif	/* __ASSEMBLY__ */
+
+#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
+#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
+
 /*
  * The normal case is that PTEs are 32-bits and we have a 1-page
  * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages.  -- paulus
@@ -19,14 +36,10 @@
  *	-Matt
  */
 /* PGDIR_SHIFT determines what a top-level page table entry can map */
-#define PGDIR_SHIFT	(PAGE_SHIFT + PTE_SHIFT)
+#define PGDIR_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))

-#define PTRS_PER_PTE	(1 << PTE_SHIFT)
-#define PTRS_PER_PMD	1
-#define PTRS_PER_PGD	(1 << (32 - PGDIR_SHIFT))
-
 #define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
 /*
  * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
@@ -82,12 +95,8 @@

 extern unsigned long ioremap_bot;

-/*
- * entries per page directory level: our page-table tree is two-level, so
- * we don't really have any PMD directory.
- */ -#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_SHIFT) -#define PGD_TABLE_SIZE (sizeof(pgd_t) << (32 - PGDIR_SHIFT)) +/* Bits to mask out from a PGD to get to the PUD page */ +#define PGD_MASKED_BITS 0 #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \ @@ -224,7 +233,8 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, static inline void __ptep_set_access_flags(struct mm_struct *mm, - pte_t *ptep, pte_t entry) + pte_t *ptep, pte_t entry, + unsigned long address) { unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); @@ -283,15 +293,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -#ifndef CONFIG_PPC_4K_PAGES -void pgtable_cache_init(void); -#else -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) -#endif - extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp); diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 1af837c561ba..1c64bc6330bc 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -16,9 +16,6 @@ #define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE) #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) -/* With 4k base page size, hugepage PTEs go at the PMD level */ -#define MIN_HUGEPTE_SHIFT PMD_SHIFT - /* PTE flags to conserve for HPTE identification */ #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \ H_PAGE_F_SECOND | H_PAGE_F_GIX) diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 5aae4f530c21..f3dd21efa2ea 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -6,9 +6,6 @@ #define H_PUD_INDEX_SIZE 5 #define H_PGD_INDEX_SIZE 12 -/* With 4k base page size, hugepage PTEs go at the PMD level */ -#define MIN_HUGEPTE_SHIFT PAGE_SHIFT - #define H_PAGE_COMBO 0x00001000 /* this is a combo 4k page */ #define H_PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */ /* diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index c45189aa7476..c62f14d0bec1 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -1,5 +1,5 @@ -#ifndef _ASM_POWERPC_BOOK3S_64_HUGETLB_RADIX_H -#define _ASM_POWERPC_BOOK3S_64_HUGETLB_RADIX_H +#ifndef _ASM_POWERPC_BOOK3S_64_HUGETLB_H +#define _ASM_POWERPC_BOOK3S_64_HUGETLB_H /* * For radix we want generic code to handle hugetlb. 
But then if we want
 * both hash and radix to be enabled together we need to workaround the
 * limitations.
@@ -21,9 +21,33 @@ static inline int hstate_get_psize(struct hstate *hstate)
 		return MMU_PAGE_2M;
 	else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
 		return MMU_PAGE_1G;
+	else if (shift == mmu_psize_defs[MMU_PAGE_16M].shift)
+		return MMU_PAGE_16M;
+	else if (shift == mmu_psize_defs[MMU_PAGE_16G].shift)
+		return MMU_PAGE_16G;
 	else {
 		WARN(1, "Wrong huge page shift\n");
 		return mmu_virtual_psize;
 	}
 }
+
+#define arch_make_huge_pte arch_make_huge_pte
+static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+				       struct page *page, int writable)
+{
+	unsigned long page_shift;
+
+	if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
+		return entry;
+
+	page_shift = huge_page_shift(hstate_vma(vma));
+	/*
+	 * We don't support 1G hugetlb pages yet.
+	 */
+	VM_WARN_ON(page_shift == mmu_psize_defs[MMU_PAGE_1G].shift);
+	if (page_shift == mmu_psize_defs[MMU_PAGE_2M].shift)
+		return __pte(pte_val(entry) | _PAGE_LARGE);
+	else
+		return entry;
+}
 #endif
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index e407af2b7333..2e6a823fa502 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -70,7 +70,9 @@

 #define HPTE_V_SSIZE_SHIFT	62
 #define HPTE_V_AVPN_SHIFT	7
+#define HPTE_V_COMMON_BITS	ASM_CONST(0x000fffffffffffff)
 #define HPTE_V_AVPN		ASM_CONST(0x3fffffffffffff80)
+#define HPTE_V_AVPN_3_0		ASM_CONST(0x000fffffffffff80)
 #define HPTE_V_AVPN_VAL(x)	(((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
 #define HPTE_V_COMPARE(x,y)	(!(((x) ^ (y)) & 0xffffffffffffff80UL))
 #define HPTE_V_BOLTED		ASM_CONST(0x0000000000000010)
@@ -80,14 +82,16 @@
 #define HPTE_V_VALID		ASM_CONST(0x0000000000000001)

 /*
- * ISA 3.0 have a different HPTE format.
+ * ISA 3.0 has a different HPTE format.
  */
 #define HPTE_R_3_0_SSIZE_SHIFT	58
+#define HPTE_R_3_0_SSIZE_MASK	(3ull << HPTE_R_3_0_SSIZE_SHIFT)
 #define HPTE_R_PP0		ASM_CONST(0x8000000000000000)
 #define HPTE_R_TS		ASM_CONST(0x4000000000000000)
 #define HPTE_R_KEY_HI		ASM_CONST(0x3000000000000000)
 #define HPTE_R_RPN_SHIFT	12
 #define HPTE_R_RPN		ASM_CONST(0x0ffffffffffff000)
+#define HPTE_R_RPN_3_0		ASM_CONST(0x01fffffffffff000)
 #define HPTE_R_PP		ASM_CONST(0x0000000000000003)
 #define HPTE_R_PPP		ASM_CONST(0x8000000000000003)
 #define HPTE_R_N		ASM_CONST(0x0000000000000004)
@@ -316,12 +320,43 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
 	 */
 	v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
 	v <<= HPTE_V_AVPN_SHIFT;
-	if (!cpu_has_feature(CPU_FTR_ARCH_300))
-		v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
+	v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
 	return v;
 }

 /*
+ * ISA v3.0 defines a new HPTE format, which differs from the old
+ * format in having smaller AVPN and ARPN fields, and the B field
+ * in the second dword instead of the first.
+ */
+static inline unsigned long hpte_old_to_new_v(unsigned long v)
+{
+	/* trim AVPN, drop B */
+	return v & HPTE_V_COMMON_BITS;
+}
+
+static inline unsigned long hpte_old_to_new_r(unsigned long v, unsigned long r)
+{
+	/* move B field from 1st to 2nd dword, trim ARPN */
+	return (r & ~HPTE_R_3_0_SSIZE_MASK) |
+	       (((v) >> HPTE_V_SSIZE_SHIFT) << HPTE_R_3_0_SSIZE_SHIFT);
+}
+
+static inline unsigned long hpte_new_to_old_v(unsigned long v, unsigned long r)
+{
+	/* insert B field */
+	return (v & HPTE_V_COMMON_BITS) |
+	       ((r & HPTE_R_3_0_SSIZE_MASK) <<
+		(HPTE_V_SSIZE_SHIFT - HPTE_R_3_0_SSIZE_SHIFT));
+}
+
+static inline unsigned long hpte_new_to_old_r(unsigned long r)
+{
+	/* clear out B field */
+	return r & ~HPTE_R_3_0_SSIZE_MASK;
+}
+
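A standalone round-trip check of the old-to-new HPTE conversions added above. The masks and shifts are copied from the hunk (B moves from bits 63:62 of the first dword to bits 59:58 of the second); everything else is test scaffolding:

#include <assert.h>
#include <stdio.h>

#define HPTE_V_SSIZE_SHIFT	62
#define HPTE_V_COMMON_BITS	0x000fffffffffffffUL
#define HPTE_R_3_0_SSIZE_SHIFT	58
#define HPTE_R_3_0_SSIZE_MASK	(3UL << HPTE_R_3_0_SSIZE_SHIFT)

static unsigned long hpte_old_to_new_v(unsigned long v)
{
	return v & HPTE_V_COMMON_BITS;		/* trim AVPN, drop B */
}

static unsigned long hpte_old_to_new_r(unsigned long v, unsigned long r)
{
	/* move B from dword 0 (bits 63:62) to dword 1 (bits 59:58) */
	return (r & ~HPTE_R_3_0_SSIZE_MASK) |
	       ((v >> HPTE_V_SSIZE_SHIFT) << HPTE_R_3_0_SSIZE_SHIFT);
}

static unsigned long hpte_new_to_old_v(unsigned long v, unsigned long r)
{
	return (v & HPTE_V_COMMON_BITS) |
	       ((r & HPTE_R_3_0_SSIZE_MASK) <<
		(HPTE_V_SSIZE_SHIFT - HPTE_R_3_0_SSIZE_SHIFT));
}

static unsigned long hpte_new_to_old_r(unsigned long r)
{
	return r & ~HPTE_R_3_0_SSIZE_MASK;	/* clear out B */
}

int main(void)
{
	/* B = 0b01 (1T segment) plus some made-up AVPN/RPN bits */
	unsigned long old_v = (1UL << HPTE_V_SSIZE_SHIFT) | 0x0000123456789a01UL;
	unsigned long old_r = 0x0000000012345003UL;

	unsigned long new_v = hpte_old_to_new_v(old_v);
	unsigned long new_r = hpte_old_to_new_r(old_v, old_r);

	/* B must now live in the second dword... */
	assert(((new_r >> HPTE_R_3_0_SSIZE_SHIFT) & 3) == 1);
	/* ...and the reverse conversion must restore both dwords. */
	assert(hpte_new_to_old_v(new_v, new_r) == old_v);
	assert(hpte_new_to_old_r(new_r) == old_r);

	printf("old v=%016lx r=%016lx -> new v=%016lx r=%016lx\n",
	       old_v, old_r, new_v, new_r);
	return 0;
}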
+/*
  * This function sets the AVPN and L fields of the HPTE appropriately
  * using the base page size and actual page size.
  */
 static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize,
@@ -341,12 +376,8 @@ static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize,
  * aligned for the requested page size
  */
 static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize,
-					  int actual_psize, int ssize)
+					  int actual_psize)
 {
-
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
-		pa |= ((unsigned long) ssize) << HPTE_R_3_0_SSIZE_SHIFT;
-
 	/* A 4K page needs no special encoding */
 	if (actual_psize == MMU_PAGE_4K)
 		return pa & HPTE_R_RPN;
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 9fd77f8794a0..5905f0ff57d1 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -26,6 +26,11 @@
 #define _RPAGE_SW1		0x00800
 #define _RPAGE_SW2		0x00400
 #define _RPAGE_SW3		0x00200
+#define _RPAGE_RSV1		0x1000000000000000UL
+#define _RPAGE_RSV2		0x0800000000000000UL
+#define _RPAGE_RSV3		0x0400000000000000UL
+#define _RPAGE_RSV4		0x0200000000000000UL
+
 #ifdef CONFIG_MEM_SOFT_DIRTY
 #define _PAGE_SOFT_DIRTY	_RPAGE_SW3 /* software: software dirty tracking */
 #else
@@ -33,6 +38,11 @@
 #endif
 #define _PAGE_SPECIAL		_RPAGE_SW2 /* software: special page */

+/*
+ * For P9 DD1 only, we need to track whether the pte's huge.
+ */ +#define _PAGE_LARGE _RPAGE_RSV1 + #define _PAGE_PTE (1ul << 62) /* distinguishes PTEs from pointers */ #define _PAGE_PRESENT (1ul << 63) /* pte contains a translation */ @@ -568,10 +578,11 @@ static inline bool check_pte_access(unsigned long access, unsigned long ptev) */ static inline void __ptep_set_access_flags(struct mm_struct *mm, - pte_t *ptep, pte_t entry) + pte_t *ptep, pte_t entry, + unsigned long address) { if (radix_enabled()) - return radix__ptep_set_access_flags(mm, ptep, entry); + return radix__ptep_set_access_flags(mm, ptep, entry, address); return hash__ptep_set_access_flags(ptep, entry); } @@ -789,9 +800,6 @@ extern struct page *pgd_page(pgd_t pgd); #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) -void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); -void pgtable_cache_init(void); - static inline int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags) { @@ -1009,7 +1017,8 @@ static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma, #define pmd_move_must_withdraw pmd_move_must_withdraw struct spinlock; static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, - struct spinlock *old_pmd_ptl) + struct spinlock *old_pmd_ptl, + struct vm_area_struct *vma) { if (radix_enabled()) return false; @@ -1020,6 +1029,16 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, */ return true; } + + +#define arch_needs_pgtable_deposit arch_needs_pgtable_deposit +static inline bool arch_needs_pgtable_deposit(void) +{ + if (radix_enabled()) + return false; + return true; +} + #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */ diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 2a46dea8e1b1..b4d1302387a3 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -140,19 +140,20 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm, unsigned long new_pte; old_pte = __radix_pte_update(ptep, ~0, 0); - asm volatile("ptesync" : : : "memory"); /* * new value of pte */ new_pte = (old_pte | set) & ~clr; - /* - * For now let's do heavy pid flush - * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize); + * If we are trying to clear the pte, we can skip + * the below sequence and batch the tlb flush. The + * tlb flush batching is done by mmu gather code */ - radix__flush_tlb_mm(mm); - - __radix_pte_update(ptep, 0, new_pte); + if (new_pte) { + asm volatile("ptesync" : : : "memory"); + radix__flush_tlb_pte_p9_dd1(old_pte, mm, addr); + __radix_pte_update(ptep, 0, new_pte); + } } else old_pte = __radix_pte_update(ptep, clr, set); asm volatile("ptesync" : : : "memory"); @@ -167,7 +168,8 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm, * function doesn't need to invalidate tlb. 
*/ static inline void radix__ptep_set_access_flags(struct mm_struct *mm, - pte_t *ptep, pte_t entry) + pte_t *ptep, pte_t entry, + unsigned long address) { unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | @@ -183,13 +185,7 @@ static inline void radix__ptep_set_access_flags(struct mm_struct *mm, * new value of pte */ new_pte = old_pte | set; - - /* - * For now let's do heavy pid flush - * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize); - */ - radix__flush_tlb_mm(mm); - + radix__flush_tlb_pte_p9_dd1(old_pte, mm, address); __radix_pte_update(ptep, 0, new_pte); } else __radix_pte_update(ptep, 0, set); @@ -243,6 +239,8 @@ static inline int radix__pmd_trans_huge(pmd_t pmd) static inline pmd_t radix__pmd_mkhuge(pmd_t pmd) { + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + return __pmd(pmd_val(pmd) | _PAGE_PTE | _PAGE_LARGE); return __pmd(pmd_val(pmd) | _PAGE_PTE); } static inline void radix__pmdp_huge_split_prepare(struct vm_area_struct *vma, diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index a9e19cb2f7c5..cc7fbde4f53c 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -42,4 +42,6 @@ extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, unsigned long page_size); extern void radix__flush_tlb_lpid(unsigned long lpid); extern void radix__flush_tlb_all(void); +extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, + unsigned long address); #endif diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index ffbafbf76b19..7657aa897a38 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -20,12 +20,15 @@ #endif #else /* CONFIG_PPC64 */ #define L1_CACHE_SHIFT 7 +#define IFETCH_ALIGN_SHIFT 4 /* POWER8,9 */ #endif #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) #define SMP_CACHE_BYTES L1_CACHE_BYTES +#define IFETCH_ALIGN_BYTES (1 << IFETCH_ALIGN_SHIFT) + #if defined(__powerpc64__) && !defined(__ASSEMBLY__) struct ppc64_caches { u32 dsize; /* L1 d-cache size */ diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index ee655ed1ff1b..1e8fceb308a5 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -53,10 +53,8 @@ static inline __sum16 csum_fold(__wsum sum) return (__force __sum16)(~((__force u32)sum + tmp) >> 16); } -static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum) +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) { #ifdef __powerpc64__ unsigned long s = (__force u32)sum; @@ -83,10 +81,8 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, * computes the checksum of the TCP/UDP pseudo-header * returns a 16-bit checksum, already complemented */ -static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum) +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) { return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); } diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index 44efe739b6b9..fc46b664c49e 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -7,6 +7,71 @@ #include <asm/asm-compat.h> #include 
<linux/bug.h> +#ifdef __BIG_ENDIAN +#define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE) +#else +#define BITOFF_CAL(size, off) (off * BITS_PER_BYTE) +#endif + +#define XCHG_GEN(type, sfx, cl) \ +static inline u32 __xchg_##type##sfx(volatile void *p, u32 val) \ +{ \ + unsigned int prev, prev_mask, tmp, bitoff, off; \ + \ + off = (unsigned long)p % sizeof(u32); \ + bitoff = BITOFF_CAL(sizeof(type), off); \ + p -= off; \ + val <<= bitoff; \ + prev_mask = (u32)(type)-1 << bitoff; \ + \ + __asm__ __volatile__( \ +"1: lwarx %0,0,%3\n" \ +" andc %1,%0,%5\n" \ +" or %1,%1,%4\n" \ + PPC405_ERR77(0,%3) \ +" stwcx. %1,0,%3\n" \ +" bne- 1b\n" \ + : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \ + : "r" (p), "r" (val), "r" (prev_mask) \ + : "cc", cl); \ + \ + return prev >> bitoff; \ +} + +#define CMPXCHG_GEN(type, sfx, br, br2, cl) \ +static inline \ +u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new) \ +{ \ + unsigned int prev, prev_mask, tmp, bitoff, off; \ + \ + off = (unsigned long)p % sizeof(u32); \ + bitoff = BITOFF_CAL(sizeof(type), off); \ + p -= off; \ + old <<= bitoff; \ + new <<= bitoff; \ + prev_mask = (u32)(type)-1 << bitoff; \ + \ + __asm__ __volatile__( \ + br \ +"1: lwarx %0,0,%3\n" \ +" and %1,%0,%6\n" \ +" cmpw 0,%1,%4\n" \ +" bne- 2f\n" \ +" andc %1,%0,%6\n" \ +" or %1,%1,%5\n" \ + PPC405_ERR77(0,%3) \ +" stwcx. %1,0,%3\n" \ +" bne- 1b\n" \ + br2 \ + "\n" \ +"2:" \ + : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \ + : "r" (p), "r" (old), "r" (new), "r" (prev_mask) \ + : "cc", cl); \ + \ + return prev >> bitoff; \ +} + /* * Atomic exchange * @@ -14,6 +79,11 @@ * the previous value stored there. */ +XCHG_GEN(u8, _local, "memory"); +XCHG_GEN(u8, _relaxed, "cc"); +XCHG_GEN(u16, _local, "memory"); +XCHG_GEN(u16, _relaxed, "cc"); + static __always_inline unsigned long __xchg_u32_local(volatile void *p, unsigned long val) { @@ -85,9 +155,13 @@ __xchg_u64_relaxed(u64 *p, unsigned long val) #endif static __always_inline unsigned long -__xchg_local(volatile void *ptr, unsigned long x, unsigned int size) +__xchg_local(void *ptr, unsigned long x, unsigned int size) { switch (size) { + case 1: + return __xchg_u8_local(ptr, x); + case 2: + return __xchg_u16_local(ptr, x); case 4: return __xchg_u32_local(ptr, x); #ifdef CONFIG_PPC64 @@ -103,6 +177,10 @@ static __always_inline unsigned long __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) { switch (size) { + case 1: + return __xchg_u8_relaxed(ptr, x); + case 2: + return __xchg_u16_relaxed(ptr, x); case 4: return __xchg_u32_relaxed(ptr, x); #ifdef CONFIG_PPC64 @@ -131,6 +209,15 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) * and return the old value of *p. 
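The XCHG_GEN/CMPXCHG_GEN macros above emulate 1- and 2-byte atomics on powerpc, which only has word-sized lwarx/stwcx. reservations: align down to the containing 32-bit word, shift the operand into place, and mask off the other bytes. A portable C model of the same masking math, using the GCC/Clang __atomic builtins in place of the ll/sc loop (little-endian BITOFF_CAL variant shown; illustration only):

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_BYTE	8
/* little-endian variant of BITOFF_CAL from the patch */
#define BITOFF_CAL(size, off)	((off) * BITS_PER_BYTE)

static uint8_t xchg_u8_emulated(volatile void *p, uint8_t val)
{
	uintptr_t off = (uintptr_t)p % sizeof(uint32_t);
	unsigned int bitoff = BITOFF_CAL(sizeof(uint8_t), off);
	volatile uint32_t *w = (volatile uint32_t *)((uintptr_t)p - off);
	uint32_t prev_mask = (uint32_t)0xff << bitoff;
	uint32_t old, new;

	old = __atomic_load_n(w, __ATOMIC_RELAXED);
	do {	/* the retry mirrors the "bne- 1b" back to lwarx */
		new = (old & ~prev_mask) | ((uint32_t)val << bitoff);
	} while (!__atomic_compare_exchange_n(w, &old, new, 0,
					      __ATOMIC_SEQ_CST,
					      __ATOMIC_RELAXED));
	return old >> bitoff;
}

int main(void)
{
	uint32_t word = 0x44332211;
	uint8_t prev = xchg_u8_emulated((uint8_t *)&word + 2, 0xaa);

	/* expect prev=0x33, word=0x44aa2211 on little-endian */
	printf("prev=0x%02x word=0x%08x\n", prev, word);
	return 0;
}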
*/ +CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); +CMPXCHG_GEN(u8, _local, , , "memory"); +CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); +CMPXCHG_GEN(u8, _relaxed, , , "cc"); +CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); +CMPXCHG_GEN(u16, _local, , , "memory"); +CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); +CMPXCHG_GEN(u16, _relaxed, , , "cc"); + static __always_inline unsigned long __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) { @@ -316,6 +403,10 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8(ptr, old, new); + case 2: + return __cmpxchg_u16(ptr, old, new); case 4: return __cmpxchg_u32(ptr, old, new); #ifdef CONFIG_PPC64 @@ -328,10 +419,14 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, } static __always_inline unsigned long -__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, +__cmpxchg_local(void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8_local(ptr, old, new); + case 2: + return __cmpxchg_u16_local(ptr, old, new); case 4: return __cmpxchg_u32_local(ptr, old, new); #ifdef CONFIG_PPC64 @@ -348,6 +443,10 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8_relaxed(ptr, old, new); + case 2: + return __cmpxchg_u16_relaxed(ptr, old, new); case 4: return __cmpxchg_u32_relaxed(ptr, old, new); #ifdef CONFIG_PPC64 @@ -364,6 +463,10 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8_acquire(ptr, old, new); + case 2: + return __cmpxchg_u16_acquire(ptr, old, new); case 4: return __cmpxchg_u32_acquire(ptr, old, new); #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 4f60db074725..aa2e6a34b872 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -46,26 +46,12 @@ extern cputime_t cputime_one_jiffy; * Convert cputime <-> jiffies */ extern u64 __cputime_jiffies_factor; -DECLARE_PER_CPU(unsigned long, cputime_last_delta); -DECLARE_PER_CPU(unsigned long, cputime_scaled_last_delta); static inline unsigned long cputime_to_jiffies(const cputime_t ct) { return mulhdu((__force u64) ct, __cputime_jiffies_factor); } -/* Estimate the scaled cputime by scaling the real cputime based on - * the last scaled to real ratio */ -static inline cputime_t cputime_to_scaled(const cputime_t ct) -{ - if (cpu_has_feature(CPU_FTR_SPURR) && - __this_cpu_read(cputime_last_delta)) - return (__force u64) ct * - __this_cpu_read(cputime_scaled_last_delta) / - __this_cpu_read(cputime_last_delta); - return ct; -} - static inline cputime_t jiffies_to_cputime(const unsigned long jif) { u64 ct; diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index a954e4975049..86308f177f2d 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -10,7 +10,7 @@ struct pt_regs; extern struct dentry *powerpc_debugfs_root; -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) extern int (*__debugger)(struct pt_regs *regs); extern int (*__debugger_ipi)(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h index 84d49b197c32..9a3eee661297 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -91,7 +91,7 @@ */ #define LOAD_HANDLER(reg, label) \ ld reg,PACAKBASE(r13); /* get high part of &label */ \ - ori reg,reg,(FIXED_SYMBOL_ABS_ADDR(label))@l; + ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label); #define __LOAD_HANDLER(reg, label) \ ld reg,PACAKBASE(r13); \ @@ -158,14 +158,17 @@ BEGIN_FTR_SECTION_NESTED(943) \ std ra,offset(r13); \ END_FTR_SECTION_NESTED(ftr,ftr,943) -#define EXCEPTION_PROLOG_0(area) \ - GET_PACA(r13); \ +#define EXCEPTION_PROLOG_0_PACA(area) \ std r9,area+EX_R9(r13); /* save r9 */ \ OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \ HMT_MEDIUM; \ std r10,area+EX_R10(r13); /* save r10 - r12 */ \ OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR) +#define EXCEPTION_PROLOG_0(area) \ + GET_PACA(r13); \ + EXCEPTION_PROLOG_0_PACA(area) + #define __EXCEPTION_PROLOG_1(area, extra, vec) \ OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \ OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \ @@ -196,6 +199,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) EXCEPTION_PROLOG_1(area, extra, vec); \ EXCEPTION_PROLOG_PSERIES_1(label, h); +/* Have the PACA in r13 already */ +#define EXCEPTION_PROLOG_PSERIES_PACA(area, label, h, extra, vec) \ + EXCEPTION_PROLOG_0_PACA(area); \ + EXCEPTION_PROLOG_1(area, extra, vec); \ + EXCEPTION_PROLOG_PSERIES_1(label, h); + #define __KVMTEST(h, n) \ lbz r10,HSTATE_IN_GUEST(r13); \ cmpwi r10,0; \ diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index 2a9cf845473b..eaada6c92344 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -23,10 +23,8 @@ "4: li %1,%3\n" \ "b 3b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - ".align 3\n" \ - PPC_LONG "1b,4b,2b,4b\n" \ - ".previous" \ + EX_TABLE(1b, 4b) \ + EX_TABLE(2b, 4b) \ : "=&r" (oldval), "=&r" (ret) \ : "b" (uaddr), "i" (-EFAULT), "r" (oparg) \ : "cr0", "memory") @@ -104,11 +102,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, "3: .section .fixup,\"ax\"\n\ 4: li %0,%6\n\ b 3b\n\ - .previous\n\ - .section __ex_table,\"a\"\n\ - .align 3\n\ - " PPC_LONG "1b,4b,2b,4b\n\ - .previous" \ + .previous\n" + EX_TABLE(1b, 4b) + EX_TABLE(2b, 4b) : "+r" (ret), "=&r" (prev), "+m" (*uaddr) : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT) : "cc", "memory"); diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index ab90c2fa1ea6..fca7033839a9 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -95,12 +95,12 @@ end_##sname: #define __FIXED_SECTION_ENTRY_BEGIN(sname, name, __align) \ USE_FIXED_SECTION(sname); \ - .align __align; \ + .balign __align; \ .global name; \ name: #define FIXED_SECTION_ENTRY_BEGIN(sname, name) \ - __FIXED_SECTION_ENTRY_BEGIN(sname, name, 0) + __FIXED_SECTION_ENTRY_BEGIN(sname, name, IFETCH_ALIGN_BYTES) #define FIXED_SECTION_ENTRY_BEGIN_LOCATION(sname, name, start) \ USE_FIXED_SECTION(sname); \ @@ -203,9 +203,9 @@ name: #define EXC_VIRT_END(name, start, end) \ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, end) -#define EXC_COMMON_BEGIN(name) \ +#define EXC_COMMON_BEGIN(name) \ USE_TEXT_SECTION(); \ - .align 7; \ + .balign IFETCH_ALIGN_BYTES; \ .global name; \ DEFINE_FIXED_SYMBOL(name); \ name: diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index c5517f463ec7..ede215167d1a 100644 --- 
a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -9,7 +9,7 @@ extern struct kmem_cache *hugepte_cache; #ifdef CONFIG_PPC_BOOK3S_64 -#include <asm/book3s/64/hugetlb-radix.h> +#include <asm/book3s/64/hugetlb.h> /* * This should work for other subarchs too. But right now we use the * new format only for 64bit book3s @@ -51,12 +51,20 @@ static inline void __local_flush_hugetlb_page(struct vm_area_struct *vma, static inline pte_t *hugepd_page(hugepd_t hpd) { BUG_ON(!hugepd_ok(hpd)); +#ifdef CONFIG_PPC_8xx + return (pte_t *)__va(hpd.pd & ~(_PMD_PAGE_MASK | _PMD_PRESENT_MASK)); +#else return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | PD_HUGE); +#endif } static inline unsigned int hugepd_shift(hugepd_t hpd) { +#ifdef CONFIG_PPC_8xx + return ((hpd.pd & _PMD_PAGE_MASK) >> 1) + 17; +#else return hpd.pd & HUGEPD_SHIFT_MASK; +#endif } #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -99,7 +107,15 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte); +#ifdef CONFIG_PPC_8xx +static inline void flush_hugetlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ + flush_tlb_page(vma, vmaddr); +} +#else void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +#endif void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, @@ -205,7 +221,8 @@ static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, * are reserved early in the boot process by memblock instead of via * the .dts as on IBM platforms. */ -#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_FSL_BOOK3E) +#if defined(CONFIG_HUGETLB_PAGE) && (defined(CONFIG_PPC_FSL_BOOK3E) || \ + defined(CONFIG_PPC_8xx)) extern void __init reserve_hugetlb_gpages(void); #else static inline void reserve_hugetlb_gpages(void) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 708edebcf147..77ff1ba99d1f 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -275,7 +275,9 @@ #define H_COP 0x304 #define H_GET_MPP_X 0x314 #define H_SET_MODE 0x31C -#define MAX_HCALL_OPCODE H_SET_MODE +#define H_CLEAR_HPT 0x358 +#define H_SIGNAL_SYS_RESET 0x380 +#define MAX_HCALL_OPCODE H_SIGNAL_SYS_RESET /* H_VIOCTL functions */ #define H_GET_VIOA_DUMP_SIZE 0x01 @@ -306,6 +308,11 @@ #define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 #define H_SET_MODE_RESOURCE_LE 4 +/* Values for argument to H_SIGNAL_SYS_RESET */ +#define H_SIGNAL_SYS_RESET_ALL -1 +#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2 +/* >= 0 values are CPU number */ + #ifndef __ASSEMBLY__ /** @@ -412,27 +419,6 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc) } } -#ifdef CONFIG_PPC_PSERIES -extern int CMO_PrPSP; -extern int CMO_SecPSP; -extern unsigned long CMO_PageSize; - -static inline int cmo_get_primary_psp(void) -{ - return CMO_PrPSP; -} - -static inline int cmo_get_secondary_psp(void) -{ - return CMO_SecPSP; -} - -static inline unsigned long cmo_get_page_size(void) -{ - return CMO_PageSize; -} -#endif /* CONFIG_PPC_PSERIES */ - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HVCALL_H */ diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index f6fda8482f60..5ed292431b5b 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -33,6 +33,7 @@ extern struct pci_dev *isa_bridge_pcidev; #include <asm/synch.h> #include <asm/delay.h> #include <asm/mmu.h> +#include 
<asm/ppc_asm.h> #include <asm-generic/iomap.h> @@ -458,13 +459,10 @@ static inline unsigned int name(unsigned int port) \ "5: li %0,-1\n" \ " b 4b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 2\n" \ - " .long 0b,5b\n" \ - " .long 1b,5b\n" \ - " .long 2b,5b\n" \ - " .long 3b,5b\n" \ - ".previous" \ + EX_TABLE(0b, 5b) \ + EX_TABLE(1b, 5b) \ + EX_TABLE(2b, 5b) \ + EX_TABLE(3b, 5b) \ : "=&r" (x) \ : "r" (port + _IO_BASE) \ : "memory"); \ @@ -479,11 +477,8 @@ static inline void name(unsigned int val, unsigned int port) \ "0:" op " %0,0,%1\n" \ "1: sync\n" \ "2:\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 2\n" \ - " .long 0b,2b\n" \ - " .long 1b,2b\n" \ - ".previous" \ + EX_TABLE(0b, 2b) \ + EX_TABLE(1b, 2b) \ : : "r" (val), "r" (port + _IO_BASE) \ : "memory"); \ } diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index a46f5f45570c..6c3b71502fbc 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -53,7 +53,7 @@ typedef void (*crash_shutdown_t)(void); -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* * This function is responsible for capturing register states if coming @@ -91,7 +91,17 @@ static inline bool kdump_in_progress(void) return crashing_cpu >= 0; } -#else /* !CONFIG_KEXEC */ +#ifdef CONFIG_KEXEC_FILE +extern struct kexec_file_ops kexec_elf64_ops; + +int setup_purgatory(struct kimage *image, const void *slave_code, + const void *fdt, unsigned long kernel_load_addr, + unsigned long fdt_load_addr); +int setup_new_fdt(void *fdt, unsigned long initrd_load_addr, + unsigned long initrd_len, const char *cmdline); +#endif /* CONFIG_KEXEC_FILE */ + +#else /* !CONFIG_KEXEC_CORE */ static inline void crash_kexec_secondary(struct pt_regs *regs) { } static inline int overlaps_crashkernel(unsigned long start, unsigned long size) @@ -116,7 +126,7 @@ static inline bool kdump_in_progress(void) return false; } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #endif /* ! 
__ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_KEXEC_H */ diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index 2c9759bdb63b..97b8c1f83453 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -32,6 +32,7 @@ #include <asm/probes.h> #include <asm/code-patching.h> +#ifdef CONFIG_KPROBES #define __ARCH_WANT_KPROBES_INSN_SLOT struct pt_regs; @@ -127,5 +128,11 @@ struct kprobe_ctlblk { extern int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); +extern int kprobe_handler(struct pt_regs *regs); +extern int kprobe_post_handler(struct pt_regs *regs); +#else +static inline int kprobe_handler(struct pt_regs *regs) { return 0; } +static inline int kprobe_post_handler(struct pt_regs *regs) { return 0; } +#endif /* CONFIG_KPROBES */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_KPROBES_H */ diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 05cabed3d1bd..09a802bb702f 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -99,6 +99,7 @@ #define BOOK3S_INTERRUPT_H_EMUL_ASSIST 0xe40 #define BOOK3S_INTERRUPT_HMI 0xe60 #define BOOK3S_INTERRUPT_H_DOORBELL 0xe80 +#define BOOK3S_INTERRUPT_H_VIRT 0xea0 #define BOOK3S_INTERRUPT_PERFMON 0xf00 #define BOOK3S_INTERRUPT_ALTIVEC 0xf20 #define BOOK3S_INTERRUPT_VSX 0xf40 diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 28350a294b1e..e59b172666cd 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -48,7 +48,7 @@ #ifdef CONFIG_KVM_MMIO #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif -#define KVM_HALT_POLL_NS_DEFAULT 500000 +#define KVM_HALT_POLL_NS_DEFAULT 10000 /* 10 us */ /* These values are internal and can be increased later */ #define KVM_NR_IRQCHIPS 1 @@ -244,8 +244,10 @@ struct kvm_arch_memory_slot { struct kvm_arch { unsigned int lpid; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + unsigned int tlb_sets; unsigned long hpt_virt; struct revmap_entry *revmap; + atomic64_t mmio_update; unsigned int host_lpid; unsigned long host_lpcr; unsigned long sdr1; @@ -408,6 +410,24 @@ struct kvmppc_passthru_irqmap { #define KVMPPC_IRQ_MPIC 1 #define KVMPPC_IRQ_XICS 2 +#define MMIO_HPTE_CACHE_SIZE 4 + +struct mmio_hpte_cache_entry { + unsigned long hpte_v; + unsigned long hpte_r; + unsigned long rpte; + unsigned long pte_index; + unsigned long eaddr; + unsigned long slb_v; + long mmio_update; + unsigned int slb_base_pshift; +}; + +struct mmio_hpte_cache { + struct mmio_hpte_cache_entry entry[MMIO_HPTE_CACHE_SIZE]; + unsigned int index; +}; + struct openpic; struct kvm_vcpu_arch { @@ -498,6 +518,8 @@ struct kvm_vcpu_arch { ulong tcscr; ulong acop; ulong wort; + ulong tid; + ulong psscr; ulong shadow_srr1; #endif u32 vrsave; /* also USPRG0 */ @@ -546,6 +568,7 @@ struct kvm_vcpu_arch { u64 tfiar; u32 cr_tm; + u64 xer_tm; u64 lr_tm; u64 ctr_tm; u64 amr_tm; @@ -655,9 +678,11 @@ struct kvm_vcpu_arch { #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE struct kvm_vcpu_arch_shared shregs; + struct mmio_hpte_cache mmio_cache; unsigned long pgfault_addr; long pgfault_index; unsigned long pgfault_hpte[2]; + struct mmio_hpte_cache_entry *pgfault_cache; struct task_struct *run_task; struct kvm_run *kvm_run; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index f6e49640dbe1..2da67bf1f2ec 100644 --- 
a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -483,9 +483,10 @@ extern void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long guest_irq, unsigned long host_irq); extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long guest_irq, unsigned long host_irq); -extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, u32 xirr, - struct kvmppc_irq_map *irq_map, - struct kvmppc_passthru_irqmap *pimap); +extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, __be32 xirr, + struct kvmppc_irq_map *irq_map, + struct kvmppc_passthru_irqmap *pimap, + bool *again); extern int h_ipi_redirect; #else static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap( @@ -510,6 +511,48 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) #endif /* + * Prototypes for functions called only from assembler code. + * Having prototypes reduces sparse errors. + */ +long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, + unsigned long ioba, unsigned long tce); +long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, + unsigned long liobn, unsigned long ioba, + unsigned long tce_list, unsigned long npages); +long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, + unsigned long liobn, unsigned long ioba, + unsigned long tce_value, unsigned long npages); +long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, + unsigned int yield_count); +long kvmppc_h_random(struct kvm_vcpu *vcpu); +void kvmhv_commence_exit(int trap); +long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu); +void kvmppc_subcore_enter_guest(void); +void kvmppc_subcore_exit_guest(void); +long kvmppc_realmode_hmi_handler(void); +long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, + long pte_index, unsigned long pteh, unsigned long ptel); +long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index, unsigned long avpn); +long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu); +long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index, unsigned long avpn, + unsigned long va); +long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index); +long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index); +long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index); +long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, + unsigned long slb_v, unsigned int status, bool data); +unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu); +int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, + unsigned long mfrr); +int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr); +int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr); + +/* * Host-side operations we want to set up while running in real * mode in the guest operating on the xics. * Currently only VCPU wakeup is supported. diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index e02cbc6a6c70..5011b69107a7 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -183,7 +183,7 @@ struct machdep_calls { */ void (*machine_shutdown)(void); -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE void (*kexec_cpu_down)(int crash_shutdown, int secondary); /* Called to do what every setup is needed on image and the @@ -198,7 +198,7 @@ struct machdep_calls { * no return. 
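	 * When a platform leaves this hook unset, the generic powerpc
	 * code falls back to default_machine_kexec() instead.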
*/ void (*machine_kexec)(struct kimage *image); -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #ifdef CONFIG_SUSPEND /* These are called to disable and enable, respectively, IRQs when diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 3e0e4927811c..798b5bf91427 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -172,6 +172,41 @@ typedef struct { #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000) #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) + +/* Page size definitions, common between 32 and 64-bit + * + * shift : is the "PAGE_SHIFT" value for that page size + * penc : is the pte encoding mask + * + */ +struct mmu_psize_def { + unsigned int shift; /* number of bits */ + unsigned int enc; /* PTE encoding */ + unsigned int ind; /* Corresponding indirect page size shift */ + unsigned int flags; +#define MMU_PAGE_SIZE_DIRECT 0x1 /* Supported as a direct size */ +#define MMU_PAGE_SIZE_INDIRECT 0x2 /* Supported as an indirect size */ +}; + +extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; + +static inline int shift_to_mmu_psize(unsigned int shift) +{ + int psize; + + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) + if (mmu_psize_defs[psize].shift == shift) + return psize; + return -1; +} + +static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) +{ + if (mmu_psize_defs[mmu_psize].shift) + return mmu_psize_defs[mmu_psize].shift; + BUG(); +} + #endif /* !__ASSEMBLY__ */ #if defined(CONFIG_PPC_4K_PAGES) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index e88368354e49..a34c764ca8dd 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -29,6 +29,12 @@ */ /* + * Kernel read only support. + * We added the ppp value 0b110 in ISA 2.04. 
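+ * (That encoding makes a hashed page read-only even for privileged
+ * access, so CPUs with this feature can map kernel text truly
+ * read-only.)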
+ */ +#define MMU_FTR_KERNEL_RO ASM_CONST(0x00004000) + +/* * We need to clear top 16bits of va (from the remaining 64 bits )in * tlbie* instructions */ @@ -103,10 +109,10 @@ #define MMU_FTRS_POWER4 MMU_FTRS_DEFAULT_HPTE_ARCH_V2 #define MMU_FTRS_PPC970 MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA #define MMU_FTRS_POWER5 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE -#define MMU_FTRS_POWER6 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE -#define MMU_FTRS_POWER7 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE -#define MMU_FTRS_POWER8 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE -#define MMU_FTRS_POWER9 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE +#define MMU_FTRS_POWER6 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO +#define MMU_FTRS_POWER7 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO +#define MMU_FTRS_POWER8 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO +#define MMU_FTRS_POWER9 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO #define MMU_FTRS_CELL MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ MMU_FTR_CI_LARGE_PAGE #define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ @@ -208,6 +214,11 @@ extern u64 ppc64_rma_size; /* Cleanup function used by kexec */ extern void mmu_cleanup_all(void); extern void radix__mmu_cleanup_all(void); + +/* Functions for creating and updating partition table on POWER9 */ +extern void mmu_partition_table_init(void); +extern void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, + unsigned long dw1); #endif /* CONFIG_PPC64 */ struct mm_struct; @@ -264,19 +275,20 @@ static inline bool early_radix_enabled(void) #define MMU_PAGE_64K 2 #define MMU_PAGE_64K_AP 3 /* "Admixed pages" (hash64 only) */ #define MMU_PAGE_256K 4 -#define MMU_PAGE_1M 5 -#define MMU_PAGE_2M 6 -#define MMU_PAGE_4M 7 -#define MMU_PAGE_8M 8 -#define MMU_PAGE_16M 9 -#define MMU_PAGE_64M 10 -#define MMU_PAGE_256M 11 -#define MMU_PAGE_1G 12 -#define MMU_PAGE_16G 13 -#define MMU_PAGE_64G 14 +#define MMU_PAGE_512K 5 +#define MMU_PAGE_1M 6 +#define MMU_PAGE_2M 7 +#define MMU_PAGE_4M 8 +#define MMU_PAGE_8M 9 +#define MMU_PAGE_16M 10 +#define MMU_PAGE_64M 11 +#define MMU_PAGE_256M 12 +#define MMU_PAGE_1G 13 +#define MMU_PAGE_16G 14 +#define MMU_PAGE_64G 15 /* N.B. 
we need to change the type of hpte_page_sizes if this gets to be > 16 */ -#define MMU_PAGE_COUNT 15 +#define MMU_PAGE_COUNT 16 #ifdef CONFIG_PPC_BOOK3S_64 #include <asm/book3s/64/mmu.h> diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 5c451140660a..b9e3f0aca261 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -19,16 +19,18 @@ extern void destroy_context(struct mm_struct *mm); struct mm_iommu_table_group_mem_t; extern int isolate_lru_page(struct page *page); /* from internal.h */ -extern bool mm_iommu_preregistered(void); -extern long mm_iommu_get(unsigned long ua, unsigned long entries, +extern bool mm_iommu_preregistered(struct mm_struct *mm); +extern long mm_iommu_get(struct mm_struct *mm, + unsigned long ua, unsigned long entries, struct mm_iommu_table_group_mem_t **pmem); -extern long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem); -extern void mm_iommu_init(mm_context_t *ctx); -extern void mm_iommu_cleanup(mm_context_t *ctx); -extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, - unsigned long size); -extern struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, - unsigned long entries); +extern long mm_iommu_put(struct mm_struct *mm, + struct mm_iommu_table_group_mem_t *mem); +extern void mm_iommu_init(struct mm_struct *mm); +extern void mm_iommu_cleanup(struct mm_struct *mm); +extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, + unsigned long ua, unsigned long size); +extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, + unsigned long ua, unsigned long entries); extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, unsigned long ua, unsigned long *hpa); extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem); diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index cd4ffd86765f..cc12c61ef315 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -90,10 +90,6 @@ static inline int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sec } #endif -struct exception_table_entry; -void sort_ex_table(struct exception_table_entry *start, - struct exception_table_entry *finish); - #if defined(CONFIG_MODVERSIONS) && defined(CONFIG_PPC64) #define ARCH_RELOCATES_KCRCTAB #define reloc_start PHYSICAL_START diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h deleted file mode 100644 index 078155fa1189..000000000000 --- a/arch/powerpc/include/asm/mutex.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Optimised mutex implementation of include/asm-generic/mutex-dec.h algorithm - */ -#ifndef _ASM_POWERPC_MUTEX_H -#define _ASM_POWERPC_MUTEX_H - -static inline int __mutex_cmpxchg_lock(atomic_t *v, int old, int new) -{ - int t; - - __asm__ __volatile__ ( -"1: lwarx %0,0,%1 # mutex trylock\n\ - cmpw 0,%0,%2\n\ - bne- 2f\n" - PPC405_ERR77(0,%1) -" stwcx. %3,0,%1\n\ - bne- 1b" - PPC_ACQUIRE_BARRIER - "\n\ -2:" - : "=&r" (t) - : "r" (&v->counter), "r" (old), "r" (new) - : "cc", "memory"); - - return t; -} - -static inline int __mutex_dec_return_lock(atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%1 # mutex lock\n\ - addic %0,%0,-1\n" - PPC405_ERR77(0,%1) -" stwcx. 
%0,0,%1\n\ - bne- 1b" - PPC_ACQUIRE_BARRIER - : "=&r" (t) - : "r" (&v->counter) - : "cc", "memory"); - - return t; -} - -static inline int __mutex_inc_return_unlock(atomic_t *v) -{ - int t; - - __asm__ __volatile__( - PPC_RELEASE_BARRIER -"1: lwarx %0,0,%1 # mutex unlock\n\ - addic %0,%0,1\n" - PPC405_ERR77(0,%1) -" stwcx. %0,0,%1 \n\ - bne- 1b" - : "=&r" (t) - : "r" (&v->counter) - : "cc", "memory"); - - return t; -} - -/** - * __mutex_fastpath_lock - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 1 - * - * Change the count from 1 to a value lower than 1, and call <fail_fn> if - * it wasn't 1 originally. This function MUST leave the value lower than - * 1 even when the "1" assertion wasn't true. - */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(__mutex_dec_return_lock(count) < 0)) - fail_fn(count); -} - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int -__mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(__mutex_dec_return_lock(count) < 0)) - return -1; - return 0; -} - -/** - * __mutex_fastpath_unlock - try to promote the count from 0 to 1 - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 0 - * - * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>. - * In the failure case, this function is allowed to either set the value to - * 1, or to set it to a value lower than 1. - */ -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(__mutex_inc_return_unlock(count) <= 0)) - fail_fn(count); -} - -#define __mutex_slowpath_needs_to_unlock() 1 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: fallback function - * - * Change the count from 1 to 0, and return 1 (success), or if the count - * was not 1, then return 0 (failure). - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1)) - return 1; - return 0; -} - -#endif diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h index 76d6b9e0c8a9..633139291a48 100644 --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h @@ -2,14 +2,42 @@ #define _ASM_POWERPC_PGALLOC_32_H #include <linux/threads.h> +#include <linux/slab.h> -/* For 32-bit, all levels of page tables are just drawn from get_free_page() */ -#define MAX_PGTABLE_INDEX_SIZE 0 +/* + * Functions that deal with pagetables that could be at any level of + * the table need to be passed an "index_size" so they know how to + * handle allocation. For PTE pages (which are linked to a struct + * page for now, and drawn from the main get_free_pages() pool), the + * allocation size will be (2^index_size * sizeof(pointer)) and + * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). + * + * The maximum index size needs to be big enough to allow any + * pagetable sizes we need, but small enough to fit in the low bits of + * any page table pointer. 
In other words all pagetables, even tiny + * ones, must be aligned to allow at least enough low 0 bits to + * contain this value. This value is also used as a mask, so it must + * be one less than a power of two. + */ +#define MAX_PGTABLE_INDEX_SIZE 0xf extern void __bad_pte(pmd_t *pmd); -extern pgd_t *pgd_alloc(struct mm_struct *mm); -extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); +extern struct kmem_cache *pgtable_cache[]; +#define PGT_CACHE(shift) ({ \ + BUG_ON(!(shift)); \ + pgtable_cache[(shift) - 1]; \ + }) + +static inline pgd_t *pgd_alloc(struct mm_struct *mm) +{ + return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL); +} + +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); +} /* * We don't have any real pmd's, and this code never triggers because @@ -68,8 +96,12 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) static inline void pgtable_free(void *table, unsigned index_size) { - BUG_ON(index_size); /* 32-bit doesn't use this */ - free_page((unsigned long)table); + if (!index_size) { + free_page((unsigned long)table); + } else { + BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE); + kmem_cache_free(PGT_CACHE(index_size), table); + } } #define check_pgt_cache() do { } while (0) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index c219ef7be53b..ba9921bf202e 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -16,6 +16,23 @@ extern int icache_44x_need_flush; #endif /* __ASSEMBLY__ */ +#define PTE_INDEX_SIZE PTE_SHIFT +#define PMD_INDEX_SIZE 0 +#define PUD_INDEX_SIZE 0 +#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT) + +#define PMD_CACHE_INDEX PMD_INDEX_SIZE + +#ifndef __ASSEMBLY__ +#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) +#define PMD_TABLE_SIZE 0 +#define PUD_TABLE_SIZE 0 +#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) +#endif /* __ASSEMBLY__ */ + +#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) +#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) + /* * The normal case is that PTEs are 32-bits and we have a 1-page * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages. -- paulus @@ -27,22 +44,12 @@ extern int icache_44x_need_flush; * -Matt */ /* PGDIR_SHIFT determines what a top-level page table entry can map */ -#define PGDIR_SHIFT (PAGE_SHIFT + PTE_SHIFT) +#define PGDIR_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -/* - * entries per page directory level: our page-table tree is two-level, so - * we don't really have any PMD directory. 
- */ -#ifndef __ASSEMBLY__ -#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_SHIFT) -#define PGD_TABLE_SIZE (sizeof(pgd_t) << (32 - PGDIR_SHIFT)) -#endif /* __ASSEMBLY__ */ - -#define PTRS_PER_PTE (1 << PTE_SHIFT) -#define PTRS_PER_PMD 1 -#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT)) +/* Bits to mask out from a PGD to get to the PUD page */ +#define PGD_MASKED_BITS 0 #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) #define FIRST_USER_ADDRESS 0UL @@ -268,7 +275,8 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, static inline void __ptep_set_access_flags(struct mm_struct *mm, - pte_t *ptep, pte_t entry) + pte_t *ptep, pte_t entry, + unsigned long address) { unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); @@ -328,15 +336,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -#ifndef CONFIG_PPC_4K_PAGES -void pgtable_cache_init(void); -#else -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) -#endif - extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp); diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 3742b1919661..b4df2734c078 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -49,6 +49,7 @@ #define _PMD_BAD 0x0ff0 #define _PMD_PAGE_MASK 0x000c #define _PMD_PAGE_8M 0x000c +#define _PMD_PAGE_512K 0x0004 /* Until my rework is finished, 8xx still needs atomic PTE updates */ #define PTE_ATOMIC_UPDATES 1 diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h index fc7d51753f81..d0db98793dd8 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h @@ -27,9 +27,6 @@ #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -/* With 4k base page size, hugepage PTEs go at the PMD level */ -#define MIN_HUGEPTE_SHIFT PMD_SHIFT - /* PUD_SHIFT determines what a third-level page table entry can map */ #define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) #define PUD_SIZE (1UL << PUD_SHIFT) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h index 908324574f77..55b28ef3409a 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h @@ -31,9 +31,6 @@ #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) -/* With 4k base page size, hugepage PTEs go at the PMD level */ -#define MIN_HUGEPTE_SHIFT PAGE_SHIFT - /* PMD_SHIFT determines what a second-level page table entry can map */ #define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) #define PMD_SIZE (1UL << PMD_SHIFT) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 653a1838469d..c7f927e67d14 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -26,15 +26,11 @@ #else #define PMD_CACHE_INDEX PMD_INDEX_SIZE #endif + /* * Define the address range of the kernel non-linear virtual area */ - -#ifdef CONFIG_PPC_BOOK3E #define KERN_VIRT_START ASM_CONST(0x8000000000000000) -#else -#define KERN_VIRT_START ASM_CONST(0xD000000000000000) -#endif #define KERN_VIRT_SIZE 
ASM_CONST(0x0000100000000000) /* @@ -43,11 +39,7 @@ * (we keep a quarter for the virtual memmap) */ #define VMALLOC_START KERN_VIRT_START -#ifdef CONFIG_PPC_BOOK3E #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 2) -#else -#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) -#endif #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) /* @@ -85,12 +77,8 @@ * Defines the address of the vmemap area, in its own region on * hash table CPUs and after the vmalloc space on Book3E */ -#ifdef CONFIG_PPC_BOOK3E #define VMEMMAP_BASE VMALLOC_END #define VMEMMAP_END KERN_IO_START -#else -#define VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT) -#endif #define vmemmap ((struct page *)VMEMMAP_BASE) @@ -301,7 +289,8 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, * function doesn't need to flush the hash entry */ static inline void __ptep_set_access_flags(struct mm_struct *mm, - pte_t *ptep, pte_t entry) + pte_t *ptep, pte_t entry, + unsigned long address) { unsigned long bits = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); @@ -358,8 +347,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __swp_entry_to_pte(x) __pte((x).val) -void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); -void pgtable_cache_init(void); extern int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags); extern int __meminit vmemmap_create_mapping(unsigned long start, diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 1263c22d60d8..172849727054 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -226,7 +226,11 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, #ifdef CONFIG_HUGETLB_PAGE static inline int hugepd_ok(hugepd_t hpd) { +#ifdef CONFIG_PPC_8xx + return ((hpd.pd & 0x4) != 0); +#else return (hpd.pd > 0); +#endif } static inline int pmd_huge(pmd_t pmd) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index e958b7096f19..5c7db0f1a708 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -220,9 +220,12 @@ int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id, int64_t opal_pci_poll2(uint64_t id, uint64_t data); int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll); +int64_t opal_rm_int_get_xirr(__be32 *out_xirr, bool just_poll); int64_t opal_int_set_cppr(uint8_t cppr); int64_t opal_int_eoi(uint32_t xirr); +int64_t opal_rm_int_eoi(uint32_t xirr); int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr); +int64_t opal_rm_int_set_mfrr(uint32_t cpu, uint8_t mfrr); int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type, uint32_t pe_num, uint32_t tce_size, uint64_t dma_addr, uint32_t npages); diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 9bd87f269d6d..dd01212935ac 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -78,6 +78,8 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned long vmalloc_to_phys(void *vmalloc_addr); +void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); +void pgtable_cache_init(void); #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_H */ diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 1b394247afc2..0bcc75e295e3 100644 --- 
a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -93,38 +93,6 @@ static inline long register_dtl(unsigned long cpu, unsigned long vpa)
 	return vpa_call(H_VPA_REG_DTL, cpu, vpa);
 }
 
-static inline long plpar_page_set_loaned(unsigned long vpa)
-{
-	unsigned long cmo_page_sz = cmo_get_page_size();
-	long rc = 0;
-	int i;
-
-	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
-		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);
-
-	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
-		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE,
-				   vpa + i - cmo_page_sz, 0);
-
-	return rc;
-}
-
-static inline long plpar_page_set_active(unsigned long vpa)
-{
-	unsigned long cmo_page_sz = cmo_get_page_size();
-	long rc = 0;
-	int i;
-
-	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
-		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);
-
-	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
-		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED,
-				   vpa + i - cmo_page_sz, 0);
-
-	return rc;
-}
-
 extern void vpa_init(int cpu);
 
 static inline long plpar_pte_enter(unsigned long flags,
@@ -340,4 +308,9 @@ static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawr
 	return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0);
 }
 
+static inline long plapr_signal_sys_reset(long cpu)
+{
+	return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
+}
+
 #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 0132831b3081..c56ea8c84abb 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -460,5 +460,6 @@
 #define PPC_SLBIA(IH)	stringify_in_c(.long PPC_INST_SLBIA | \
 					((IH & 0x7) << 21))
+#define PPC_INVALIDATE_ERAT	PPC_SLBIA(7)
 
 #endif /* _ASM_POWERPC_PPC_OPCODE_H */
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 0f73de069f19..726288048652 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -53,7 +53,7 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev);
 struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr);
 void eeh_slot_error_detail(struct eeh_pe *pe, int severity);
 int eeh_pci_enable(struct eeh_pe *pe, int function);
-int eeh_reset_pe(struct eeh_pe *);
+int eeh_pe_reset_full(struct eeh_pe *pe);
 void eeh_save_bars(struct eeh_dev *edev);
 int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
 int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index c73750b0d9fa..025833b8df9f 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -10,9 +10,7 @@
 #include <asm/ppc-opcode.h>
 #include <asm/firmware.h>
 
-#ifndef __ASSEMBLY__
-#error __FILE__ should only be used in assembler files
-#else
+#ifdef __ASSEMBLY__
 
 #define SZL			(BITS_PER_LONG/8)
 
@@ -265,10 +263,14 @@ n:
  * latter is for those that incidentally must be excluded from probing
  * and allows them to be linked at a more optimal location within text.
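 *
 * Typical use is to tag an assembly entry point right after its
 * definition, e.g. (illustrative name only):
 *
 *	_GLOBAL(early_fixup_helper)
 *		...
 *	_ASM_NOKPROBE_SYMBOL(early_fixup_helper)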
*/ +#ifdef CONFIG_KPROBES #define _ASM_NOKPROBE_SYMBOL(entry) \ .pushsection "_kprobe_blacklist","aw"; \ PPC_LONG (entry) ; \ .popsection +#else +#define _ASM_NOKPROBE_SYMBOL(entry) +#endif #define FUNC_START(name) _GLOBAL(name) #define FUNC_END(name) @@ -779,5 +781,17 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) .long 0xa6037b7d; /* mtsrr1 r11 */ \ .long 0x2400004c /* rfid */ #endif /* !CONFIG_PPC_BOOK3E */ + #endif /* __ASSEMBLY__ */ + +/* + * Helper macro for exception table entries + */ +#define EX_TABLE(_fault, _target) \ + stringify_in_c(.section __ex_table,"a";)\ + stringify_in_c(.balign 4;) \ + stringify_in_c(.long (_fault) - . ;) \ + stringify_in_c(.long (_target) - . ;) \ + stringify_in_c(.previous) + #endif /* _ASM_POWERPC_PPC_ASM_H */ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index c07c31b0e89e..1ba814436c73 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -312,8 +312,6 @@ struct thread_struct { unsigned long mmcr2; unsigned mmcr0; unsigned used_ebb; - unsigned long lmrr; - unsigned long lmser; #endif }; @@ -404,8 +402,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #define cpu_relax() barrier() #endif -#define cpu_relax_lowlatency() cpu_relax() - /* Check that a certain kernel stack pointer is valid in task_struct p */ int validate_sp(unsigned long sp, struct task_struct *p, unsigned long nbytes); diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 7f436ba1b56f..5e57705b4759 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -159,11 +159,5 @@ struct of_drconf_cell { /* Option Vector 6: IBM PAPR hints */ #define OV6_LINUX 0x02 /* Linux is our OS */ -/* - * The architecture vector has an array of PVR mask/value pairs, - * followed by # option vectors - 1, followed by the option vectors. 
- */ -extern unsigned char ibm_architecture_vec[]; - #endif /* __KERNEL__ */ #endif /* _POWERPC_PROM_H */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 9cd4e8cbc78c..0d4531aa2052 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -153,6 +153,8 @@ #define PSSCR_EC 0x00100000 /* Exit Criterion */ #define PSSCR_ESL 0x00200000 /* Enable State Loss */ #define PSSCR_SD 0x00400000 /* Status Disable */ +#define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */ +#define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */ /* Floating Point Status and Control Register (FPSCR) Fields */ #define FPSCR_FX 0x80000000 /* FPU exception summary */ @@ -236,6 +238,7 @@ #define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */ #define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */ #define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ +#define SPRN_TIDR 144 /* Thread ID register */ #define SPRN_CTRLF 0x088 #define SPRN_CTRLT 0x098 #define CTRL_CT 0xc0000000 /* current thread */ @@ -292,8 +295,7 @@ #define SPRN_HRMOR 0x139 /* Real mode offset register */ #define SPRN_HSRR0 0x13A /* Hypervisor Save/Restore 0 */ #define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ -#define SPRN_LMRR 0x32D /* Load Monitor Region Register */ -#define SPRN_LMSER 0x32E /* Load Monitor Section Enable Register */ +#define SPRN_ASDR 0x330 /* Access segment descriptor register */ #define SPRN_IC 0x350 /* Virtual Instruction Count */ #define SPRN_VTB 0x351 /* Virtual Time Base */ #define SPRN_LDBAR 0x352 /* LD Base Address Register */ @@ -304,7 +306,7 @@ #define SPRN_PMCR 0x374 /* Power Management Control Register */ /* HFSCR and FSCR bit numbers are the same */ -#define FSCR_LM_LG 11 /* Enable Load Monitor Registers */ +#define FSCR_MSGP_LG 10 /* Enable MSGP */ #define FSCR_TAR_LG 8 /* Enable Target Address Register */ #define FSCR_EBB_LG 7 /* Enable Event Based Branching */ #define FSCR_TM_LG 5 /* Enable Transactional Memory */ @@ -314,12 +316,11 @@ #define FSCR_VECVSX_LG 1 /* Enable VMX/VSX */ #define FSCR_FP_LG 0 /* Enable Floating Point */ #define SPRN_FSCR 0x099 /* Facility Status & Control Register */ -#define FSCR_LM __MASK(FSCR_LM_LG) #define FSCR_TAR __MASK(FSCR_TAR_LG) #define FSCR_EBB __MASK(FSCR_EBB_LG) #define FSCR_DSCR __MASK(FSCR_DSCR_LG) #define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */ -#define HFSCR_LM __MASK(FSCR_LM_LG) +#define HFSCR_MSGP __MASK(FSCR_MSGP_LG) #define HFSCR_TAR __MASK(FSCR_TAR_LG) #define HFSCR_EBB __MASK(FSCR_EBB_LG) #define HFSCR_TM __MASK(FSCR_TM_LG) @@ -355,8 +356,10 @@ #define LPCR_PECE0 ASM_CONST(0x0000000000004000) /* ext. exceptions can cause exit */ #define LPCR_PECE1 ASM_CONST(0x0000000000002000) /* decrementer can cause exit */ #define LPCR_PECE2 ASM_CONST(0x0000000000001000) /* machine check etc can cause exit */ +#define LPCR_PECE_HVEE ASM_CONST(0x0000400000000000) /* P9 Wakeup on HV interrupts */ #define LPCR_MER ASM_CONST(0x0000000000000800) /* Mediated External Exception */ #define LPCR_MER_SH 11 +#define LPCR_GTSE ASM_CONST(0x0000000000000400) /* Guest Translation Shootdown Enable */ #define LPCR_TC ASM_CONST(0x0000000000000200) /* Translation control */ #define LPCR_LPES 0x0000000c #define LPCR_LPES0 ASM_CONST(0x0000000000000008) /* LPAR Env selector 0 */ @@ -377,6 +380,12 @@ #define PCR_VEC_DIS (1ul << (63-0)) /* Vec. 
disable (bit NA since POWER8) */
 #define PCR_VSX_DIS	(1ul << (63-1))	/* VSX disable (bit NA since POWER8) */
 #define PCR_TM_DIS	(1ul << (63-2))	/* Trans. memory disable (POWER8) */
+/*
+ * These bits are used in the function kvmppc_set_arch_compat() to specify and
+ * determine both the compatibility level which we want to emulate and the
+ * compatibility level which the host is capable of emulating.
+ */
+#define   PCR_ARCH_207	0x8		/* Architecture 2.07 */
 #define   PCR_ARCH_206	0x4		/* Architecture 2.06 */
 #define   PCR_ARCH_205	0x2		/* Architecture 2.05 */
 #define SPRN_HEIR	0x153	/* Hypervisor Emulated Instruction Register */
@@ -1218,6 +1227,7 @@
 #define PVR_ARCH_206	0x0f000003
 #define PVR_ARCH_206p	0x0f100003
 #define PVR_ARCH_207	0x0f000004
+#define PVR_ARCH_300	0x0f000005
 
 /* Macros for setting and retrieving special purpose registers */
 #ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/reg_8xx.h b/arch/powerpc/include/asm/reg_8xx.h
index 0197e12f7d48..1f1636124a04 100644
--- a/arch/powerpc/include/asm/reg_8xx.h
+++ b/arch/powerpc/include/asm/reg_8xx.h
@@ -4,7 +4,7 @@
 #ifndef _ASM_POWERPC_REG_8xx_H
 #define _ASM_POWERPC_REG_8xx_H
 
-#include <asm/mmu-8xx.h>
+#include <asm/mmu.h>
 
 /* Cache control on the MPC8xx is provided through some additional
  * special purpose registers.
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 0d02c11dc331..32db16d2e7ad 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -176,7 +176,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys)
 #endif /* !CONFIG_SMP */
 #endif /* !CONFIG_PPC64 */
 
-#if defined(CONFIG_PPC64) && (defined(CONFIG_SMP) || defined(CONFIG_KEXEC))
+#if defined(CONFIG_PPC64) && (defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE))
 extern void smp_release_cpus(void);
 #else
 static inline void smp_release_cpus(void) { };
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index fa37fe93bc02..8c1b913de6d7 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -52,6 +52,14 @@
 #define SYNC_IO
 #endif
 
+#ifdef CONFIG_PPC_PSERIES
+#define vcpu_is_preempted vcpu_is_preempted
+static inline bool vcpu_is_preempted(int cpu)
+{
+	return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1);
+}
+#endif
+
 static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
 	return lock.slock == 0;
diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h
new file mode 100644
index 000000000000..6720190eabec
--- /dev/null
+++ b/arch/powerpc/include/asm/stackprotector.h
@@ -0,0 +1,40 @@
+/*
+ * GCC stack protector support.
+ *
+ * Stack protector works by putting a predefined pattern at the start of
+ * the stack frame and verifying that it hasn't been overwritten when
+ * returning from the function. The pattern is called the stack canary
+ * and gcc expects it to be defined by a global variable called
+ * "__stack_chk_guard" on PPC. This unfortunately means that on SMP
+ * we cannot have a different canary value per task.
+ */
+
+#ifndef _ASM_STACKPROTECTOR_H
+#define _ASM_STACKPROTECTOR_H
+
+#include <linux/random.h>
+#include <linux/version.h>
+#include <asm/reg.h>
+
+extern unsigned long __stack_chk_guard;
+
+/*
+ * Initialize the stackprotector canary value.
+ *
+ * NOTE: this must only be called from functions that never return,
+ * and it must always be inlined.
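+ * (Changing __stack_chk_guard while a protected caller is still live
+ * would make that caller's epilogue check compare against the new
+ * value and trip a spurious stack-protector panic, hence the
+ * restriction.)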
+ */ +static __always_inline void boot_init_stack_canary(void) +{ + unsigned long canary; + + /* Try to get a semi random initial value. */ + get_random_bytes(&canary, sizeof(canary)); + canary ^= mftb(); + canary ^= LINUX_VERSION_CODE; + + current->stack_canary = canary; + __stack_chk_guard = current->stack_canary; +} + +#endif /* _ASM_STACKPROTECTOR_H */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 2fc5d4db503c..4b369d83fe9c 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -386,3 +386,4 @@ SYSCALL(mlock2) SYSCALL(copy_file_range) COMPAT_SYS_SPU(preadv2) COMPAT_SYS_SPU(pwritev2) +SYSCALL(kexec_file_load) diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index 99e1397b71da..609557569f65 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -28,6 +28,7 @@ #define tlb_start_vma(tlb, vma) do { } while (0) #define tlb_end_vma(tlb, vma) do { } while (0) #define __tlb_remove_tlb_entry __tlb_remove_tlb_entry +#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change extern void tlb_flush(struct mmu_gather *tlb); @@ -46,6 +47,21 @@ static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, #endif } +static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, + unsigned int page_size) +{ + if (!tlb->page_size) + tlb->page_size = page_size; + else if (tlb->page_size != page_size) { + tlb_flush_mmu(tlb); + /* + * update the page size after flush for the new + * mmu_gather. + */ + tlb->page_size = page_size; + } +} + #ifdef CONFIG_SMP static inline int mm_is_core_local(struct mm_struct *mm) { diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index 32e36b16773f..c05cef6ee06c 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -54,7 +54,7 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit, ); #ifdef CONFIG_PPC_PSERIES -extern void hcall_tracepoint_regfunc(void); +extern int hcall_tracepoint_regfunc(void); extern void hcall_tracepoint_unregfunc(void); TRACE_EVENT_FN_COND(hcall_entry, @@ -104,7 +104,7 @@ TRACE_EVENT_FN_COND(hcall_exit, #endif #ifdef CONFIG_PPC_POWERNV -extern void opal_tracepoint_regfunc(void); +extern int opal_tracepoint_regfunc(void); extern void opal_tracepoint_unregfunc(void); TRACE_EVENT_FN(opal_entry, diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index c266227fdd5b..a15d84d59356 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -7,6 +7,7 @@ #include <linux/sched.h> #include <linux/errno.h> #include <asm/asm-compat.h> +#include <asm/ppc_asm.h> #include <asm/processor.h> #include <asm/page.h> @@ -63,23 +64,30 @@ __access_ok((__force unsigned long)(addr), (size), get_fs())) /* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is + * The exception table consists of pairs of relative addresses: the first is + * the address of an instruction that is allowed to fault, and the second is * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. + * modified, so it is entirely up to the continuation code to figure out what + * to do. * - * All the routines below use bits of fixup code that are out of line - * with the main instruction path. 
This means when everything is well, - * we don't even have to jump over them. Further, they do not intrude - * on our cache or tlb entries. + * All the routines below use bits of fixup code that are out of line with the + * main instruction path. This means when everything is well, we don't even + * have to jump over them. Further, they do not intrude on our cache or tlb + * entries. */ +#define ARCH_HAS_RELATIVE_EXTABLE + struct exception_table_entry { - unsigned long insn; - unsigned long fixup; + int insn; + int fixup; }; +static inline unsigned long extable_fixup(const struct exception_table_entry *x) +{ + return (unsigned long)&x->fixup + x->fixup; +} + /* * These are the main single-value transfer routines. They automatically * use the right size if we just have the right pointer type. @@ -132,10 +140,7 @@ extern long __put_user_bad(void); "3: li %0,%3\n" \ " b 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".previous" \ + EX_TABLE(1b, 3b) \ : "=r" (err) \ : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err)) @@ -152,11 +157,8 @@ extern long __put_user_bad(void); "4: li %0,%3\n" \ " b 3b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,4b\n" \ - PPC_LONG "2b,4b\n" \ - ".previous" \ + EX_TABLE(1b, 4b) \ + EX_TABLE(2b, 4b) \ : "=r" (err) \ : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err)) #endif /* __powerpc64__ */ @@ -215,10 +217,7 @@ extern long __get_user_bad(void); " li %1,0\n" \ " b 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".previous" \ + EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (x) \ : "b" (addr), "i" (-EFAULT), "0" (err)) @@ -237,11 +236,8 @@ extern long __get_user_bad(void); " li %1+1,0\n" \ " b 3b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,4b\n" \ - PPC_LONG "2b,4b\n" \ - ".previous" \ + EX_TABLE(1b, 4b) \ + EX_TABLE(2b, 4b) \ : "=r" (err), "=&r" (x) \ : "b" (addr), "i" (-EFAULT), "0" (err)) #endif /* __powerpc64__ */ diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index e8cdfec8d512..eb1acee91a20 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include <uapi/asm/unistd.h> -#define NR_syscalls 382 +#define NR_syscalls 383 #define __NR__exit __NR_exit diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h index 4afe66aa1400..f3f4710d4ff5 100644 --- a/arch/powerpc/include/asm/word-at-a-time.h +++ b/arch/powerpc/include/asm/word-at-a-time.h @@ -7,6 +7,7 @@ #include <linux/kernel.h> #include <asm/asm-compat.h> +#include <asm/ppc_asm.h> #ifdef __BIG_ENDIAN__ @@ -193,10 +194,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) #endif "b 2b\n" ".previous\n" - ".section __ex_table,\"a\"\n\t" - PPC_LONG_ALIGN "\n\t" - PPC_LONG "1b,3b\n" - ".previous" + EX_TABLE(1b, 3b) : [tmp] "=&b" (tmp), [offset] "=&r" (offset), [ret] "=&r" (ret) : [addr] "b" (addr), "m" (*(unsigned long *)addr)); diff --git a/arch/powerpc/include/asm/xilinx_intc.h b/arch/powerpc/include/asm/xilinx_intc.h index 343612f8fece..3192d7f0a05b 100644 --- a/arch/powerpc/include/asm/xilinx_intc.h +++ b/arch/powerpc/include/asm/xilinx_intc.h @@ -14,7 +14,7 @@ #ifdef __KERNEL__ extern void __init xilinx_intc_init_tree(void); -extern unsigned int xilinx_intc_get_irq(void); +extern unsigned int xintc_get_irq(void); #endif /* __KERNEL__ */ #endif /* 
_ASM_POWERPC_XILINX_INTC_H */ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index c93cf35ce379..3603b6f51b11 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -573,6 +573,10 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) #define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb) +/* POWER9 registers */ +#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc) +#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd) + /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs */ @@ -596,6 +600,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67) #define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68) #define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69) +#define KVM_REG_PPC_TM_XER (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a) /* PPC64 eXternal Interrupt Controller Specification */ #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index 1672e3398270..44583a52f882 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -97,4 +97,6 @@ #define SO_CNX_ADVICE 53 +#define SCM_TIMESTAMPING_OPT_STATS 54 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index e9f5f41aa55a..2f26335a3c42 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -392,5 +392,6 @@ #define __NR_copy_file_range 379 #define __NR_preadv2 380 #define __NR_pwritev2 381 +#define __NR_kexec_file_load 382 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 1925341dbb9c..a3a6047fd395 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -19,6 +19,10 @@ CFLAGS_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) +# -fstack-protector triggers protection checks in this code, +# but it is being used too early to link to meaningful stack_chk logic. 
+CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector) + ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) @@ -58,8 +62,6 @@ obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y) obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_RTAS_PROC) += rtas-proc.o -obj-$(CONFIG_IBMVIO) += vio.o -obj-$(CONFIG_IBMEBUS) += ibmebus.o obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ eeh_driver.o eeh_event.o eeh_sysfs.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o @@ -107,8 +109,9 @@ pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \ pci-common.o pci_of_scan.o obj-$(CONFIG_PCI_MSI) += msi.o -obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \ +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o crash.o \ machine_kexec_$(BITS).o +obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file_$(BITS).o kexec_elf_$(BITS).o obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o @@ -128,7 +131,7 @@ obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o obj-$(CONFIG_PPC64) += $(obj64-y) obj-$(CONFIG_PPC32) += $(obj32-y) -ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),) +ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE),) obj-y += ppc_save_regs.o endif diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index caec7bf3b99a..0601e6a7297c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -91,6 +91,9 @@ int main(void) DEFINE(TI_livepatch_sp, offsetof(struct thread_info, livepatch_sp)); #endif +#ifdef CONFIG_CC_STACKPROTECTOR + DEFINE(TSK_STACK_CANARY, offsetof(struct task_struct, stack_canary)); +#endif DEFINE(KSP, offsetof(struct thread_struct, ksp)); DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); #ifdef CONFIG_BOOKE @@ -487,6 +490,7 @@ int main(void) /* book3s */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + DEFINE(KVM_TLB_SETS, offsetof(struct kvm, arch.tlb_sets)); DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); @@ -548,6 +552,8 @@ int main(void) DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr)); DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop)); DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort)); + DEFINE(VCPU_TID, offsetof(struct kvm_vcpu, arch.tid)); + DEFINE(VCPU_PSSCR, offsetof(struct kvm_vcpu, arch.psscr)); DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map)); DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); @@ -569,6 +575,7 @@ int main(void) DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr)); DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm)); DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm)); + DEFINE(VCPU_XER_TM, offsetof(struct kvm_vcpu, arch.xer_tm)); DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm)); DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm)); DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm)); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 52ff3f025437..917188615bf5 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -96,10 +96,11 @@ _GLOBAL(__setup_cpu_power9) mtlr r11 beqlr li r0,0 + mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR - ori r3, r3, 
LPCR_PECEDH - ori r3, r3, LPCR_HVICE + LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) + or r3, r3, r4 bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 @@ -116,10 +117,11 @@ _GLOBAL(__restore_cpu_power9) mtlr r11 beqlr li r0,0 + mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR - ori r3, r3, LPCR_PECEDH - ori r3, r3, LPCR_HVICE + LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) + or r3, r3, r4 bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 @@ -174,7 +176,7 @@ __init_FSCR: __init_HFSCR: mfspr r3,SPRN_HFSCR ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\ - HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB + HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP mtspr SPRN_HFSCR,r3 blr diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index e64a6016fba7..6877e3fa95bb 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -203,6 +203,10 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, for_each_sg(sgl, sg, nents, i) { sg->dma_address = sg_phys(sg) + get_dma_offset(dev); sg->dma_length = sg->length; + + if (attrs & DMA_ATTR_SKIP_CPU_SYNC) + continue; + __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); } @@ -235,7 +239,10 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev, unsigned long attrs) { BUG_ON(dir == DMA_NONE); - __dma_sync_page(page, offset, size, dir); + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + __dma_sync_page(page, offset, size, dir); + return page_to_phys(page) + offset + get_dma_offset(dev); } diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index f25731627d7f..8180bfd7ab93 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -372,7 +372,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) /* Find the PHB PE */ phb_pe = eeh_phb_pe_get(pe->phb); if (!phb_pe) { - pr_warn("%s Can't find PE for PHB#%d\n", + pr_warn("%s Can't find PE for PHB#%x\n", __func__, pe->phb->global_number); return -EEXIST; } @@ -664,7 +664,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) rc = eeh_ops->set_option(pe, function); if (rc) pr_warn("%s: Unexpected state change %d on " - "PHB#%d-PE#%x, err=%d\n", + "PHB#%x-PE#%x, err=%d\n", __func__, function, pe->phb->global_number, pe->addr, rc); @@ -808,76 +808,67 @@ static void *eeh_set_dev_freset(void *data, void *flag) } /** - * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second + * eeh_pe_reset_full - Complete a full reset process on the indicated PE * @pe: EEH PE * - * Assert the PCI #RST line for 1/4 second. + * This function executes a full reset procedure on a PE, including setting + * the appropriate flags, performing a fundamental or hot reset, and then + * deactivating the reset status. It is designed to be used within the EEH + * subsystem, as opposed to eeh_pe_reset which is exported to drivers and + * only performs a single operation at a time. + * + * This function will attempt to reset a PE three times before failing. */ -static void eeh_reset_pe_once(struct eeh_pe *pe) +int eeh_pe_reset_full(struct eeh_pe *pe) { + int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED); + int type = EEH_RESET_HOT; unsigned int freset = 0; + int i, state, ret; - /* Determine type of EEH reset required for - * Partitionable Endpoint, a hot-reset (1) - * or a fundamental reset (3). - * A fundamental reset required by any device under - * Partitionable Endpoint trumps hot-reset. 
+ /* + * Determine the type of reset to perform - hot or fundamental. + * Hot reset is the default operation, unless any device under the + * PE requires a fundamental reset. */ eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); if (freset) - eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); - else - eeh_ops->reset(pe, EEH_RESET_HOT); - - eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); -} - -/** - * eeh_reset_pe - Reset the indicated PE - * @pe: EEH PE - * - * This routine should be called to reset indicated device, including - * PE. A PE might include multiple PCI devices and sometimes PCI bridges - * might be involved as well. - */ -int eeh_reset_pe(struct eeh_pe *pe) -{ - int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); - int i, state, ret; + type = EEH_RESET_FUNDAMENTAL; - /* Mark as reset and block config space */ - eeh_pe_state_mark(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); + /* Mark the PE as in reset state and block config space accesses */ + eeh_pe_state_mark(pe, reset_state); - /* Take three shots at resetting the bus */ + /* Make three attempts at resetting the bus */ for (i = 0; i < 3; i++) { - eeh_reset_pe_once(pe); + ret = eeh_pe_reset(pe, type); + if (ret) + break; - /* - * EEH_PE_ISOLATED is expected to be removed after - * BAR restore. - */ + ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE); + if (ret) + break; + + /* Wait until the PE is in a functioning state */ state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if ((state & flags) == flags) { - ret = 0; - goto out; - } + if ((state & active_flags) == active_flags) + break; if (state < 0) { - pr_warn("%s: Unrecoverable slot failure on PHB#%d-PE#%x", + pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x", __func__, pe->phb->global_number, pe->addr); ret = -ENOTRECOVERABLE; - goto out; + break; } - /* We might run out of credits */ + /* Set error in case this is our last attempt */ ret = -EIO; pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n", __func__, state, pe->phb->global_number, pe->addr, (i + 1)); } -out: - eeh_pe_state_clear(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); + eeh_pe_state_clear(pe, reset_state); return ret; } @@ -1601,6 +1592,7 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe) return eeh_unfreeze_pe(pe, true); } + /** * eeh_pe_reset - Issue PE reset according to specified type * @pe: EEH PE diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index a62be72da274..d88573bdd090 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -588,7 +588,7 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL); /* Issue reset */ - ret = eeh_reset_pe(pe); + ret = eeh_pe_reset_full(pe); if (ret) { eeh_pe_state_clear(pe, EEH_PE_RECOVERING); return ret; @@ -659,7 +659,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, * config accesses. So we prefer to block them. However, controlled * PCI config accesses initiated from EEH itself are allowed. */ - rc = eeh_reset_pe(pe); + rc = eeh_pe_reset_full(pe); if (rc) return rc; @@ -671,8 +671,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, /* Clear frozen state */ rc = eeh_clear_pe_frozen_state(pe, false); - if (rc) + if (rc) { + pci_unlock_rescan_remove(); return rc; + } /* Give the system 5 seconds to finish running the user-space * hotplug shutdown scripts, e.g. ifdown for ethernet. 
Yes, @@ -732,7 +734,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) frozen_bus = eeh_pe_bus_get(pe); if (!frozen_bus) { - pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n", + pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); return; } @@ -876,7 +878,7 @@ excess_failures: * are due to poorly seated PCI cards. Only 10% or so are * due to actual, failed cards. */ - pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n" + pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n" "last hour and has been permanently disabled.\n" "Please try reseating or replacing it.\n", pe->phb->global_number, pe->addr, @@ -884,7 +886,7 @@ excess_failures: goto perm_error; hard_fail: - pr_err("EEH: Unable to recover from failure from PHB#%d-PE#%x.\n" + pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" "Please try reseating or replacing it\n", pe->phb->global_number, pe->addr); @@ -998,7 +1000,7 @@ static void eeh_handle_special_event(void) bus = eeh_pe_bus_get(phb_pe); if (!bus) { pr_err("%s: Cannot find PCI bus for " - "PHB#%d-PE#%x\n", + "PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c index 82e7327e3cd0..accbf8b5fd46 100644 --- a/arch/powerpc/kernel/eeh_event.c +++ b/arch/powerpc/kernel/eeh_event.c @@ -75,11 +75,11 @@ static int eeh_event_handler(void * dummy) if (pe) { eeh_pe_state_mark(pe, EEH_PE_RECOVERING); if (pe->type & EEH_PE_PHB) - pr_info("EEH: Detected error on PHB#%d\n", + pr_info("EEH: Detected error on PHB#%x\n", pe->phb->global_number); else pr_info("EEH: Detected PCI bus error on " - "PHB#%d-PE#%x\n", + "PHB#%x-PE#%x\n", pe->phb->global_number, pe->addr); eeh_handle_event(pe); eeh_pe_state_clear(pe, EEH_PE_RECOVERING); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index de7d091c4c31..cc4b206f77e4 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -104,7 +104,7 @@ int eeh_phb_pe_create(struct pci_controller *phb) /* Put it into the list */ list_add_tail(&pe->child, &eeh_phb_pe); - pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number); + pr_debug("EEH: Add PE for PHB#%x\n", phb->global_number); return 0; } @@ -333,7 +333,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) /* Check if the PE number is valid */ if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) { - pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%d\n", + pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n", __func__, edev->config_addr, edev->phb->global_number); return -EINVAL; } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 3841d749a430..5742dbdbee46 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -674,7 +674,11 @@ BEGIN_FTR_SECTION mtspr SPRN_SPEFSCR,r0 /* restore SPEFSCR reg */ END_FTR_SECTION_IFSET(CPU_FTR_SPE) #endif /* CONFIG_SPE */ - +#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP) + lwz r0,TSK_STACK_CANARY(r2) + lis r4,__stack_chk_guard@ha + stw r0,__stack_chk_guard@l(r4) +#endif lwz r0,_CCR(r1) mtcrf 0xFF,r0 /* r3-r12 are destroyed -- Cort */ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 38a1f96430e1..45b453e4d0c8 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -923,10 +923,10 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x340) addi r3,r1,STACK_FRAME_OVERHEAD - bl .save_nvgprs + bl save_nvgprs 
INTS_RESTORE_HARD - bl .unknown_exception - b .ret_from_except + bl unknown_exception + b ret_from_except /* * An interrupt came in while soft-disabled; We mark paca->irq_happened diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 08ba447a4b3d..d39d6118c6e9 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -116,7 +116,9 @@ EXC_VIRT_NONE(0x4000, 0x4100) EXC_REAL_BEGIN(system_reset, 0x100, 0x200) SET_SCRATCH0(r13) - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, + GET_PACA(r13) + clrrdi r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */ + EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD, IDLETEST, 0x100) EXC_REAL_END(system_reset, 0x100, 0x200) @@ -124,6 +126,9 @@ EXC_VIRT_NONE(0x4100, 0x4200) #ifdef CONFIG_PPC_P7_NAP EXC_COMMON_BEGIN(system_reset_idle_common) +BEGIN_FTR_SECTION + GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */ +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) bl pnv_restore_hyp_resource li r0,PNV_THREAD_RUNNING @@ -169,7 +174,7 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x300) SET_SCRATCH0(r13) /* save r13 */ /* * Running native on arch 2.06 or later, we may wakeup from winkle - * inside machine check. If yes, then last bit of HSPGR0 would be set + * inside machine check. If yes, then last bit of HSPRG0 would be set * to 1. Hence clear it unconditionally. */ GET_PACA(r13) @@ -388,7 +393,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) /* * Go back to winkle. Please note that this thread was woken up in * machine check from winkle and have not restored the per-subcore - * state. Hence before going back to winkle, set last bit of HSPGR0 + * state. Hence before going back to winkle, set last bit of HSPRG0 * to 1. This will make sure that if this thread gets woken up * again at reset vector 0x100 then it will get chance to restore * the subcore state. @@ -1403,7 +1408,7 @@ USE_TEXT_SECTION() /* * Hash table stuff */ - .align 7 + .balign IFETCH_ALIGN_BYTES do_hash_page: #ifdef CONFIG_PPC_STD_MMU_64 andis. r0,r4,0xa410 /* weird error? */ diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index a95639b8d4ac..5c9f50c1aa99 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -47,13 +47,11 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) unsigned int replaced; /* - * Note: Due to modules and __init, code can - * disappear and change, we need to protect against faulting - * as well as code changing. We do this by using the - * probe_kernel_* functions. - * - * No real locking needed, this code is run through - * kstop_machine, or before SMP starts. + * Note: + * We are paranoid about modifying text, as if a bug was to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with probe_kernel_*(), and make + * sure what we read is what we expected it to be before modifying it. */ /* read the text we want to modify */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 04c546e20cc0..1dc5eae2ced3 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -107,12 +107,19 @@ __secondary_hold_acknowledge: * crash_kernel region. The loader is responsible for * observing the alignment requirement. 
*/ + +#ifdef CONFIG_RELOCATABLE_TEST +#define RUN_AT_LOAD_DEFAULT 1 /* Test relocation, do not copy to 0 */ +#else +#define RUN_AT_LOAD_DEFAULT 0x72756e30 /* "run0" -- relocate to 0 by default */ +#endif + /* Do not move this variable as kexec-tools knows about it. */ . = 0x5c .globl __run_at_load __run_at_load: DEFINE_FIXED_SYMBOL(__run_at_load) - .long 0x72756e30 /* "run0" -- relocate to 0 by default */ + .long RUN_AT_LOAD_DEFAULT #endif . = 0x60 @@ -153,7 +160,7 @@ __secondary_hold: cmpdi 0,r12,0 beq 100b -#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC) +#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE) #ifdef CONFIG_PPC_BOOK3E tovirt(r12,r12) #endif @@ -214,9 +221,9 @@ booting_thread_hwid: */ _GLOBAL(book3e_start_thread) LOAD_REG_IMMEDIATE(r5, MSR_KERNEL) - cmpi 0, r3, 0 + cmpwi r3, 0 beq 10f - cmpi 0, r3, 1 + cmpwi r3, 1 beq 11f /* If the thread id is invalid, just exit. */ b 13f @@ -241,9 +248,9 @@ _GLOBAL(book3e_start_thread) * r3 = the thread physical id */ _GLOBAL(book3e_stop_thread) - cmpi 0, r3, 0 + cmpwi r3, 0 beq 10f - cmpi 0, r3, 1 + cmpwi r3, 1 beq 10f /* If the thread id is invalid, just exit. */ b 13f diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index fb133a163263..1a9c99d3e5d8 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -73,6 +73,9 @@ #define RPN_PATTERN 0x00f0 #endif +#define PAGE_SHIFT_512K 19 +#define PAGE_SHIFT_8M 23 + __HEAD _ENTRY(_stext); _ENTRY(_start); @@ -322,7 +325,7 @@ SystemCall: #endif InstructionTLBMiss: -#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) +#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) mtspr SPRN_SPRG_SCRATCH2, r3 #endif EXCEPTION_PROLOG_0 @@ -332,10 +335,12 @@ InstructionTLBMiss: */ mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) -#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ +#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) mfcr r3 +#endif +#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) IS_KERNEL(r11, r10) #endif mfspr r11, SPRN_M_TW /* Get level 1 table */ @@ -343,7 +348,6 @@ InstructionTLBMiss: BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: - mtcr r3 #endif /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 @@ -351,14 +355,25 @@ InstructionTLBMiss: /* Extract level 2 index */ rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 +#ifdef CONFIG_HUGETLB_PAGE + mtcr r11 + bt- 28, 10f /* bit 28 = Large page (8M) */ + bt- 29, 20f /* bit 29 = Large page (8M or 512k) */ +#endif rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ - +4: +#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) + mtcr r3 +#endif /* Insert the APG into the TWC from the Linux PTE. */ rlwimi r11, r10, 0, 25, 26 /* Load the MI_TWC with the attributes for this "segment." */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ +#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) + rlwimi r10, r11, 1, MI_SPS16K +#endif #ifdef CONFIG_SWAP rlwinm r11, r10, 32-5, _PAGE_PRESENT and r11, r11, r10 @@ -371,16 +386,45 @@ InstructionTLBMiss: * set. 
All other Linux PTE bits control the behavior * of the MMU. */ +#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) + rlwimi r10, r11, 0, 0x0ff0 /* Set 24-27, clear 20-23 */ +#else rlwimi r10, r11, 0, 0x0ff8 /* Set 24-27, clear 20-23,28 */ +#endif MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ -#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) +#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) mfspr r3, SPRN_SPRG_SCRATCH2 #endif EXCEPTION_EPILOG_0 rfi +#ifdef CONFIG_HUGETLB_PAGE +10: /* 8M pages */ +#ifdef CONFIG_PPC_16K_PAGES + /* Extract level 2 index */ + rlwinm r10, r10, 32 - (PAGE_SHIFT_8M - PAGE_SHIFT), 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1), 29 + /* Add level 2 base */ + rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1) - 1 +#else + /* Level 2 base */ + rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK +#endif + lwz r10, 0(r10) /* Get the pte */ + rlwinm r11, r11, 0, 0xf + b 4b + +20: /* 512k pages */ + /* Extract level 2 index */ + rlwinm r10, r10, 32 - (PAGE_SHIFT_512K - PAGE_SHIFT), 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1), 29 + /* Add level 2 base */ + rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1 + lwz r10, 0(r10) /* Get the pte */ + rlwinm r11, r11, 0, 0xf + b 4b +#endif + . = 0x1200 DataStoreTLBMiss: mtspr SPRN_SPRG_SCRATCH2, r3 @@ -407,7 +451,6 @@ _ENTRY(DTLBMiss_jmp) #endif blt cr7, DTLBMissLinear 3: - mtcr r3 mfspr r10, SPRN_MD_EPN /* Insert level 1 index */ @@ -418,8 +461,15 @@ _ENTRY(DTLBMiss_jmp) */ /* Extract level 2 index */ rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 +#ifdef CONFIG_HUGETLB_PAGE + mtcr r11 + bt- 28, 10f /* bit 28 = Large page (8M) */ + bt- 29, 20f /* bit 29 = Large page (8M or 512k) */ +#endif rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ +4: + mtcr r3 /* Insert the Guarded flag and APG into the TWC from the Linux PTE. * It is bit 26-27 of both the Linux PTE and the TWC (at least @@ -434,6 +484,11 @@ _ENTRY(DTLBMiss_jmp) rlwimi r11, r10, 32-5, 30, 30 MTSPR_CPU6(SPRN_MD_TWC, r11, r3) + /* In 4k pages mode, SPS (bit 28) in RPN must match PS[1] (bit 29) + * In 16k pages mode, SPS is always 1 */ +#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) + rlwimi r10, r11, 1, MD_SPS16K +#endif /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. * We also need to know if the insn is a load/store, so: * Clear _PAGE_PRESENT and load that which will @@ -455,7 +510,11 @@ _ENTRY(DTLBMiss_jmp) * of the MMU. 
*/ li r11, RPN_PATTERN +#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) + rlwimi r10, r11, 0, 24, 27 /* Set 24-27 */ +#else rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ +#endif rlwimi r10, r11, 0, 20, 20 /* clear 20 */ MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ @@ -465,6 +524,30 @@ _ENTRY(DTLBMiss_jmp) EXCEPTION_EPILOG_0 rfi +#ifdef CONFIG_HUGETLB_PAGE +10: /* 8M pages */ + /* Extract level 2 index */ +#ifdef CONFIG_PPC_16K_PAGES + rlwinm r10, r10, 32 - (PAGE_SHIFT_8M - PAGE_SHIFT), 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1), 29 + /* Add level 2 base */ + rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1) - 1 +#else + /* Level 2 base */ + rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK +#endif + lwz r10, 0(r10) /* Get the pte */ + rlwinm r11, r11, 0, 0xf + b 4b + +20: /* 512k pages */ + /* Extract level 2 index */ + rlwinm r10, r10, 32 - (PAGE_SHIFT_512K - PAGE_SHIFT), 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1), 29 + /* Add level 2 base */ + rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1 + lwz r10, 0(r10) /* Get the pte */ + rlwinm r11, r11, 0, 0xf + b 4b +#endif /* This is an instruction TLB error on the MPC8xx. This could be due * to many reasons, such as executing guarded memory or illegal instruction @@ -586,6 +669,9 @@ _ENTRY(FixupDAR_cmp) /* Insert level 1 index */ 3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ + mtcr r11 + bt 28,200f /* bit 28 = Large page (8M) */ + bt 29,202f /* bit 29 = Large page (8M or 512K) */ rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ /* Insert level 2 index */ rlwimi r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 @@ -611,6 +697,27 @@ _ENTRY(FixupDAR_cmp) 141: mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed /* Nope, go back to normal TLB processing */ + /* concat physical page address(r11) and page offset(r10) */ +200: +#ifdef CONFIG_PPC_16K_PAGES + rlwinm r11, r11, 0, 0, 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1) - 1 + rlwimi r11, r10, 32 - (PAGE_SHIFT_8M - 2), 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1), 29 +#else + rlwinm r11, r10, 0, ~HUGEPD_SHIFT_MASK +#endif + lwz r11, 0(r11) /* Get the pte */ + /* concat physical page address(r11) and page offset(r10) */ + rlwimi r11, r10, 0, 32 - PAGE_SHIFT_8M, 31 + b 201b + +202: + rlwinm r11, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1 + rlwimi r11, r10, 32 - (PAGE_SHIFT_512K - 2), 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1), 29 + lwz r11, 0(r11) /* Get the pte */ + /* concat physical page address(r11) and page offset(r10) */ + rlwimi r11, r10, 0, 32 - PAGE_SHIFT_512K, 31 + b 201b + 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ mtspr SPRN_DSISR, r10 diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c new file mode 100644 index 000000000000..6acffd34a70f --- /dev/null +++ b/arch/powerpc/kernel/kexec_elf_64.c @@ -0,0 +1,663 @@ +/* + * Load ELF vmlinux file for the kexec_file_load syscall. + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * Copyright (C) 2016 IBM Corporation + * + * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c. + * Heavily modified for the kernel by + * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) "kexec_elf: " fmt + +#include <linux/elf.h> +#include <linux/kexec.h> +#include <linux/libfdt.h> +#include <linux/module.h> +#include <linux/of_fdt.h> +#include <linux/slab.h> +#include <linux/types.h> + +#define PURGATORY_STACK_SIZE (16 * 1024) + +#define elf_addr_to_cpu elf64_to_cpu + +#ifndef Elf_Rel +#define Elf_Rel Elf64_Rel +#endif /* Elf_Rel */ + +struct elf_info { + /* + * Where the ELF binary contents are kept. + * Memory managed by the user of the struct. + */ + const char *buffer; + + const struct elfhdr *ehdr; + const struct elf_phdr *proghdrs; + struct elf_shdr *sechdrs; +}; + +static inline bool elf_is_elf_file(const struct elfhdr *ehdr) +{ + return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0; +} + +static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, uint64_t value) +{ + if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) + value = le64_to_cpu(value); + else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) + value = be64_to_cpu(value); + + return value; +} + +static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value) +{ + if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) + value = le16_to_cpu(value); + else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) + value = be16_to_cpu(value); + + return value; +} + +static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value) +{ + if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) + value = le32_to_cpu(value); + else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) + value = be32_to_cpu(value); + + return value; +} + +/** + * elf_is_ehdr_sane - check that it is safe to use the ELF header + * @buf_len: size of the buffer in which the ELF file is loaded. + */ +static bool elf_is_ehdr_sane(const struct elfhdr *ehdr, size_t buf_len) +{ + if (ehdr->e_phnum > 0 && ehdr->e_phentsize != sizeof(struct elf_phdr)) { + pr_debug("Bad program header size.\n"); + return false; + } else if (ehdr->e_shnum > 0 && + ehdr->e_shentsize != sizeof(struct elf_shdr)) { + pr_debug("Bad section header size.\n"); + return false; + } else if (ehdr->e_ident[EI_VERSION] != EV_CURRENT || + ehdr->e_version != EV_CURRENT) { + pr_debug("Unknown ELF version.\n"); + return false; + } + + if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) { + size_t phdr_size; + + /* + * e_phnum is at most 65535 so calculating the size of the + * program header cannot overflow. + */ + phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum; + + /* Sanity check the program header table location. */ + if (ehdr->e_phoff + phdr_size < ehdr->e_phoff) { + pr_debug("Program headers at invalid location.\n"); + return false; + } else if (ehdr->e_phoff + phdr_size > buf_len) { + pr_debug("Program headers truncated.\n"); + return false; + } + } + + if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) { + size_t shdr_size; + + /* + * e_shnum is at most 65536 so calculating + * the size of the section header cannot overflow. + */ + shdr_size = sizeof(struct elf_shdr) * ehdr->e_shnum; + + /* Sanity check the section header table location. 
*/ + if (ehdr->e_shoff + shdr_size < ehdr->e_shoff) { + pr_debug("Section headers at invalid location.\n"); + return false; + } else if (ehdr->e_shoff + shdr_size > buf_len) { + pr_debug("Section headers truncated.\n"); + return false; + } + } + + return true; +} + +static int elf_read_ehdr(const char *buf, size_t len, struct elfhdr *ehdr) +{ + struct elfhdr *buf_ehdr; + + if (len < sizeof(*buf_ehdr)) { + pr_debug("Buffer is too small to hold ELF header.\n"); + return -ENOEXEC; + } + + memset(ehdr, 0, sizeof(*ehdr)); + memcpy(ehdr->e_ident, buf, sizeof(ehdr->e_ident)); + if (!elf_is_elf_file(ehdr)) { + pr_debug("No ELF header magic.\n"); + return -ENOEXEC; + } + + if (ehdr->e_ident[EI_CLASS] != ELF_CLASS) { + pr_debug("Not a supported ELF class.\n"); + return -ENOEXEC; + } else if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB && + ehdr->e_ident[EI_DATA] != ELFDATA2MSB) { + pr_debug("Not a supported ELF data format.\n"); + return -ENOEXEC; + } + + buf_ehdr = (struct elfhdr *) buf; + if (elf16_to_cpu(ehdr, buf_ehdr->e_ehsize) != sizeof(*buf_ehdr)) { + pr_debug("Bad ELF header size.\n"); + return -ENOEXEC; + } + + ehdr->e_type = elf16_to_cpu(ehdr, buf_ehdr->e_type); + ehdr->e_machine = elf16_to_cpu(ehdr, buf_ehdr->e_machine); + ehdr->e_version = elf32_to_cpu(ehdr, buf_ehdr->e_version); + ehdr->e_entry = elf_addr_to_cpu(ehdr, buf_ehdr->e_entry); + ehdr->e_phoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_phoff); + ehdr->e_shoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_shoff); + ehdr->e_flags = elf32_to_cpu(ehdr, buf_ehdr->e_flags); + ehdr->e_phentsize = elf16_to_cpu(ehdr, buf_ehdr->e_phentsize); + ehdr->e_phnum = elf16_to_cpu(ehdr, buf_ehdr->e_phnum); + ehdr->e_shentsize = elf16_to_cpu(ehdr, buf_ehdr->e_shentsize); + ehdr->e_shnum = elf16_to_cpu(ehdr, buf_ehdr->e_shnum); + ehdr->e_shstrndx = elf16_to_cpu(ehdr, buf_ehdr->e_shstrndx); + + return elf_is_ehdr_sane(ehdr, len) ? 0 : -ENOEXEC; +} + +/** + * elf_is_phdr_sane - check that it is safe to use the program header + * @buf_len: size of the buffer in which the ELF file is loaded. + */ +static bool elf_is_phdr_sane(const struct elf_phdr *phdr, size_t buf_len) +{ + + if (phdr->p_offset + phdr->p_filesz < phdr->p_offset) { + pr_debug("ELF segment location wraps around.\n"); + return false; + } else if (phdr->p_offset + phdr->p_filesz > buf_len) { + pr_debug("ELF segment not in file.\n"); + return false; + } else if (phdr->p_paddr + phdr->p_memsz < phdr->p_paddr) { + pr_debug("ELF segment address wraps around.\n"); + return false; + } + + return true; +} + +static int elf_read_phdr(const char *buf, size_t len, struct elf_info *elf_info, + int idx) +{ + /* Override the const in proghdrs, we are the ones doing the loading. */ + struct elf_phdr *phdr = (struct elf_phdr *) &elf_info->proghdrs[idx]; + const char *pbuf; + struct elf_phdr *buf_phdr; + + pbuf = buf + elf_info->ehdr->e_phoff + (idx * sizeof(*buf_phdr)); + buf_phdr = (struct elf_phdr *) pbuf; + + phdr->p_type = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_type); + phdr->p_offset = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_offset); + phdr->p_paddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_paddr); + phdr->p_vaddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_vaddr); + phdr->p_flags = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_flags); + + /* + * The following fields have a type equivalent to Elf_Addr + * both in 32 bit and 64 bit ELF. 
+ */ + phdr->p_filesz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_filesz); + phdr->p_memsz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_memsz); + phdr->p_align = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_align); + + return elf_is_phdr_sane(phdr, len) ? 0 : -ENOEXEC; +} + +/** + * elf_read_phdrs - read the program headers from the buffer + * + * This function assumes that the program header table was checked for sanity. + * Use elf_is_ehdr_sane() if it wasn't. + */ +static int elf_read_phdrs(const char *buf, size_t len, + struct elf_info *elf_info) +{ + size_t phdr_size, i; + const struct elfhdr *ehdr = elf_info->ehdr; + + /* + * e_phnum is at most 65535 so calculating the size of the + * program header cannot overflow. + */ + phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum; + + elf_info->proghdrs = kzalloc(phdr_size, GFP_KERNEL); + if (!elf_info->proghdrs) + return -ENOMEM; + + for (i = 0; i < ehdr->e_phnum; i++) { + int ret; + + ret = elf_read_phdr(buf, len, elf_info, i); + if (ret) { + kfree(elf_info->proghdrs); + elf_info->proghdrs = NULL; + return ret; + } + } + + return 0; +} + +/** + * elf_is_shdr_sane - check that it is safe to use the section header + * @buf_len: size of the buffer in which the ELF file is loaded. + */ +static bool elf_is_shdr_sane(const struct elf_shdr *shdr, size_t buf_len) +{ + bool size_ok; + + /* SHT_NULL headers have undefined values, so we can't check them. */ + if (shdr->sh_type == SHT_NULL) + return true; + + /* Now verify sh_entsize */ + switch (shdr->sh_type) { + case SHT_SYMTAB: + size_ok = shdr->sh_entsize == sizeof(Elf_Sym); + break; + case SHT_RELA: + size_ok = shdr->sh_entsize == sizeof(Elf_Rela); + break; + case SHT_DYNAMIC: + size_ok = shdr->sh_entsize == sizeof(Elf_Dyn); + break; + case SHT_REL: + size_ok = shdr->sh_entsize == sizeof(Elf_Rel); + break; + case SHT_NOTE: + case SHT_PROGBITS: + case SHT_HASH: + case SHT_NOBITS: + default: + /* + * This is a section whose entsize requirements + * I don't care about. If I don't know about + * the section I can't care about it's entsize + * requirements. + */ + size_ok = true; + break; + } + + if (!size_ok) { + pr_debug("ELF section with wrong entry size.\n"); + return false; + } else if (shdr->sh_addr + shdr->sh_size < shdr->sh_addr) { + pr_debug("ELF section address wraps around.\n"); + return false; + } + + if (shdr->sh_type != SHT_NOBITS) { + if (shdr->sh_offset + shdr->sh_size < shdr->sh_offset) { + pr_debug("ELF section location wraps around.\n"); + return false; + } else if (shdr->sh_offset + shdr->sh_size > buf_len) { + pr_debug("ELF section not in file.\n"); + return false; + } + } + + return true; +} + +static int elf_read_shdr(const char *buf, size_t len, struct elf_info *elf_info, + int idx) +{ + struct elf_shdr *shdr = &elf_info->sechdrs[idx]; + const struct elfhdr *ehdr = elf_info->ehdr; + const char *sbuf; + struct elf_shdr *buf_shdr; + + sbuf = buf + ehdr->e_shoff + idx * sizeof(*buf_shdr); + buf_shdr = (struct elf_shdr *) sbuf; + + shdr->sh_name = elf32_to_cpu(ehdr, buf_shdr->sh_name); + shdr->sh_type = elf32_to_cpu(ehdr, buf_shdr->sh_type); + shdr->sh_addr = elf_addr_to_cpu(ehdr, buf_shdr->sh_addr); + shdr->sh_offset = elf_addr_to_cpu(ehdr, buf_shdr->sh_offset); + shdr->sh_link = elf32_to_cpu(ehdr, buf_shdr->sh_link); + shdr->sh_info = elf32_to_cpu(ehdr, buf_shdr->sh_info); + + /* + * The following fields have a type equivalent to Elf_Addr + * both in 32 bit and 64 bit ELF. 
+ */ + shdr->sh_flags = elf_addr_to_cpu(ehdr, buf_shdr->sh_flags); + shdr->sh_size = elf_addr_to_cpu(ehdr, buf_shdr->sh_size); + shdr->sh_addralign = elf_addr_to_cpu(ehdr, buf_shdr->sh_addralign); + shdr->sh_entsize = elf_addr_to_cpu(ehdr, buf_shdr->sh_entsize); + + return elf_is_shdr_sane(shdr, len) ? 0 : -ENOEXEC; +} + +/** + * elf_read_shdrs - read the section headers from the buffer + * + * This function assumes that the section header table was checked for sanity. + * Use elf_is_ehdr_sane() if it wasn't. + */ +static int elf_read_shdrs(const char *buf, size_t len, + struct elf_info *elf_info) +{ + size_t shdr_size, i; + + /* + * e_shnum is at most 65536 so calculating + * the size of the section header cannot overflow. + */ + shdr_size = sizeof(struct elf_shdr) * elf_info->ehdr->e_shnum; + + elf_info->sechdrs = kzalloc(shdr_size, GFP_KERNEL); + if (!elf_info->sechdrs) + return -ENOMEM; + + for (i = 0; i < elf_info->ehdr->e_shnum; i++) { + int ret; + + ret = elf_read_shdr(buf, len, elf_info, i); + if (ret) { + kfree(elf_info->sechdrs); + elf_info->sechdrs = NULL; + return ret; + } + } + + return 0; +} + +/** + * elf_read_from_buffer - read ELF file and sets up ELF header and ELF info + * @buf: Buffer to read ELF file from. + * @len: Size of @buf. + * @ehdr: Pointer to existing struct which will be populated. + * @elf_info: Pointer to existing struct which will be populated. + * + * This function allows reading ELF files with different byte order than + * the kernel, byte-swapping the fields as needed. + * + * Return: + * On success returns 0, and the caller should call elf_free_info(elf_info) to + * free the memory allocated for the section and program headers. + */ +int elf_read_from_buffer(const char *buf, size_t len, struct elfhdr *ehdr, + struct elf_info *elf_info) +{ + int ret; + + ret = elf_read_ehdr(buf, len, ehdr); + if (ret) + return ret; + + elf_info->buffer = buf; + elf_info->ehdr = ehdr; + if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) { + ret = elf_read_phdrs(buf, len, elf_info); + if (ret) + return ret; + } + if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) { + ret = elf_read_shdrs(buf, len, elf_info); + if (ret) { + kfree(elf_info->proghdrs); + return ret; + } + } + + return 0; +} + +/** + * elf_free_info - free memory allocated by elf_read_from_buffer + */ +void elf_free_info(struct elf_info *elf_info) +{ + kfree(elf_info->proghdrs); + kfree(elf_info->sechdrs); + memset(elf_info, 0, sizeof(*elf_info)); +} +/** + * build_elf_exec_info - read ELF executable and check that we can use it + */ +static int build_elf_exec_info(const char *buf, size_t len, struct elfhdr *ehdr, + struct elf_info *elf_info) +{ + int i; + int ret; + + ret = elf_read_from_buffer(buf, len, ehdr, elf_info); + if (ret) + return ret; + + /* Big endian vmlinux has type ET_DYN. */ + if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) { + pr_err("Not an ELF executable.\n"); + goto error; + } else if (!elf_info->proghdrs) { + pr_err("No ELF program header.\n"); + goto error; + } + + for (i = 0; i < ehdr->e_phnum; i++) { + /* + * Kexec does not support loading interpreters. + * In addition this check keeps us from attempting + * to kexec ordinay executables. 
+ */ + if (elf_info->proghdrs[i].p_type == PT_INTERP) { + pr_err("Requires an ELF interpreter.\n"); + goto error; + } + } + + return 0; +error: + elf_free_info(elf_info); + return -ENOEXEC; +} + +static int elf64_probe(const char *buf, unsigned long len) +{ + struct elfhdr ehdr; + struct elf_info elf_info; + int ret; + + ret = build_elf_exec_info(buf, len, &ehdr, &elf_info); + if (ret) + return ret; + + elf_free_info(&elf_info); + + return elf_check_arch(&ehdr) ? 0 : -ENOEXEC; +} + +/** + * elf_exec_load - load ELF executable image + * @lowest_load_addr: On return, will be the address where the first PT_LOAD + * section will be loaded in memory. + * + * Return: + * 0 on success, negative value on failure. + */ +static int elf_exec_load(struct kimage *image, struct elfhdr *ehdr, + struct elf_info *elf_info, + unsigned long *lowest_load_addr) +{ + unsigned long base = 0, lowest_addr = UINT_MAX; + int ret; + size_t i; + struct kexec_buf kbuf = { .image = image, .buf_max = ppc64_rma_size, + .top_down = false }; + + /* Read in the PT_LOAD segments. */ + for (i = 0; i < ehdr->e_phnum; i++) { + unsigned long load_addr; + size_t size; + const struct elf_phdr *phdr; + + phdr = &elf_info->proghdrs[i]; + if (phdr->p_type != PT_LOAD) + continue; + + size = phdr->p_filesz; + if (size > phdr->p_memsz) + size = phdr->p_memsz; + + kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset; + kbuf.bufsz = size; + kbuf.memsz = phdr->p_memsz; + kbuf.buf_align = phdr->p_align; + kbuf.buf_min = phdr->p_paddr + base; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + load_addr = kbuf.mem; + + if (load_addr < lowest_addr) + lowest_addr = load_addr; + } + + /* Update entry point to reflect new load address. */ + ehdr->e_entry += base; + + *lowest_load_addr = lowest_addr; + ret = 0; + out: + return ret; +} + +static void *elf64_load(struct kimage *image, char *kernel_buf, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len) +{ + int ret; + unsigned int fdt_size; + unsigned long kernel_load_addr, purgatory_load_addr; + unsigned long initrd_load_addr = 0, fdt_load_addr; + void *fdt; + const void *slave_code; + struct elfhdr ehdr; + struct elf_info elf_info; + struct kexec_buf kbuf = { .image = image, .buf_min = 0, + .buf_max = ppc64_rma_size }; + + ret = build_elf_exec_info(kernel_buf, kernel_len, &ehdr, &elf_info); + if (ret) + goto out; + + ret = elf_exec_load(image, &ehdr, &elf_info, &kernel_load_addr); + if (ret) + goto out; + + pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr); + + ret = kexec_load_purgatory(image, 0, ppc64_rma_size, true, + &purgatory_load_addr); + if (ret) { + pr_err("Loading purgatory failed.\n"); + goto out; + } + + pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr); + + if (initrd != NULL) { + kbuf.buffer = initrd; + kbuf.bufsz = kbuf.memsz = initrd_len; + kbuf.buf_align = PAGE_SIZE; + kbuf.top_down = false; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + initrd_load_addr = kbuf.mem; + + pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr); + } + + fdt_size = fdt_totalsize(initial_boot_params) * 2; + fdt = kmalloc(fdt_size, GFP_KERNEL); + if (!fdt) { + pr_err("Not enough memory for the device tree.\n"); + ret = -ENOMEM; + goto out; + } + ret = fdt_open_into(initial_boot_params, fdt, fdt_size); + if (ret < 0) { + pr_err("Error setting up the new device tree.\n"); + ret = -EINVAL; + goto out; + } + + ret = setup_new_fdt(fdt, initrd_load_addr, initrd_len, cmdline); + if (ret) + goto out; + + 
fdt_pack(fdt); + + kbuf.buffer = fdt; + kbuf.bufsz = kbuf.memsz = fdt_size; + kbuf.buf_align = PAGE_SIZE; + kbuf.top_down = true; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + fdt_load_addr = kbuf.mem; + + pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr); + + slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset; + ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr, + fdt_load_addr); + if (ret) + pr_err("Error setting up the purgatory.\n"); + +out: + elf_free_info(&elf_info); + + /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */ + return ret ? ERR_PTR(ret) : fdt; +} + +struct kexec_file_ops kexec_elf64_ops = { + .probe = elf64_probe, + .load = elf64_load, +}; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index e785cc9e1ecd..ad108b842669 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -140,13 +140,16 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, regs->link = (unsigned long)kretprobe_trampoline; } -static int __kprobes kprobe_handler(struct pt_regs *regs) +int __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0; unsigned int *addr = (unsigned int *)regs->nip; struct kprobe_ctlblk *kcb; + if (user_mode(regs)) + return 0; + /* * We don't want to be preempted for the entire * duration of kprobe processing @@ -359,12 +362,12 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, * single-stepped a copy of the instruction. The address of this * copy is p->ainsn.insn. */ -static int __kprobes post_kprobe_handler(struct pt_regs *regs) +int __kprobes kprobe_post_handler(struct pt_regs *regs) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - if (!cur) + if (!cur || user_mode(regs)) return 0; /* make sure we got here for instruction we have a kprobe on */ @@ -449,7 +452,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) * zero, try to fix up. */ if ((entry = search_exception_tables(regs->nip)) != NULL) { - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return 1; } @@ -470,25 +473,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) int __kprobes kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - - if (args->regs && user_mode(args->regs)) - return ret; - - switch (val) { - case DIE_BPT: - if (kprobe_handler(args->regs)) - ret = NOTIFY_STOP; - break; - case DIE_SSTEP: - if (post_kprobe_handler(args->regs)) - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; + return NOTIFY_DONE; } unsigned long arch_deref_entry_point(void *entry) diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index a205fa3d9bf3..5c12e21d0d1a 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -310,7 +310,7 @@ void default_machine_kexec(struct kimage *image) if (!kdump_in_progress()) kexec_prepare_cpus(); - pr_debug("kexec: Starting switchover sequence.\n"); + printk("kexec: Starting switchover sequence.\n"); /* switch to a staticly allocated stack. Based on irq stack code. * We setup preempt_count to avoid using VMX in memcpy. 
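A note on the new ELF loader above: elf_is_ehdr_sane(), elf_is_phdr_sane() and elf_is_shdr_sane() in kexec_elf_64.c all lean on the same overflow-safe bounds idiom, testing base + size < base before comparing against the buffer length, so a wrapped unsigned sum can never slip past the truncation check. A minimal standalone sketch of the pattern (the helper name range_ok is illustrative and not part of the patch):

#include <stdbool.h>
#include <stddef.h>

/*
 * Illustrative helper: true only if [off, off + size) lies entirely
 * within a buffer of buf_len bytes. The first test catches unsigned
 * wrap-around, mirroring the checks in elf_is_ehdr_sane().
 */
static bool range_ok(size_t off, size_t size, size_t buf_len)
{
	if (off + size < off)		/* sum wrapped past SIZE_MAX */
		return false;
	return off + size <= buf_len;	/* fully inside the buffer */
}

In the patch the same two tests appear inline, e.g. ehdr->e_phoff + phdr_size < ehdr->e_phoff followed by the comparison against buf_len; the helper above just factors them out for clarity.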
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c new file mode 100644 index 000000000000..7abc8a75ee48 --- /dev/null +++ b/arch/powerpc/kernel/machine_kexec_file_64.c @@ -0,0 +1,338 @@ +/* + * ppc64 code to implement the kexec_file_load syscall + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * Copyright (C) 2016 IBM Corporation + * + * Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c. + * Heavily modified for the kernel by + * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/slab.h> +#include <linux/kexec.h> +#include <linux/memblock.h> +#include <linux/of_fdt.h> +#include <linux/libfdt.h> + +#define SLAVE_CODE_SIZE 256 + +static struct kexec_file_ops *kexec_file_loaders[] = { + &kexec_elf64_ops, +}; + +int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + int i, ret = -ENOEXEC; + struct kexec_file_ops *fops; + + /* We don't support crash kernels yet. */ + if (image->type == KEXEC_TYPE_CRASH) + return -ENOTSUPP; + + for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) { + fops = kexec_file_loaders[i]; + if (!fops || !fops->probe) + continue; + + ret = fops->probe(buf, buf_len); + if (!ret) { + image->fops = fops; + return ret; + } + } + + return ret; +} + +void *arch_kexec_kernel_image_load(struct kimage *image) +{ + if (!image->fops || !image->fops->load) + return ERR_PTR(-ENOEXEC); + + return image->fops->load(image, image->kernel_buf, + image->kernel_buf_len, image->initrd_buf, + image->initrd_buf_len, image->cmdline_buf, + image->cmdline_buf_len); +} + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + if (!image->fops || !image->fops->cleanup) + return 0; + + return image->fops->cleanup(image->image_loader_data); +} + +/** + * arch_kexec_walk_mem - call func(data) for each unreserved memory block + * @kbuf: Context info for the search. Also passed to @func. + * @func: Function to call for each memory block. + * + * This function is used by kexec_add_buffer and kexec_locate_mem_hole + * to find unreserved memory to load kexec segments into. + * + * Return: The memory walk will stop when func returns a non-zero value + * and that value will be returned. If all free regions are visited without + * func returning non-zero, then zero will be returned. + */ +int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *)) +{ + int ret = 0; + u64 i; + phys_addr_t mstart, mend; + + if (kbuf->top_down) { + for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0, + &mstart, &mend, NULL) { + /* + * In memblock, end points to the first byte after the + * range while in kexec, end points to the last byte + * in the range. 
+ */ + ret = func(mstart, mend - 1, kbuf); + if (ret) + break; + } + } else { + for_each_free_mem_range(i, NUMA_NO_NODE, 0, &mstart, &mend, + NULL) { + /* + * In memblock, end points to the first byte after the + * range while in kexec, end points to the last byte + * in the range. + */ + ret = func(mstart, mend - 1, kbuf); + if (ret) + break; + } + } + + return ret; +} + +/** + * setup_purgatory - initialize the purgatory's global variables + * @image: kexec image. + * @slave_code: Slave code for the purgatory. + * @fdt: Flattened device tree for the next kernel. + * @kernel_load_addr: Address where the kernel is loaded. + * @fdt_load_addr: Address where the flattened device tree is loaded. + * + * Return: 0 on success, or negative errno on error. + */ +int setup_purgatory(struct kimage *image, const void *slave_code, + const void *fdt, unsigned long kernel_load_addr, + unsigned long fdt_load_addr) +{ + unsigned int *slave_code_buf, master_entry; + int ret; + + slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL); + if (!slave_code_buf) + return -ENOMEM; + + /* Get the slave code from the new kernel and put it in purgatory. */ + ret = kexec_purgatory_get_set_symbol(image, "purgatory_start", + slave_code_buf, SLAVE_CODE_SIZE, + true); + if (ret) { + kfree(slave_code_buf); + return ret; + } + + master_entry = slave_code_buf[0]; + memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE); + slave_code_buf[0] = master_entry; + ret = kexec_purgatory_get_set_symbol(image, "purgatory_start", + slave_code_buf, SLAVE_CODE_SIZE, + false); + kfree(slave_code_buf); + + ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr, + sizeof(kernel_load_addr), false); + if (ret) + return ret; + ret = kexec_purgatory_get_set_symbol(image, "dt_offset", &fdt_load_addr, + sizeof(fdt_load_addr), false); + if (ret) + return ret; + + return 0; +} + +/** + * delete_fdt_mem_rsv - delete memory reservation with given address and size + * + * Return: 0 on success, or negative errno on error. + */ +static int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size) +{ + int i, ret, num_rsvs = fdt_num_mem_rsv(fdt); + + for (i = 0; i < num_rsvs; i++) { + uint64_t rsv_start, rsv_size; + + ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size); + if (ret) { + pr_err("Malformed device tree.\n"); + return -EINVAL; + } + + if (rsv_start == start && rsv_size == size) { + ret = fdt_del_mem_rsv(fdt, i); + if (ret) { + pr_err("Error deleting device tree reservation.\n"); + return -EINVAL; + } + + return 0; + } + } + + return -ENOENT; +} + +/* + * setup_new_fdt - modify /chosen and memory reservation for the next kernel + * @fdt: Flattened device tree for the next kernel. + * @initrd_load_addr: Address where the next initrd will be loaded. + * @initrd_len: Size of the next initrd, or 0 if there will be none. + * @cmdline: Command line for the next kernel, or NULL if there will + * be none. + * + * Return: 0 on success, or negative errno on error. + */ +int setup_new_fdt(void *fdt, unsigned long initrd_load_addr, + unsigned long initrd_len, const char *cmdline) +{ + int ret, chosen_node; + const void *prop; + + /* Remove memory reservation for the current device tree. 
*/ + ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params), + fdt_totalsize(initial_boot_params)); + if (ret == 0) + pr_debug("Removed old device tree reservation.\n"); + else if (ret != -ENOENT) + return ret; + + chosen_node = fdt_path_offset(fdt, "/chosen"); + if (chosen_node == -FDT_ERR_NOTFOUND) { + chosen_node = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), + "chosen"); + if (chosen_node < 0) { + pr_err("Error creating /chosen.\n"); + return -EINVAL; + } + } else if (chosen_node < 0) { + pr_err("Malformed device tree: error reading /chosen.\n"); + return -EINVAL; + } + + /* Did we boot using an initrd? */ + prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL); + if (prop) { + uint64_t tmp_start, tmp_end, tmp_size; + + tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop)); + + prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL); + if (!prop) { + pr_err("Malformed device tree.\n"); + return -EINVAL; + } + tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop)); + + /* + * kexec reserves exact initrd size, while firmware may + * reserve a multiple of PAGE_SIZE, so check for both. + */ + tmp_size = tmp_end - tmp_start; + ret = delete_fdt_mem_rsv(fdt, tmp_start, tmp_size); + if (ret == -ENOENT) + ret = delete_fdt_mem_rsv(fdt, tmp_start, + round_up(tmp_size, PAGE_SIZE)); + if (ret == 0) + pr_debug("Removed old initrd reservation.\n"); + else if (ret != -ENOENT) + return ret; + + /* If there's no new initrd, delete the old initrd's info. */ + if (initrd_len == 0) { + ret = fdt_delprop(fdt, chosen_node, + "linux,initrd-start"); + if (ret) { + pr_err("Error deleting linux,initrd-start.\n"); + return -EINVAL; + } + + ret = fdt_delprop(fdt, chosen_node, "linux,initrd-end"); + if (ret) { + pr_err("Error deleting linux,initrd-end.\n"); + return -EINVAL; + } + } + } + + if (initrd_len) { + ret = fdt_setprop_u64(fdt, chosen_node, + "linux,initrd-start", + initrd_load_addr); + if (ret < 0) { + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; + } + + /* initrd-end is the first address after the initrd image. 
*/ + ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end", + initrd_load_addr + initrd_len); + if (ret < 0) { + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; + } + + ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len); + if (ret) { + pr_err("Error reserving initrd memory: %s\n", + fdt_strerror(ret)); + return -EINVAL; + } + } + + if (cmdline != NULL) { + ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline); + if (ret < 0) { + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; + } + } else { + ret = fdt_delprop(fdt, chosen_node, "bootargs"); + if (ret && ret != -FDT_ERR_NOTFOUND) { + pr_err("Error deleting bootargs.\n"); + return -EINVAL; + } + } + + ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0); + if (ret) { + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; + } + + return 0; +} diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 5e7ece0fda9f..c6923ff45131 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -72,7 +72,6 @@ void save_mce_event(struct pt_regs *regs, long handled, struct mce_error_info *mce_err, uint64_t nip, uint64_t addr) { - uint64_t srr1; int index = __this_cpu_inc_return(mce_nest_count) - 1; struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); @@ -99,8 +98,6 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; mce->severity = MCE_SEV_ERROR_SYNC; - srr1 = regs->msr; - /* * Populate the mce error_type and type-specific error_type. */ diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 93cf7a5846a6..1863324c6a3c 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -614,7 +614,7 @@ _GLOBAL(start_secondary_resume) _GLOBAL(__main) blr -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* * Must be relocatable PIC code callable as a C function. */ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 4f178671f230..32be2a844947 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -478,7 +478,7 @@ _GLOBAL(kexec_wait) addi r5,r5,kexec_flag-1b 99: HMT_LOW -#ifdef CONFIG_KEXEC /* use no memory without kexec */ +#ifdef CONFIG_KEXEC_CORE /* use no memory without kexec */ lwz r4,0(r5) cmpwi 0,r4,0 beq 99b @@ -503,7 +503,7 @@ kexec_flag: .long 0 -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE #ifdef CONFIG_PPC_BOOK3E /* * BOOK3E has no real MMU mode, so we have to setup the initial TLB @@ -716,4 +716,4 @@ _GLOBAL(kexec_sequence) mtlr 4 li r5,0 blr /* image->start(physid, image->start, 0); */ -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 183368e008cf..bb1807184bad 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -652,6 +652,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, *location = value - (unsigned long)location; break; + case R_PPC64_REL32: + /* 32 bits relative (used by relative exception tables) */ + *(u32 *)location = value - (unsigned long)location; + break; + case R_PPC64_TOCSAVE: /* * Marker reloc indicates we don't have to save r2. 
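A minimal sketch of the relocation arithmetic behind the R_PPC64_REL32 case added above (helper names here are hypothetical, not part of the patch). The handler stores the 32-bit distance from the patch site to the target; the relative exception tables that motivate it recover an address by adding the stored offset back to the entry's own location, which is what the extable_fixup() call in the traps.c hunk later in this patch relies on.

#include <linux/types.h>

/* Sketch only: resolve a 32-bit PC-relative relocation. */
static void apply_rel32(u32 *location, unsigned long value)
{
	/* offset = S + A - P, truncated to 32 bits */
	*location = (u32)(value - (unsigned long)location);
}

/* Inverse operation, as used when consuming a relative table entry. */
static unsigned long decode_rel32(const s32 *entry)
{
	return (unsigned long)entry + *entry;
}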
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index b60a67d92ebd..34aeac54f120 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -114,11 +114,6 @@ static struct platform_driver of_pci_phb_driver = { }, }; -static __init int of_pci_phb_init(void) -{ - return platform_driver_register(&of_pci_phb_driver); -} - -device_initcall(of_pci_phb_init); +builtin_platform_driver(of_pci_phb_driver); #endif /* CONFIG_PPC_OF_PLATFORM_PCI */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ce6dc61b15b2..04885cec24df 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -64,6 +64,12 @@ #include <linux/kprobes.h> #include <linux/kdebug.h> +#ifdef CONFIG_CC_STACKPROTECTOR +#include <linux/stackprotector.h> +unsigned long __stack_chk_guard __read_mostly; +EXPORT_SYMBOL(__stack_chk_guard); +#endif + /* Transactional Memory debug */ #ifdef TM_DEBUG_SW #define TM_DEBUG(x...) printk(KERN_INFO x) @@ -1051,14 +1057,6 @@ static inline void save_sprs(struct thread_struct *t) */ t->tar = mfspr(SPRN_TAR); } - - if (cpu_has_feature(CPU_FTR_ARCH_300)) { - /* Conditionally save Load Monitor registers, if enabled */ - if (t->fscr & FSCR_LM) { - t->lmrr = mfspr(SPRN_LMRR); - t->lmser = mfspr(SPRN_LMSER); - } - } #endif } @@ -1094,16 +1092,6 @@ static inline void restore_sprs(struct thread_struct *old_thread, if (old_thread->tar != new_thread->tar) mtspr(SPRN_TAR, new_thread->tar); } - - if (cpu_has_feature(CPU_FTR_ARCH_300)) { - /* Conditionally restore Load Monitor registers, if enabled */ - if (new_thread->fscr & FSCR_LM) { - if (old_thread->lmrr != new_thread->lmrr) - mtspr(SPRN_LMRR, new_thread->lmrr); - if (old_thread->lmser != new_thread->lmser) - mtspr(SPRN_LMSER, new_thread->lmser); - } - } #endif } @@ -1215,7 +1203,7 @@ static void show_instructions(struct pt_regs *regs) int instr; if (!(i % 8)) - printk("\n"); + pr_cont("\n"); #if !defined(CONFIG_BOOKE) /* If executing with the IMMU off, adjust pc rather @@ -1227,18 +1215,18 @@ static void show_instructions(struct pt_regs *regs) if (!__kernel_text_address(pc) || probe_kernel_address((unsigned int __user *)pc, instr)) { - printk(KERN_CONT "XXXXXXXX "); + pr_cont("XXXXXXXX "); } else { if (regs->nip == pc) - printk(KERN_CONT "<%08x> ", instr); + pr_cont("<%08x> ", instr); else - printk(KERN_CONT "%08x ", instr); + pr_cont("%08x ", instr); } pc += sizeof(int); } - printk("\n"); + pr_cont("\n"); } struct regbit { @@ -1282,7 +1270,7 @@ static void print_bits(unsigned long val, struct regbit *bits, const char *sep) for (; bits->bit; ++bits) if (val & bits->bit) { - printk("%s%s", s, bits->name); + pr_cont("%s%s", s, bits->name); s = sep; } } @@ -1305,9 +1293,9 @@ static void print_tm_bits(unsigned long val) * T: Transactional (bit 34) */ if (val & (MSR_TM | MSR_TS_S | MSR_TS_T)) { - printk(",TM["); + pr_cont(",TM["); print_bits(val, msr_tm_bits, ""); - printk("]"); + pr_cont("]"); } } #else @@ -1316,10 +1304,10 @@ static void print_tm_bits(unsigned long val) {} static void print_msr_bits(unsigned long val) { - printk("<"); + pr_cont("<"); print_bits(val, msr_bits, ","); print_tm_bits(val); - printk(">"); + pr_cont(">"); } #ifdef CONFIG_PPC64 @@ -1347,29 +1335,29 @@ void show_regs(struct pt_regs * regs) printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); trap = TRAP(regs); if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) - printk("CFAR: "REG" ", regs->orig_gpr3); + pr_cont("CFAR: "REG" ", regs->orig_gpr3); if (trap == 
0x200 || trap == 0x300 || trap == 0x600) #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - printk("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); + pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); #else - printk("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); + pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); #endif #ifdef CONFIG_PPC64 - printk("SOFTE: %ld ", regs->softe); + pr_cont("SOFTE: %ld ", regs->softe); #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(regs->msr)) - printk("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch); + pr_cont("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch); #endif for (i = 0; i < 32; i++) { if ((i % REGS_PER_LINE) == 0) - printk("\nGPR%02d: ", i); - printk(REG " ", regs->gpr[i]); + pr_cont("\nGPR%02d: ", i); + pr_cont(REG " ", regs->gpr[i]); if (i == LAST_VOLATILE && !FULL_REGS(regs)) break; } - printk("\n"); + pr_cont("\n"); #ifdef CONFIG_KALLSYMS /* * Lookup NIP late so we have the best change of getting the @@ -1900,14 +1888,14 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if ((ip == rth) && curr_frame >= 0) { - printk(" (%pS)", + pr_cont(" (%pS)", (void *)current->ret_stack[curr_frame].ret); curr_frame--; } #endif if (firstframe) - printk(" (unreliable)"); - printk("\n"); + pr_cont(" (unreliable)"); + pr_cont("\n"); } firstframe = 0; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index b0245bed6f54..f5d399e46193 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -156,21 +156,22 @@ static struct ibm_pa_feature { unsigned char pabit; /* bit number (big-endian) */ unsigned char invert; /* if 1, pa bit set => clear feature */ } ibm_pa_features[] __initdata = { - {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0, 0}, - {0, 0, PPC_FEATURE_HAS_FPU, 0, 0, 1, 0}, - {CPU_FTR_CTRL, 0, 0, 0, 0, 3, 0}, - {CPU_FTR_NOEXECUTE, 0, 0, 0, 0, 6, 0}, - {CPU_FTR_NODSISRALIGN, 0, 0, 0, 1, 1, 1}, - {0, MMU_FTR_CI_LARGE_PAGE, 0, 0, 1, 2, 0}, - {CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 0, 5, 0, 0}, + { .pabyte = 0, .pabit = 0, .cpu_user_ftrs = PPC_FEATURE_HAS_MMU }, + { .pabyte = 0, .pabit = 1, .cpu_user_ftrs = PPC_FEATURE_HAS_FPU }, + { .pabyte = 0, .pabit = 3, .cpu_features = CPU_FTR_CTRL }, + { .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE }, + { .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE }, + { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX }, + { .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN }, + { .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE, + .cpu_user_ftrs = PPC_FEATURE_TRUE_LE }, /* * If the kernel doesn't support TM (ie CONFIG_PPC_TRANSACTIONAL_MEM=n), * we don't want to turn on TM here, so we use the *_COMP versions * which are 0 if the kernel doesn't support TM. 
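 *
 * As a concrete example of the table encoding: .pabyte = 22,
 * .pabit = 0 means scan_features() tests the 0x80 bit of byte 22 of
 * the firmware's ibm,pa-features property (bits are numbered
 * big-endian), and CPU_FTR_TM_COMP evaluates to 0 on kernels built
 * without TM support, making the update a no-op there.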
*/ - {CPU_FTR_TM_COMP, 0, 0, - PPC_FEATURE2_HTM_COMP|PPC_FEATURE2_HTM_NOSC_COMP, 22, 0, 0}, - {0, MMU_FTR_TYPE_RADIX, 0, 0, 40, 0, 0}, + { .pabyte = 22, .pabit = 0, .cpu_features = CPU_FTR_TM_COMP, + .cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP }, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, @@ -427,7 +428,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node, tce_alloc_end = *lprop; #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL); if (lprop) crashk_res.start = *lprop; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 88ac964f4858..ec47a939cbdd 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -461,14 +461,14 @@ static int __init prom_next_node(phandle *nodep) } } -static int inline prom_getprop(phandle node, const char *pname, +static inline int prom_getprop(phandle node, const char *pname, void *value, size_t valuelen) { return call_prom("getprop", 4, 1, node, ADDR(pname), (u32)(unsigned long) value, (u32) valuelen); } -static int inline prom_getproplen(phandle node, const char *pname) +static inline int prom_getproplen(phandle node, const char *pname) { return call_prom("getproplen", 2, 1, node, ADDR(pname)); } @@ -635,13 +635,7 @@ static void __init early_cmdline_parse(void) * * See prom.h for the definition of the bits specified in the * architecture vector. - * - * Because the description vector contains a mix of byte and word - * values, we declare it as an unsigned char array, and use this - * macro to put word values in. */ -#define W(x) ((x) >> 24) & 0xff, ((x) >> 16) & 0xff, \ - ((x) >> 8) & 0xff, (x) & 0xff /* Firmware expects the value to be n - 1, where n is the # of vectors */ #define NUM_VECTORS(n) ((n) - 1) @@ -652,92 +646,205 @@ static void __init early_cmdline_parse(void) */ #define VECTOR_LENGTH(n) (1 + (n) - 2) -unsigned char ibm_architecture_vec[] = { - W(0xfffe0000), W(0x003a0000), /* POWER5/POWER5+ */ - W(0xffff0000), W(0x003e0000), /* POWER6 */ - W(0xffff0000), W(0x003f0000), /* POWER7 */ - W(0xffff0000), W(0x004b0000), /* POWER8E */ - W(0xffff0000), W(0x004c0000), /* POWER8NVL */ - W(0xffff0000), W(0x004d0000), /* POWER8 */ - W(0xffffffff), W(0x0f000004), /* all 2.07-compliant */ - W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */ - W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */ - W(0xfffffffe), W(0x0f000001), /* all 2.04-compliant and earlier */ - NUM_VECTORS(6), /* 6 option vectors */ - - /* option vector 1: processor architectures supported */ - VECTOR_LENGTH(2), /* length */ - 0, /* don't ignore, don't halt */ - OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 | - OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07, +struct option_vector1 { + u8 byte1; + u8 arch_versions; +} __packed; + +struct option_vector2 { + u8 byte1; + __be16 reserved; + __be32 real_base; + __be32 real_size; + __be32 virt_base; + __be32 virt_size; + __be32 load_base; + __be32 min_rma; + __be32 min_load; + u8 min_rma_percent; + u8 max_pft_size; +} __packed; + +struct option_vector3 { + u8 byte1; + u8 byte2; +} __packed; + +struct option_vector4 { + u8 byte1; + u8 min_vp_cap; +} __packed; + +struct option_vector5 { + u8 byte1; + u8 byte2; + u8 byte3; + u8 cmo; + u8 associativity; + u8 bin_opts; + u8 micro_checkpoint; + u8 reserved0; + __be32 max_cpus; + __be16 papr_level; + __be16 reserved1; + u8 platform_facilities; + u8 reserved2; + __be16 
reserved3; + u8 subprocessors; +} __packed; + +struct option_vector6 { + u8 reserved; + u8 secondary_pteg; + u8 os_name; +} __packed; + +struct ibm_arch_vec { + struct { u32 mask, val; } pvrs[10]; + + u8 num_vectors; + + u8 vec1_len; + struct option_vector1 vec1; + + u8 vec2_len; + struct option_vector2 vec2; + + u8 vec3_len; + struct option_vector3 vec3; + + u8 vec4_len; + struct option_vector4 vec4; + + u8 vec5_len; + struct option_vector5 vec5; + + u8 vec6_len; + struct option_vector6 vec6; +} __packed; + +struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { + .pvrs = { + { + .mask = cpu_to_be32(0xfffe0000), /* POWER5/POWER5+ */ + .val = cpu_to_be32(0x003a0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER6 */ + .val = cpu_to_be32(0x003e0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER7 */ + .val = cpu_to_be32(0x003f0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER8E */ + .val = cpu_to_be32(0x004b0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER8NVL */ + .val = cpu_to_be32(0x004c0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER8 */ + .val = cpu_to_be32(0x004d0000), + }, + { + .mask = cpu_to_be32(0xffffffff), /* all 2.07-compliant */ + .val = cpu_to_be32(0x0f000004), + }, + { + .mask = cpu_to_be32(0xffffffff), /* all 2.06-compliant */ + .val = cpu_to_be32(0x0f000003), + }, + { + .mask = cpu_to_be32(0xffffffff), /* all 2.05-compliant */ + .val = cpu_to_be32(0x0f000002), + }, + { + .mask = cpu_to_be32(0xfffffffe), /* all 2.04-compliant and earlier */ + .val = cpu_to_be32(0x0f000001), + }, + }, + + .num_vectors = NUM_VECTORS(6), + .vec1_len = VECTOR_LENGTH(sizeof(struct option_vector1)), + .vec1 = { + .byte1 = 0, + .arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 | + OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07, + }, + + .vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)), /* option vector 2: Open Firmware options supported */ - VECTOR_LENGTH(33), /* length */ - OV2_REAL_MODE, - 0, 0, - W(0xffffffff), /* real_base */ - W(0xffffffff), /* real_size */ - W(0xffffffff), /* virt_base */ - W(0xffffffff), /* virt_size */ - W(0xffffffff), /* load_base */ - W(256), /* 256MB min RMA */ - W(0xffffffff), /* full client load */ - 0, /* min RMA percentage of total RAM */ - 48, /* max log_2(hash table size) */ + .vec2 = { + .byte1 = OV2_REAL_MODE, + .reserved = 0, + .real_base = cpu_to_be32(0xffffffff), + .real_size = cpu_to_be32(0xffffffff), + .virt_base = cpu_to_be32(0xffffffff), + .virt_size = cpu_to_be32(0xffffffff), + .load_base = cpu_to_be32(0xffffffff), + .min_rma = cpu_to_be32(256), /* 256MB min RMA */ + .min_load = cpu_to_be32(0xffffffff), /* full client load */ + .min_rma_percent = 0, /* min RMA percentage of total RAM */ + .max_pft_size = 48, /* max log_2(hash table size) */ + }, + .vec3_len = VECTOR_LENGTH(sizeof(struct option_vector3)), /* option vector 3: processor options supported */ - VECTOR_LENGTH(2), /* length */ - 0, /* don't ignore, don't halt */ - OV3_FP | OV3_VMX | OV3_DFP, + .vec3 = { + .byte1 = 0, /* don't ignore, don't halt */ + .byte2 = OV3_FP | OV3_VMX | OV3_DFP, + }, + .vec4_len = VECTOR_LENGTH(sizeof(struct option_vector4)), /* option vector 4: IBM PAPR implementation */ - VECTOR_LENGTH(2), /* length */ - 0, /* don't halt */ - OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */ + .vec4 = { + .byte1 = 0, /* don't halt */ + .min_vp_cap = OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */ + }, + .vec5_len = VECTOR_LENGTH(sizeof(struct option_vector5)), /* option vector 5: 
PAPR/OF options */ - VECTOR_LENGTH(21), /* length */ - 0, /* don't ignore, don't halt */ - OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) | - OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) | + .vec5 = { + .byte1 = 0, /* don't ignore, don't halt */ + .byte2 = OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) | + OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) | #ifdef CONFIG_PCI_MSI - /* PCIe/MSI support. Without MSI full PCIe is not supported */ - OV5_FEAT(OV5_MSI), + /* PCIe/MSI support. Without MSI full PCIe is not supported */ + OV5_FEAT(OV5_MSI), #else - 0, + 0, #endif - 0, + .byte3 = 0, + .cmo = #ifdef CONFIG_PPC_SMLPAR - OV5_FEAT(OV5_CMO) | OV5_FEAT(OV5_XCMO), + OV5_FEAT(OV5_CMO) | OV5_FEAT(OV5_XCMO), #else - 0, + 0, #endif - OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN), - 0, - 0, - 0, - /* WARNING: The offset of the "number of cores" field below - * must match by the macro below. Update the definition if - * the structure layout changes. - */ -#define IBM_ARCH_VEC_NRCORES_OFFSET 133 - W(NR_CPUS), /* number of cores supported */ - 0, - 0, - 0, - 0, - OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) | - OV5_FEAT(OV5_PFO_HW_842), /* Byte 17 */ - 0, /* Byte 18 */ - 0, /* Byte 19 */ - 0, /* Byte 20 */ - OV5_FEAT(OV5_SUB_PROCESSORS), /* Byte 21 */ + .associativity = OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN), + .bin_opts = 0, + .micro_checkpoint = 0, + .reserved0 = 0, + .max_cpus = cpu_to_be32(NR_CPUS), /* number of cores supported */ + .papr_level = 0, + .reserved1 = 0, + .platform_facilities = OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) | OV5_FEAT(OV5_PFO_HW_842), + .reserved2 = 0, + .reserved3 = 0, + .subprocessors = 1, + }, /* option vector 6: IBM PAPR hints */ - VECTOR_LENGTH(3), /* length */ - 0, - 0, - OV6_LINUX, + .vec6_len = VECTOR_LENGTH(sizeof(struct option_vector6)), + .vec6 = { + .reserved = 0, + .secondary_pteg = 0, + .os_name = OV6_LINUX, + }, }; /* Old method - ELF header with PT_NOTE sections only works on BE */ @@ -873,7 +980,6 @@ static void __init prom_send_capabilities(void) ihandle root; prom_arg_t ret; u32 cores; - unsigned char *ptcores; root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { @@ -884,37 +990,18 @@ static void __init prom_send_capabilities(void) * divide NR_CPUS. */ - /* The core value may start at an odd address. If such a word - * access is made at a cache line boundary, this leads to an - * exception which may not be handled at this time. - * Forcing a per byte access to avoid exception. - */ - ptcores = &ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]; - cores = 0; - cores |= ptcores[0] << 24; - cores |= ptcores[1] << 16; - cores |= ptcores[2] << 8; - cores |= ptcores[3]; - if (cores != NR_CPUS) { - prom_printf("WARNING ! 
" - "ibm_architecture_vec structure inconsistent: %lu!\n", - cores); - } else { - cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads()); - prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n", - cores, NR_CPUS); - ptcores[0] = (cores >> 24) & 0xff; - ptcores[1] = (cores >> 16) & 0xff; - ptcores[2] = (cores >> 8) & 0xff; - ptcores[3] = cores & 0xff; - } + cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads()); + prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n", + cores, NR_CPUS); + + ibm_architecture_vec.vec5.max_cpus = cpu_to_be32(cores); /* try calling the ibm,client-architecture-support method */ prom_printf("Calling ibm,client-architecture-support..."); if (call_prom_ret("call-method", 3, 2, &ret, ADDR("ibm,client-architecture-support"), root, - ADDR(ibm_architecture_vec)) == 0) { + ADDR(&ibm_architecture_vec)) == 0) { /* the call exists... */ if (ret) prom_printf("\nWARNING: ibm,client-architecture" diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 010b7b310237..1e887f3a61a6 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -73,7 +73,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) break; - copied = access_process_vm(child, (u64)addrOthers, &tmp, + copied = ptrace_access_vm(child, (u64)addrOthers, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; @@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) break; ret = 0; - if (access_process_vm(child, (u64)addrOthers, &tmp, + if (ptrace_access_vm(child, (u64)addrOthers, &tmp, sizeof(tmp), FOLL_FORCE | FOLL_WRITE) == sizeof(tmp)) break; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 270ee30abdcf..f516ac508ae3 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -915,7 +915,7 @@ void __init setup_arch(char **cmdline_p) init_mm.context.pte_frag = NULL; #endif #ifdef CONFIG_SPAPR_TCE_IOMMU - mm_iommu_init(&init_mm.context); + mm_iommu_init(&init_mm); #endif irqstack_early_init(); exc_lvl_early_init(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 7ac8e6eaab5b..6824157e4d2e 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -226,17 +226,25 @@ static void __init configure_exceptions(void) if (firmware_has_feature(FW_FEATURE_OPAL)) opal_configure_cores(); - /* Enable AIL if supported, and we are in hypervisor mode */ - if (early_cpu_has_feature(CPU_FTR_HVMODE) && - early_cpu_has_feature(CPU_FTR_ARCH_207S)) { - unsigned long lpcr = mfspr(SPRN_LPCR); - mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); - } + /* AIL on native is done in cpu_ready_for_interrupts() */ } } static void cpu_ready_for_interrupts(void) { + /* + * Enable AIL if supported, and we are in hypervisor mode. This + * is called once for every processor. + * + * If we are not in hypervisor mode the job is done once for + * the whole partition in configure_exceptions(). 
+ */ + if (early_cpu_has_feature(CPU_FTR_HVMODE) && + early_cpu_has_feature(CPU_FTR_ARCH_207S)) { + unsigned long lpcr = mfspr(SPRN_LPCR); + mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + } + /* Set IR and DR in PACA MSR */ get_paca()->kernel_msr = MSR_KERNEL; } @@ -346,7 +354,7 @@ void early_setup_secondary(void) #endif /* CONFIG_SMP */ -#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC) +#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE) static bool use_spinloop(void) { if (!IS_ENABLED(CONFIG_PPC_BOOK3E)) @@ -391,7 +399,7 @@ void smp_release_cpus(void) DBG(" <- smp_release_cpus()\n"); } -#endif /* CONFIG_SMP || CONFIG_KEXEC */ +#endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */ /* * Initialize some remaining members of the ppc64_caches and systemcfg diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9c6f3fd58059..893bd7f79be6 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -193,7 +193,7 @@ int smp_request_message_ipi(int virq, int msg) if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) { return -EINVAL; } -#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC) +#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC_CORE) if (msg == PPC_MSG_DEBUGGER_BREAK) { return 1; } @@ -325,7 +325,7 @@ void tick_broadcast(const struct cpumask *mask) } #endif -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) void smp_send_debugger_break(void) { int cpu; @@ -340,7 +340,7 @@ void smp_send_debugger_break(void) } #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { crash_ipi_function_ptr = crash_ipi_callback; diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index c4f1d1f7bae0..c1fb255a60d6 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -703,7 +703,7 @@ static struct device_attribute pa6t_attrs[] = { #endif /* HAS_PPC_PMC_PA6T */ #endif /* HAS_PPC_PMC_CLASSIC */ -static void register_cpu_online(unsigned int cpu) +static int register_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); struct device *s = &c->dev; @@ -782,11 +782,12 @@ static void register_cpu_online(unsigned int cpu) } #endif cacheinfo_cpu_online(cpu); + return 0; } -#ifdef CONFIG_HOTPLUG_CPU -static void unregister_cpu_online(unsigned int cpu) +static int unregister_cpu_online(unsigned int cpu) { +#ifdef CONFIG_HOTPLUG_CPU struct cpu *c = &per_cpu(cpu_devices, cpu); struct device *s = &c->dev; struct device_attribute *attrs, *pmc_attrs; @@ -863,6 +864,8 @@ static void unregister_cpu_online(unsigned int cpu) } #endif cacheinfo_cpu_offline(cpu); +#endif /* CONFIG_HOTPLUG_CPU */ + return 0; } #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE @@ -883,32 +886,6 @@ ssize_t arch_cpu_release(const char *buf, size_t count) } #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ -#endif /* CONFIG_HOTPLUG_CPU */ - -static int sysfs_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned int)(long)hcpu; - - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - register_cpu_online(cpu); - break; -#ifdef CONFIG_HOTPLUG_CPU - case CPU_DEAD: - case CPU_DEAD_FROZEN: - unregister_cpu_online(cpu); - break; -#endif - } - return NOTIFY_OK; -} - -static struct notifier_block sysfs_cpu_nb = { - .notifier_call = sysfs_cpu_notify, -}; - static DEFINE_MUTEX(cpu_mutex); int cpu_add_dev_attr(struct device_attribute *attr) @@ -1023,12 +1000,10 @@ static DEVICE_ATTR(physical_id, 0444, 
show_physical_id, NULL); static int __init topology_init(void) { - int cpu; + int cpu, r; register_nodes(); - cpu_notifier_register_begin(); - for_each_possible_cpu(cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); @@ -1047,15 +1022,10 @@ static int __init topology_init(void) device_create_file(&c->dev, &dev_attr_physical_id); } - - if (cpu_online(cpu)) - register_cpu_online(cpu); } - - __register_cpu_notifier(&sysfs_cpu_nb); - - cpu_notifier_register_done(); - + r = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/topology:online", + register_cpu_online, unregister_cpu_online); + WARN_ON(r < 0); #ifdef CONFIG_PPC64 sysfs_create_dscr_default(); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index bc3f7d0d7b79..be9751f1cb2a 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -164,8 +164,6 @@ u64 __cputime_sec_factor; EXPORT_SYMBOL(__cputime_sec_factor); u64 __cputime_clockt_factor; EXPORT_SYMBOL(__cputime_clockt_factor); -DEFINE_PER_CPU(unsigned long, cputime_last_delta); -DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta); cputime_t cputime_one_jiffy; @@ -360,7 +358,8 @@ void vtime_account_system(struct task_struct *tsk) unsigned long delta, sys_scaled, stolen; delta = vtime_delta(tsk, &sys_scaled, &stolen); - account_system_time(tsk, 0, delta, sys_scaled); + account_system_time(tsk, 0, delta); + tsk->stimescaled += sys_scaled; if (stolen) account_steal_time(stolen); } @@ -393,7 +392,8 @@ void vtime_account_user(struct task_struct *tsk) acct->user_time = 0; acct->user_time_scaled = 0; acct->utime_sspurr = 0; - account_user_time(tsk, utime, utimescaled); + account_user_time(tsk, utime); + tsk->utimescaled += utimescaled; } #ifdef CONFIG_PPC32 diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 023a462725b5..4239aaf74886 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -64,8 +64,9 @@ #include <asm/asm-prototypes.h> #include <asm/hmi.h> #include <sysdev/fsl_pci.h> +#include <asm/kprobes.h> -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) int (*__debugger)(struct pt_regs *regs) __read_mostly; int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly; int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly; @@ -122,9 +123,6 @@ static unsigned long oops_begin(struct pt_regs *regs) int cpu; unsigned long flags; - if (debugger(regs)) - return 1; - oops_enter(); /* racy, but better than risking deadlock. */ @@ -150,14 +148,15 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, int signr) { bust_spinlocks(0); - die_owner = -1; add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); die_nest_count--; oops_exit(); printk("\n"); - if (!die_nest_count) + if (!die_nest_count) { /* Nest count reaches zero, release the lock. */ + die_owner = -1; arch_spin_unlock(&die_lock); + } raw_local_irq_restore(flags); crash_fadump(regs, "die oops"); @@ -227,8 +226,12 @@ NOKPROBE_SYMBOL(__die); void die(const char *str, struct pt_regs *regs, long err) { - unsigned long flags = oops_begin(regs); + unsigned long flags; + if (debugger(regs)) + return; + + flags = oops_begin(regs); if (__die(str, regs, err)) err = 0; oops_end(flags, regs, err); @@ -365,7 +368,7 @@ static inline int check_io_access(struct pt_regs *regs) (*nip & 0x100)? 
"OUT to": "IN from", regs->gpr[rb] - _IO_BASE, nip); regs->msr |= MSR_RI; - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return 1; } } @@ -824,6 +827,9 @@ void single_step_exception(struct pt_regs *regs) clear_single_step(regs); + if (kprobe_post_handler(regs)) + return; + if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) goto bail; @@ -1177,6 +1183,9 @@ void program_check_exception(struct pt_regs *regs) if (debugger_bpt(regs)) goto bail; + if (kprobe_handler(regs)) + goto bail; + /* trap exception */ if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) @@ -1430,7 +1439,6 @@ void facility_unavailable_exception(struct pt_regs *regs) [FSCR_TM_LG] = "TM", [FSCR_EBB_LG] = "EBB", [FSCR_TAR_LG] = "TAR", - [FSCR_LM_LG] = "LM", }; char *facility = "unknown"; u64 value; @@ -1488,14 +1496,6 @@ void facility_unavailable_exception(struct pt_regs *regs) emulate_single_step(regs); } return; - } else if ((status == FSCR_LM_LG) && cpu_has_feature(CPU_FTR_ARCH_300)) { - /* - * This process has touched LM, so turn it on forever - * for this process - */ - current->thread.fscr |= FSCR_LM; - mtspr(SPRN_FSCR, current->thread.fscr); - return; } if (status == FSCR_TM_LG) { @@ -1519,7 +1519,8 @@ void facility_unavailable_exception(struct pt_regs *regs) return; } - if ((status < ARRAY_SIZE(facility_strings)) && + if ((hv || status >= 2) && + (status < ARRAY_SIZE(facility_strings)) && facility_strings[status]) facility = facility_strings[status]; @@ -1527,9 +1528,8 @@ void facility_unavailable_exception(struct pt_regs *regs) if (!arch_irq_disabled_regs(regs)) local_irq_enable(); - pr_err_ratelimited( - "%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", - hv ? "Hypervisor " : "", facility, regs->nip, regs->msr); + pr_err_ratelimited("%sFacility '%s' unavailable (%d), exception at 0x%lx, MSR=%lx\n", + hv ? "Hypervisor " : "", facility, status, regs->nip, regs->msr); out: if (user_mode(regs)) { @@ -1754,6 +1754,9 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status) return; } + if (kprobe_post_handler(regs)) + return; + if (notify_die(DIE_SSTEP, "block_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) { return; @@ -1768,6 +1771,9 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status) /* Clear the instruction completion event */ mtspr(SPRN_DBSR, DBSR_IC); + if (kprobe_post_handler(regs)) + return; + if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) { return; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8295f51c1a5f..7394b770ae1f 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -94,8 +94,17 @@ SECTIONS * detected, and will result in a crash at boot due to offsets being * wrong. */ +#ifdef CONFIG_PPC64 + /* + * BLOCK(0) overrides the default output section alignment because + * this needs to start right after .head.text in order for fixed + * section placement to work. + */ + .text BLOCK(0) : AT(ADDR(.text) - LOAD_OFFSET) { +#else .text : AT(ADDR(.text) - LOAD_OFFSET) { ALIGN_FUNCTION(); +#endif /* careful! 
__ftr_alt_* sections need to be close to .text */ *(.text .fixup __ftr_alt_* .ref.text) SCHED_TEXT diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 05f09ae82587..b795dd1ac2ef 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -88,6 +88,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) /* 128 (2**7) bytes in each HPTEG */ kvm->arch.hpt_mask = (1ul << (order - 7)) - 1; + atomic64_set(&kvm->arch.mmio_update, 0); + /* Allocate reverse map array */ rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte); if (!rev) { @@ -255,7 +257,7 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) kvmppc_set_msr(vcpu, msr); } -long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, +static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, unsigned long *pte_idx_ret) { @@ -312,7 +314,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_slb *slbe; unsigned long slb_v; unsigned long pp, key; - unsigned long v, gr; + unsigned long v, orig_v, gr; __be64 *hptep; int index; int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); @@ -337,10 +339,12 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, return -ENOENT; } hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); - v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; + v = orig_v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + v = hpte_new_to_old_v(v, be64_to_cpu(hptep[1])); gr = kvm->arch.revmap[index].guest_rpte; - unlock_hpte(hptep, v); + unlock_hpte(hptep, orig_v); preempt_enable(); gpte->eaddr = eaddr; @@ -438,6 +442,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, { struct kvm *kvm = vcpu->kvm; unsigned long hpte[3], r; + unsigned long hnow_v, hnow_r; __be64 *hptep; unsigned long mmu_seq, psize, pte_size; unsigned long gpa_base, gfn_base; @@ -451,6 +456,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int writing, write_ok; struct vm_area_struct *vma; unsigned long rcbits; + long mmio_update; /* * Real-mode code has already searched the HPT and found the @@ -460,6 +466,19 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, */ if (ea != vcpu->arch.pgfault_addr) return RESUME_GUEST; + + if (vcpu->arch.pgfault_cache) { + mmio_update = atomic64_read(&kvm->arch.mmio_update); + if (mmio_update == vcpu->arch.pgfault_cache->mmio_update) { + r = vcpu->arch.pgfault_cache->rpte; + psize = hpte_page_size(vcpu->arch.pgfault_hpte[0], r); + gpa_base = r & HPTE_R_RPN & ~(psize - 1); + gfn_base = gpa_base >> PAGE_SHIFT; + gpa = gpa_base | (ea & (psize - 1)); + return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, + dsisr & DSISR_ISSTORE); + } + } index = vcpu->arch.pgfault_index; hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); rev = &kvm->arch.revmap[index]; @@ -472,6 +491,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unlock_hpte(hptep, hpte[0]); preempt_enable(); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hpte[0] = hpte_new_to_old_v(hpte[0], hpte[1]); + hpte[1] = hpte_new_to_old_r(hpte[1]); + } if (hpte[0] != vcpu->arch.pgfault_hpte[0] || hpte[1] != vcpu->arch.pgfault_hpte[1]) return RESUME_GUEST; @@ -575,16 +598,22 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, */ if (psize < 
PAGE_SIZE) psize = PAGE_SIZE; - r = (r & ~(HPTE_R_PP0 - psize)) | ((pfn << PAGE_SHIFT) & ~(psize - 1)); + r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) | + ((pfn << PAGE_SHIFT) & ~(psize - 1)); if (hpte_is_writable(r) && !write_ok) r = hpte_make_readonly(r); ret = RESUME_GUEST; preempt_disable(); while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) cpu_relax(); - if ((be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK) != hpte[0] || - be64_to_cpu(hptep[1]) != hpte[1] || - rev->guest_rpte != hpte[2]) + hnow_v = be64_to_cpu(hptep[0]); + hnow_r = be64_to_cpu(hptep[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hnow_v = hpte_new_to_old_v(hnow_v, hnow_r); + hnow_r = hpte_new_to_old_r(hnow_r); + } + if ((hnow_v & ~HPTE_V_HVLOCK) != hpte[0] || hnow_r != hpte[1] || + rev->guest_rpte != hpte[2]) /* HPTE has been changed under us; let the guest retry */ goto out_unlock; hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; @@ -615,6 +644,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); } + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + r = hpte_old_to_new_r(hpte[0], r); + hpte[0] = hpte_old_to_new_v(hpte[0]); + } hptep[1] = cpu_to_be64(r); eieio(); __unlock_hpte(hptep, hpte[0]); @@ -758,6 +791,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, hpte_rpn(ptel, psize) == gfn) { hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); + hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO); /* Harvest R and C */ rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; @@ -1165,7 +1199,7 @@ static long record_hpte(unsigned long flags, __be64 *hptp, unsigned long *hpte, struct revmap_entry *revp, int want_valid, int first_pass) { - unsigned long v, r; + unsigned long v, r, hr; unsigned long rcbits_unset; int ok = 1; int valid, dirty; @@ -1192,6 +1226,11 @@ static long record_hpte(unsigned long flags, __be64 *hptp, while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) cpu_relax(); v = be64_to_cpu(hptp[0]); + hr = be64_to_cpu(hptp[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + v = hpte_new_to_old_v(v, hr); + hr = hpte_new_to_old_r(hr); + } /* re-evaluate valid and dirty from synchronized HPTE value */ valid = !!(v & HPTE_V_VALID); @@ -1199,8 +1238,8 @@ static long record_hpte(unsigned long flags, __be64 *hptp, /* Harvest R and C into guest view if necessary */ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); - if (valid && (rcbits_unset & be64_to_cpu(hptp[1]))) { - revp->guest_rpte |= (be64_to_cpu(hptp[1]) & + if (valid && (rcbits_unset & hr)) { + revp->guest_rpte |= (hr & (HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED; dirty = 1; } @@ -1608,7 +1647,7 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf, return ret; } -ssize_t debugfs_htab_write(struct file *file, const char __user *buf, +static ssize_t debugfs_htab_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos) { return -EACCES; diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index d461c440889a..e4c4ea973e57 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -39,7 +39,6 @@ #include <asm/udbg.h> #include <asm/iommu.h> #include <asm/tce.h> -#include <asm/iommu.h> #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 3686471be32b..8dcbe37a4dac 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ 
b/arch/powerpc/kvm/book3s_hv.c @@ -54,6 +54,9 @@ #include <asm/dbell.h> #include <asm/hmi.h> #include <asm/pnv-pci.h> +#include <asm/mmu.h> +#include <asm/opal.h> +#include <asm/xics.h> #include <linux/gfp.h> #include <linux/vmalloc.h> #include <linux/highmem.h> @@ -62,6 +65,7 @@ #include <linux/irqbypass.h> #include <linux/module.h> #include <linux/compiler.h> +#include <linux/of.h> #include "book3s.h" @@ -104,23 +108,6 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); #endif -/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */ -static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT; -module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns"); - -/* Factor by which the vcore halt poll interval is grown, default is to double - */ -static unsigned int halt_poll_ns_grow = 2; -module_param(halt_poll_ns_grow, int, S_IRUGO); -MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by"); - -/* Factor by which the vcore halt poll interval is shrunk, default is to reset - */ -static unsigned int halt_poll_ns_shrink; -module_param(halt_poll_ns_shrink, int, S_IRUGO); -MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by"); - static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -146,12 +133,21 @@ static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc, static bool kvmppc_ipi_thread(int cpu) { + unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); + + /* On POWER9 we can use msgsnd to IPI any cpu */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + msg |= get_hard_smp_processor_id(cpu); + smp_mb(); + __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg)); + return true; + } + /* On POWER8 for IPIs to threads in the same core, use msgsnd */ if (cpu_has_feature(CPU_FTR_ARCH_207S)) { preempt_disable(); if (cpu_first_thread_sibling(cpu) == cpu_first_thread_sibling(smp_processor_id())) { - unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); msg |= cpu_thread_in_core(cpu); smp_mb(); __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg)); @@ -162,8 +158,12 @@ static bool kvmppc_ipi_thread(int cpu) } #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) - if (cpu >= 0 && cpu < nr_cpu_ids && paca[cpu].kvm_hstate.xics_phys) { - xics_wake_cpu(cpu); + if (cpu >= 0 && cpu < nr_cpu_ids) { + if (paca[cpu].kvm_hstate.xics_phys) { + xics_wake_cpu(cpu); + return true; + } + opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY); return true; } #endif @@ -299,41 +299,54 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) vcpu->arch.pvr = pvr; } +/* Dummy value used in computing PCR value below */ +#define PCR_ARCH_300 (PCR_ARCH_207 << 1) + static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) { - unsigned long pcr = 0; + unsigned long host_pcr_bit = 0, guest_pcr_bit = 0; struct kvmppc_vcore *vc = vcpu->arch.vcore; + /* We can (emulate) our own architecture version and anything older */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + host_pcr_bit = PCR_ARCH_300; + else if (cpu_has_feature(CPU_FTR_ARCH_207S)) + host_pcr_bit = PCR_ARCH_207; + else if (cpu_has_feature(CPU_FTR_ARCH_206)) + host_pcr_bit = PCR_ARCH_206; + else + host_pcr_bit = PCR_ARCH_205; + + /* Determine lowest PCR bit needed to run guest in given PVR level */ + guest_pcr_bit = host_pcr_bit; if (arch_compat) { switch 
(arch_compat) { case PVR_ARCH_205: - /* - * If an arch bit is set in PCR, all the defined - * higher-order arch bits also have to be set. - */ - pcr = PCR_ARCH_206 | PCR_ARCH_205; + guest_pcr_bit = PCR_ARCH_205; break; case PVR_ARCH_206: case PVR_ARCH_206p: - pcr = PCR_ARCH_206; + guest_pcr_bit = PCR_ARCH_206; break; case PVR_ARCH_207: + guest_pcr_bit = PCR_ARCH_207; + break; + case PVR_ARCH_300: + guest_pcr_bit = PCR_ARCH_300; break; default: return -EINVAL; } - - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) { - /* POWER7 can't emulate POWER8 */ - if (!(pcr & PCR_ARCH_206)) - return -EINVAL; - pcr &= ~PCR_ARCH_206; - } } + /* Check requested PCR bits don't exceed our capabilities */ + if (guest_pcr_bit > host_pcr_bit) + return -EINVAL; + spin_lock(&vc->lock); vc->arch_compat = arch_compat; - vc->pcr = pcr; + /* Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit */ + vc->pcr = host_pcr_bit - guest_pcr_bit; spin_unlock(&vc->lock); return 0; @@ -945,6 +958,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOK3S_INTERRUPT_EXTERNAL: case BOOK3S_INTERRUPT_H_DOORBELL: + case BOOK3S_INTERRUPT_H_VIRT: vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; @@ -1229,6 +1243,12 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_WORT: *val = get_reg_val(id, vcpu->arch.wort); break; + case KVM_REG_PPC_TIDR: + *val = get_reg_val(id, vcpu->arch.tid); + break; + case KVM_REG_PPC_PSSCR: + *val = get_reg_val(id, vcpu->arch.psscr); + break; case KVM_REG_PPC_VPA_ADDR: spin_lock(&vcpu->arch.vpa_update_lock); *val = get_reg_val(id, vcpu->arch.vpa.next_gpa); @@ -1288,6 +1308,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_TM_CR: *val = get_reg_val(id, vcpu->arch.cr_tm); break; + case KVM_REG_PPC_TM_XER: + *val = get_reg_val(id, vcpu->arch.xer_tm); + break; case KVM_REG_PPC_TM_LR: *val = get_reg_val(id, vcpu->arch.lr_tm); break; @@ -1427,6 +1450,12 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_WORT: vcpu->arch.wort = set_reg_val(id, *val); break; + case KVM_REG_PPC_TIDR: + vcpu->arch.tid = set_reg_val(id, *val); + break; + case KVM_REG_PPC_PSSCR: + vcpu->arch.psscr = set_reg_val(id, *val) & PSSCR_GUEST_VIS; + break; case KVM_REG_PPC_VPA_ADDR: addr = set_reg_val(id, *val); r = -EINVAL; @@ -1498,6 +1527,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_TM_CR: vcpu->arch.cr_tm = set_reg_val(id, *val); break; + case KVM_REG_PPC_TM_XER: + vcpu->arch.xer_tm = set_reg_val(id, *val); + break; case KVM_REG_PPC_TM_LR: vcpu->arch.lr_tm = set_reg_val(id, *val); break; @@ -1540,6 +1572,20 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, return r; } +/* + * On POWER9, threads are independent and can be in different partitions. + * Therefore we consider each thread to be a subcore. + * There is a restriction that all threads have to be in the same + * MMU mode (radix or HPT), unfortunately, but since we only support + * HPT guests on a HPT host so far, that isn't an impediment yet. 
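 *
 * For example, a POWER8 host running in whole-core (SMT8) mode has
 * threads_per_subcore == 8, so one virtual core spans all eight
 * hardware threads; on POWER9 the function below returns 1 and each
 * hardware thread can host its own virtual core.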
+ */ +static int threads_per_vcore(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + return 1; + return threads_per_subcore; +} + static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) { struct kvmppc_vcore *vcore; @@ -1554,7 +1600,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) init_swait_queue_head(&vcore->wq); vcore->preempt_tb = TB_NIL; vcore->lpcr = kvm->arch.lpcr; - vcore->first_vcpuid = core * threads_per_subcore; + vcore->first_vcpuid = core * threads_per_vcore(); vcore->kvm = kvm; INIT_LIST_HEAD(&vcore->preempt_list); @@ -1717,7 +1763,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, int core; struct kvmppc_vcore *vcore; - core = id / threads_per_subcore; + core = id / threads_per_vcore(); if (core >= KVM_MAX_VCORES) goto out; @@ -1935,7 +1981,10 @@ static void kvmppc_wait_for_nap(void) { int cpu = smp_processor_id(); int i, loops; + int n_threads = threads_per_vcore(); + if (n_threads <= 1) + return; for (loops = 0; loops < 1000000; ++loops) { /* * Check if all threads are finished. @@ -1943,17 +1992,17 @@ static void kvmppc_wait_for_nap(void) * and the thread clears it when finished, so we look * for any threads that still have a non-NULL vcore ptr. */ - for (i = 1; i < threads_per_subcore; ++i) + for (i = 1; i < n_threads; ++i) if (paca[cpu + i].kvm_hstate.kvm_vcore) break; - if (i == threads_per_subcore) { + if (i == n_threads) { HMT_medium(); return; } HMT_low(); } HMT_medium(); - for (i = 1; i < threads_per_subcore; ++i) + for (i = 1; i < n_threads; ++i) if (paca[cpu + i].kvm_hstate.kvm_vcore) pr_err("KVM: CPU %d seems to be stuck\n", cpu + i); } @@ -2019,7 +2068,7 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc) vc->vcore_state = VCORE_PREEMPT; vc->pcpu = smp_processor_id(); - if (vc->num_threads < threads_per_subcore) { + if (vc->num_threads < threads_per_vcore()) { spin_lock(&lp->lock); list_add_tail(&vc->preempt_list, &lp->list); spin_unlock(&lp->lock); @@ -2123,8 +2172,7 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) cip->subcore_threads[sub] = vc->num_threads; cip->subcore_vm[sub] = vc->kvm; init_master_vcore(vc); - list_del(&vc->preempt_list); - list_add_tail(&vc->preempt_list, &cip->vcs[sub]); + list_move_tail(&vc->preempt_list, &cip->vcs[sub]); return true; } @@ -2254,12 +2302,12 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) * enter the guest. Only do this if it is the primary thread of the * core (not if a subcore) that is entering the guest. */ -static inline void kvmppc_clear_host_core(int cpu) +static inline int kvmppc_clear_host_core(unsigned int cpu) { int core; if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu)) - return; + return 0; /* * Memory barrier can be omitted here as we will do a smp_wmb() * later in kvmppc_start_thread and we need ensure that state is @@ -2267,6 +2315,7 @@ static inline void kvmppc_clear_host_core(int cpu) */ core = cpu >> threads_shift; kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0; + return 0; } /* @@ -2274,12 +2323,12 @@ static inline void kvmppc_clear_host_core(int cpu) * Only need to do this if it is the primary thread of the core that is * exiting. 
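 *
 * A worked example with hypothetical numbers: with threads_shift == 3
 * (SMT8), cpu 17 is thread 1 of its core and returns immediately,
 * while cpu 16 is the primary thread and sets
 * rm_core[16 >> 3].rm_state.in_host.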
*/ -static inline void kvmppc_set_host_core(int cpu) +static inline int kvmppc_set_host_core(unsigned int cpu) { int core; if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu)) - return; + return 0; /* * Memory barrier can be omitted here because we do a spin_unlock @@ -2287,6 +2336,7 @@ static inline void kvmppc_set_host_core(int cpu) */ core = cpu >> threads_shift; kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1; + return 0; } /* @@ -2307,6 +2357,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) unsigned long cmd_bit, stat_bit; int pcpu, thr; int target_threads; + int controlled_threads; /* * Remove from the list any threads that have a signal pending @@ -2325,11 +2376,18 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) vc->preempt_tb = TB_NIL; /* + * Number of threads that we will be controlling: the same as + * the number of threads per subcore, except on POWER9, + * where it's 1 because the threads are (mostly) independent. + */ + controlled_threads = threads_per_vcore(); + + /* * Make sure we are running on primary threads, and that secondary * threads are offline. Also check if the number of threads in this * guest are greater than the current system threads per guest. */ - if ((threads_per_core > 1) && + if ((controlled_threads > 1) && ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { for_each_runnable_thread(i, vcpu, vc) { vcpu->arch.ret = -EBUSY; @@ -2345,7 +2403,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) */ init_core_info(&core_info, vc); pcpu = smp_processor_id(); - target_threads = threads_per_subcore; + target_threads = controlled_threads; if (target_smt_mode && target_smt_mode < target_threads) target_threads = target_smt_mode; if (vc->num_threads < target_threads) @@ -2381,7 +2439,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) smp_wmb(); } pcpu = smp_processor_id(); - for (thr = 0; thr < threads_per_subcore; ++thr) + for (thr = 0; thr < controlled_threads; ++thr) paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip; /* Initiate micro-threading (split-core) if required */ @@ -2491,7 +2549,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) } /* Let secondaries go back to the offline loop */ - for (i = 0; i < threads_per_subcore; ++i) { + for (i = 0; i < controlled_threads; ++i) { kvmppc_release_hwthread(pcpu + i); if (sip && sip->napped[i]) kvmppc_ipi_thread(pcpu + i); @@ -2543,9 +2601,6 @@ static void grow_halt_poll_ns(struct kvmppc_vcore *vc) vc->halt_poll_ns = 10000; else vc->halt_poll_ns *= halt_poll_ns_grow; - - if (vc->halt_poll_ns > halt_poll_max_ns) - vc->halt_poll_ns = halt_poll_max_ns; } static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) @@ -2556,7 +2611,8 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) vc->halt_poll_ns /= halt_poll_ns_shrink; } -/* Check to see if any of the runnable vcpus on the vcore have pending +/* + * Check to see if any of the runnable vcpus on the vcore have pending * exceptions or are no longer ceded */ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc) @@ -2655,16 +2711,18 @@ out: } /* Adjust poll time */ - if (halt_poll_max_ns) { + if (halt_poll_ns) { if (block_ns <= vc->halt_poll_ns) ; /* We slept and blocked for longer than the max halt time */ - else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns) + else if (vc->halt_poll_ns && block_ns > halt_poll_ns) shrink_halt_poll_ns(vc); /* We slept and our poll time is too small */ - else if (vc->halt_poll_ns < halt_poll_max_ns && - block_ns < 
halt_poll_max_ns) + else if (vc->halt_poll_ns < halt_poll_ns && + block_ns < halt_poll_ns) grow_halt_poll_ns(vc); + if (vc->halt_poll_ns > halt_poll_ns) + vc->halt_poll_ns = halt_poll_ns; } else vc->halt_poll_ns = 0; @@ -2971,6 +3029,15 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, struct kvm_memslots *slots; struct kvm_memory_slot *memslot; + /* + * If we are making a new memslot, it might make + * some address that was previously cached as emulated + * MMIO be no longer emulated MMIO, so invalidate + * all the caches of emulated MMIO translations. + */ + if (npages) + atomic64_inc(&kvm->arch.mmio_update); + if (npages && old->npages) { /* * If modifying a memslot, reset all the rmap dirty bits. @@ -3015,6 +3082,22 @@ static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu) return; } +static void kvmppc_setup_partition_table(struct kvm *kvm) +{ + unsigned long dw0, dw1; + + /* PS field - page size for VRMA */ + dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) | + ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1); + /* HTABSIZE and HTABORG fields */ + dw0 |= kvm->arch.sdr1; + + /* Second dword has GR=0; other fields are unused since UPRT=0 */ + dw1 = 0; + + mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1); +} + static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) { int err = 0; @@ -3066,17 +3149,20 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) psize == 0x1000000)) goto out_srcu; - /* Update VRMASD field in the LPCR */ senc = slb_pgsize_encoding(psize); kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | (VRMA_VSID << SLB_VSID_SHIFT_1T); - /* the -4 is to account for senc values starting at 0x10 */ - lpcr = senc << (LPCR_VRMASD_SH - 4); - /* Create HPTEs in the hash page table for the VRMA */ kvmppc_map_vrma(vcpu, memslot, porder); - kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); + /* Update VRMASD field in the LPCR */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + /* the -4 is to account for senc values starting at 0x10 */ + lpcr = senc << (LPCR_VRMASD_SH - 4); + kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); + } else { + kvmppc_setup_partition_table(kvm); + } /* Order updates to kvm->arch.lpcr etc. vs. 
hpte_setup_done */ smp_wmb(); @@ -3094,36 +3180,6 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) } #ifdef CONFIG_KVM_XICS -static int kvmppc_cpu_notify(struct notifier_block *self, unsigned long action, - void *hcpu) -{ - unsigned long cpu = (long)hcpu; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - kvmppc_set_host_core(cpu); - break; - -#ifdef CONFIG_HOTPLUG_CPU - case CPU_DEAD: - case CPU_DEAD_FROZEN: - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - kvmppc_clear_host_core(cpu); - break; -#endif - default: - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block kvmppc_cpu_notifier = { - .notifier_call = kvmppc_cpu_notify, -}; - /* * Allocate a per-core structure for managing state about which cores are * running in the host versus the guest and for exchanging data between @@ -3185,15 +3241,17 @@ void kvmppc_alloc_host_rm_ops(void) return; } - register_cpu_notifier(&kvmppc_cpu_notifier); - + cpuhp_setup_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE, + "ppc/kvm_book3s:prepare", + kvmppc_set_host_core, + kvmppc_clear_host_core); put_online_cpus(); } void kvmppc_free_host_rm_ops(void) { if (kvmppc_host_rm_ops_hv) { - unregister_cpu_notifier(&kvmppc_cpu_notifier); + cpuhp_remove_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE); kfree(kvmppc_host_rm_ops_hv->rm_core); kfree(kvmppc_host_rm_ops_hv); kvmppc_host_rm_ops_hv = NULL; @@ -3219,14 +3277,18 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) * Since we don't flush the TLB when tearing down a VM, * and this lpid might have previously been used, * make sure we flush on each core before running the new VM. + * On POWER9, the tlbie in mmu_partition_table_set_entry() + * does this flush for us. */ - cpumask_setall(&kvm->arch.need_tlb_flush); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + cpumask_setall(&kvm->arch.need_tlb_flush); /* Start out with the default set of hcalls enabled */ memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls, sizeof(kvm->arch.enabled_hcalls)); - kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); /* Init LPCR for virtual RMA mode */ kvm->arch.host_lpid = mfspr(SPRN_LPID); @@ -3239,9 +3301,29 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) /* On POWER8 turn on online bit to enable PURR/SPURR */ if (cpu_has_feature(CPU_FTR_ARCH_207S)) lpcr |= LPCR_ONL; + /* + * On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed) + * Set HVICE bit to enable hypervisor virtualization interrupts. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + lpcr &= ~LPCR_VPM0; + lpcr |= LPCR_HVICE; + } + kvm->arch.lpcr = lpcr; /* + * Work out how many sets the TLB has, for the use of + * the TLB invalidation loop in book3s_hv_rmhandlers.S. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */ + else if (cpu_has_feature(CPU_FTR_ARCH_207S)) + kvm->arch.tlb_sets = POWER8_TLB_SETS; /* 512 */ + else + kvm->arch.tlb_sets = POWER7_TLB_SETS; /* 128 */ + + /* * Track that we now have a HV mode VM active. This blocks secondary * CPU threads from coming online. */ @@ -3305,9 +3387,9 @@ static int kvmppc_core_check_processor_compat_hv(void) !cpu_has_feature(CPU_FTR_ARCH_206)) return -EIO; /* - * Disable KVM for Power9, untill the required bits merged. + * Disable KVM for Power9 in radix mode. 
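 * With this change HPT-mode hosts on POWER9 are allowed through;
 * only radix-enabled hosts still fail the compatibility check
 * below with -EIO.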
*/ - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled()) return -EIO; return 0; @@ -3661,6 +3743,23 @@ static int kvmppc_book3s_init_hv(void) if (r) return r; + /* + * We need a way of accessing the XICS interrupt controller, + * either directly, via paca[cpu].kvm_hstate.xics_phys, or + * indirectly, via OPAL. + */ +#ifdef CONFIG_SMP + if (!get_paca()->kvm_hstate.xics_phys) { + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc"); + if (!np) { + pr_err("KVM-HV: Cannot determine method for accessing XICS\n"); + return -ENODEV; + } + } +#endif + kvm_ops_hv.owner = THIS_MODULE; kvmppc_hv_ops = &kvm_ops_hv; @@ -3683,3 +3782,4 @@ module_exit(kvmppc_book3s_exit_hv); MODULE_LICENSE("GPL"); MODULE_ALIAS_MISCDEV(KVM_MINOR); MODULE_ALIAS("devname:kvm"); + diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 0c84d6bc8356..5bb24be0b346 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -26,6 +26,8 @@ #include <asm/dbell.h> #include <asm/cputhreads.h> #include <asm/io.h> +#include <asm/opal.h> +#include <asm/smp.h> #define KVM_CMA_CHUNK_ORDER 18 @@ -205,12 +207,18 @@ static inline void rm_writeb(unsigned long paddr, u8 val) void kvmhv_rm_send_ipi(int cpu) { unsigned long xics_phys; + unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); - /* On POWER8 for IPIs to threads in the same core, use msgsnd */ + /* On POWER9 we can use msgsnd for any destination cpu. */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + msg |= get_hard_smp_processor_id(cpu); + __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg)); + return; + } + /* On POWER8 for IPIs to threads in the same core, use msgsnd. */ if (cpu_has_feature(CPU_FTR_ARCH_207S) && cpu_first_thread_sibling(cpu) == cpu_first_thread_sibling(raw_smp_processor_id())) { - unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); msg |= cpu_thread_in_core(cpu); __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg)); return; @@ -218,7 +226,11 @@ void kvmhv_rm_send_ipi(int cpu) /* Else poke the target with an IPI */ xics_phys = paca[cpu].kvm_hstate.xics_phys; - rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); + if (xics_phys) + rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); + else + opal_rm_int_set_mfrr(get_hard_smp_processor_id(cpu), + IPI_PRIORITY); } /* @@ -329,7 +341,7 @@ static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap, * saved a copy of the XIRR in the PACA, it will be picked up by * the host ICP driver. 
*/ -static int kvmppc_check_passthru(u32 xisr, __be32 xirr) +static int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again) { struct kvmppc_passthru_irqmap *pimap; struct kvmppc_irq_map *irq_map; @@ -348,11 +360,11 @@ static int kvmppc_check_passthru(u32 xisr, __be32 xirr) /* We're handling this interrupt, generic code doesn't need to */ local_paca->kvm_hstate.saved_xirr = 0; - return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap); + return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap, again); } #else -static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr) +static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again) { return 1; } @@ -367,14 +379,31 @@ static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr) * -1 if there was a guest wakeup IPI (which has now been cleared) * -2 if there is PCI passthrough external interrupt that was handled */ +static long kvmppc_read_one_intr(bool *again); long kvmppc_read_intr(void) { + long ret = 0; + long rc; + bool again; + + do { + again = false; + rc = kvmppc_read_one_intr(&again); + if (rc && (ret == 0 || rc > ret)) + ret = rc; + } while (again); + return ret; +} + +static long kvmppc_read_one_intr(bool *again) +{ unsigned long xics_phys; u32 h_xirr; __be32 xirr; u32 xisr; u8 host_ipi; + int64_t rc; /* see if a host IPI is pending */ host_ipi = local_paca->kvm_hstate.host_ipi; @@ -383,8 +412,14 @@ long kvmppc_read_intr(void) /* Now read the interrupt from the ICP */ xics_phys = local_paca->kvm_hstate.xics_phys; - if (unlikely(!xics_phys)) - return 1; + if (!xics_phys) { + /* Use OPAL to read the XIRR */ + rc = opal_rm_int_get_xirr(&xirr, false); + if (rc < 0) + return 1; + } else { + xirr = _lwzcix(xics_phys + XICS_XIRR); + } /* * Save XIRR for later. Since we get control in reverse endian @@ -392,7 +427,6 @@ long kvmppc_read_intr(void) * host endian. Note that xirr is the value read from the * XIRR register, while h_xirr is the host endian version. */ - xirr = _lwzcix(xics_phys + XICS_XIRR); h_xirr = be32_to_cpu(xirr); local_paca->kvm_hstate.saved_xirr = h_xirr; xisr = h_xirr & 0xffffff; @@ -411,8 +445,16 @@ long kvmppc_read_intr(void) * If it is an IPI, clear the MFRR and EOI it. */ if (xisr == XICS_IPI) { - _stbcix(xics_phys + XICS_MFRR, 0xff); - _stwcix(xics_phys + XICS_XIRR, xirr); + if (xics_phys) { + _stbcix(xics_phys + XICS_MFRR, 0xff); + _stwcix(xics_phys + XICS_XIRR, xirr); + } else { + opal_rm_int_set_mfrr(hard_smp_processor_id(), 0xff); + rc = opal_rm_int_eoi(h_xirr); + /* If rc > 0, there is another interrupt pending */ + *again = rc > 0; + } + /* * Need to ensure side effects of above stores * complete before proceeding. 
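The kvmppc_read_intr() rework above turns real-mode interrupt handling into a drain loop: kvmppc_read_one_intr() processes a single interrupt and sets *again when the OPAL EOI path (opal_rm_int_eoi() returning a positive value) reports that another interrupt is already pending, while the wrapper keeps iterating and retains the most significant return code seen. The same pattern in isolation, as a minimal C sketch with illustrative names (read_one() here stands in for kvmppc_read_one_intr()):

    #include <stdbool.h>

    /* Stand-in for kvmppc_read_one_intr(): handle one item, report a
     * status code, and set *again if more work is already pending. */
    static long read_one(bool *again)
    {
    	/* ... handle a single interrupt here ... */
    	*again = false;	/* set to true when an EOI reports more */
    	return 0;
    }

    /* Drain loop: keep calling read_one() until a pass ends with
     * nothing further pending, remembering the strongest status. */
    long drain(void)
    {
    	long ret = 0, rc;
    	bool again;

    	do {
    		again = false;
    		rc = read_one(&again);
    		if (rc && (ret == 0 || rc > ret))
    			ret = rc;
    	} while (again);

    	return ret;
    }

This shape lets the EOI report "more pending" without the caller having to poll the controller again from scratch.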
@@ -429,7 +471,11 @@ long kvmppc_read_intr(void) /* We raced with the host, * we need to resend that IPI, bummer */ - _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY); + if (xics_phys) + _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY); + else + opal_rm_int_set_mfrr(hard_smp_processor_id(), + IPI_PRIORITY); /* Let side effects complete */ smp_mb(); return 1; @@ -440,5 +486,5 @@ long kvmppc_read_intr(void) return -1; } - return kvmppc_check_passthru(xisr, xirr); + return kvmppc_check_passthru(xisr, xirr, again); } diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 0fa70a9618d7..7ef0993214f3 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -16,6 +16,7 @@ #include <asm/machdep.h> #include <asm/cputhreads.h> #include <asm/hmi.h> +#include <asm/kvm_ppc.h> /* SRR1 bits for machine check on POWER7 */ #define SRR1_MC_LDSTERR (1ul << (63-42)) diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 99b4e9d5dd23..9ef3c4be952f 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -264,8 +264,10 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, if (pa) pteh |= HPTE_V_VALID; - else + else { pteh |= HPTE_V_ABSENT; + ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO); + } /*If we had host pte mapping then Check WIMG */ if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) { @@ -351,6 +353,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, /* inval in progress, write a non-present HPTE */ pteh |= HPTE_V_ABSENT; pteh &= ~HPTE_V_VALID; + ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO); unlock_rmap(rmap); } else { kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, @@ -361,6 +364,11 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } } + /* Convert to new format on P9 */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + ptel = hpte_old_to_new_r(pteh, ptel); + pteh = hpte_old_to_new_v(pteh); + } hpte[1] = cpu_to_be64(ptel); /* Write the first HPTE dword, unlocking the HPTE and making it valid */ @@ -386,6 +394,13 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, #define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index)) #endif +static inline int is_mmio_hpte(unsigned long v, unsigned long r) +{ + return ((v & HPTE_V_ABSENT) && + (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == + (HPTE_R_KEY_HI | HPTE_R_KEY_LO)); +} + static inline int try_lock_tlbie(unsigned int *lock) { unsigned int tmp, old; @@ -409,13 +424,18 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, { long i; + /* + * We use the POWER9 5-operand versions of tlbie and tlbiel here. + * Since we are using RIC=0 PRS=0 R=0, and P7/P8 tlbiel ignores + * the RS field, this is backwards-compatible with P7 and P8. 
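+ * (In the 5-operand form, RIC=0 invalidates TLB entries, PRS=0
+ * selects partition-scoped translations, and R=0 means the targets
+ * are hashed page table entries rather than radix.)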
+ */ if (global) { while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) cpu_relax(); if (need_sync) asm volatile("ptesync" : : : "memory"); for (i = 0; i < npages; ++i) - asm volatile(PPC_TLBIE(%1,%0) : : + asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : "r" (rbvalues[i]), "r" (kvm->arch.lpid)); asm volatile("eieio; tlbsync; ptesync" : : : "memory"); kvm->arch.tlbie_lock = 0; @@ -423,7 +443,8 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, if (need_sync) asm volatile("ptesync" : : : "memory"); for (i = 0; i < npages; ++i) - asm volatile("tlbiel %0" : : "r" (rbvalues[i])); + asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : : + "r" (rbvalues[i]), "r" (0)); asm volatile("ptesync" : : : "memory"); } } @@ -435,18 +456,23 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, __be64 *hpte; unsigned long v, r, rb; struct revmap_entry *rev; - u64 pte; + u64 pte, orig_pte, pte_r; if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - pte = be64_to_cpu(hpte[0]); + pte = orig_pte = be64_to_cpu(hpte[0]); + pte_r = be64_to_cpu(hpte[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + pte = hpte_new_to_old_v(pte, pte_r); + pte_r = hpte_new_to_old_r(pte_r); + } if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) || ((flags & H_ANDCOND) && (pte & avpn) != 0)) { - __unlock_hpte(hpte, pte); + __unlock_hpte(hpte, orig_pte); return H_NOT_FOUND; } @@ -454,7 +480,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, v = pte & ~HPTE_V_HVLOCK; if (v & HPTE_V_VALID) { hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); - rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index); + rb = compute_tlbie_rb(v, pte_r, pte_index); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* * The reference (R) and change (C) bits in a HPT @@ -472,6 +498,9 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, note_hpte_modification(kvm, rev); unlock_hpte(hpte, 0); + if (is_mmio_hpte(v, pte_r)) + atomic64_inc(&kvm->arch.mmio_update); + if (v & HPTE_V_ABSENT) v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID; hpret[0] = v; @@ -498,7 +527,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) int global; long int ret = H_SUCCESS; struct revmap_entry *rev, *revs[4]; - u64 hp0; + u64 hp0, hp1; global = global_invalidates(kvm, 0); for (i = 0; i < 4 && ret == H_SUCCESS; ) { @@ -531,6 +560,11 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) } found = 0; hp0 = be64_to_cpu(hp[0]); + hp1 = be64_to_cpu(hp[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hp0 = hpte_new_to_old_v(hp0, hp1); + hp1 = hpte_new_to_old_r(hp1); + } if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) { switch (flags & 3) { case 0: /* absolute */ @@ -561,13 +595,14 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); args[j] |= rcbits << (56 - 5); hp[0] = 0; + if (is_mmio_hpte(hp0, hp1)) + atomic64_inc(&kvm->arch.mmio_update); continue; } /* leave it locked */ hp[0] &= ~cpu_to_be64(HPTE_V_VALID); - tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]), - be64_to_cpu(hp[1]), pte_index); + tlbrb[n] = compute_tlbie_rb(hp0, hp1, pte_index); indexes[n] = j; hptes[n] = hp; revs[n] = rev; @@ -605,7 +640,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, __be64 *hpte; struct revmap_entry *rev; unsigned long v, r, rb, mask, bits; - u64 pte; + u64 pte_v, pte_r; if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; @@ -613,14 +648,16 @@ long 
kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - pte = be64_to_cpu(hpte[0]); - if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || - ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) { - __unlock_hpte(hpte, pte); + v = pte_v = be64_to_cpu(hpte[0]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + v = hpte_new_to_old_v(v, be64_to_cpu(hpte[1])); + if ((v & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || + ((flags & H_AVPN) && (v & ~0x7fUL) != avpn)) { + __unlock_hpte(hpte, pte_v); return H_NOT_FOUND; } - v = pte; + pte_r = be64_to_cpu(hpte[1]); bits = (flags << 55) & HPTE_R_PP0; bits |= (flags << 48) & HPTE_R_KEY_HI; bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); @@ -642,22 +679,26 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, * readonly to writable. If it should be writable, we'll * take a trap and let the page fault code sort it out. */ - pte = be64_to_cpu(hpte[1]); - r = (pte & ~mask) | bits; - if (hpte_is_writable(r) && !hpte_is_writable(pte)) + r = (pte_r & ~mask) | bits; + if (hpte_is_writable(r) && !hpte_is_writable(pte_r)) r = hpte_make_readonly(r); /* If the PTE is changing, invalidate it first */ - if (r != pte) { + if (r != pte_r) { rb = compute_tlbie_rb(v, r, pte_index); - hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) | + hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) | HPTE_V_ABSENT); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); + /* Don't lose R/C bit updates done by hardware */ + r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C); hpte[1] = cpu_to_be64(r); } } - unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); + unlock_hpte(hpte, pte_v & ~HPTE_V_HVLOCK); asm volatile("ptesync" : : : "memory"); + if (is_mmio_hpte(v, pte_r)) + atomic64_inc(&kvm->arch.mmio_update); + return H_SUCCESS; } @@ -681,6 +722,10 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; r = be64_to_cpu(hpte[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + v = hpte_new_to_old_v(v, r); + r = hpte_new_to_old_r(r); + } if (v & HPTE_V_ABSENT) { v &= ~HPTE_V_ABSENT; v |= HPTE_V_VALID; @@ -798,10 +843,16 @@ void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index) { unsigned long rb; + u64 hp0, hp1; hptep[0] &= ~cpu_to_be64(HPTE_V_VALID); - rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]), - pte_index); + hp0 = be64_to_cpu(hptep[0]); + hp1 = be64_to_cpu(hptep[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hp0 = hpte_new_to_old_v(hp0, hp1); + hp1 = hpte_new_to_old_r(hp1); + } + rb = compute_tlbie_rb(hp0, hp1, pte_index); do_tlbies(kvm, &rb, 1, 1, true); } EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); @@ -811,9 +862,15 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, { unsigned long rb; unsigned char rbyte; + u64 hp0, hp1; - rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]), - pte_index); + hp0 = be64_to_cpu(hptep[0]); + hp1 = be64_to_cpu(hptep[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hp0 = hpte_new_to_old_v(hp0, hp1); + hp1 = hpte_new_to_old_r(hp1); + } + rb = compute_tlbie_rb(hp0, hp1, pte_index); rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8; /* modify only the second-last byte, which contains the ref bit */ *((char *)hptep + 14) = rbyte; @@ -828,6 +885,37 @@ static int slb_base_page_shift[4] = { 20, /* 1M, unsupported */ }; +static struct mmio_hpte_cache_entry 
*mmio_cache_search(struct kvm_vcpu *vcpu, + unsigned long eaddr, unsigned long slb_v, long mmio_update) +{ + struct mmio_hpte_cache_entry *entry = NULL; + unsigned int pshift; + unsigned int i; + + for (i = 0; i < MMIO_HPTE_CACHE_SIZE; i++) { + entry = &vcpu->arch.mmio_cache.entry[i]; + if (entry->mmio_update == mmio_update) { + pshift = entry->slb_base_pshift; + if ((entry->eaddr >> pshift) == (eaddr >> pshift) && + entry->slb_v == slb_v) + return entry; + } + } + return NULL; +} + +static struct mmio_hpte_cache_entry * + next_mmio_cache_entry(struct kvm_vcpu *vcpu) +{ + unsigned int index = vcpu->arch.mmio_cache.index; + + vcpu->arch.mmio_cache.index++; + if (vcpu->arch.mmio_cache.index == MMIO_HPTE_CACHE_SIZE) + vcpu->arch.mmio_cache.index = 0; + + return &vcpu->arch.mmio_cache.entry[index]; +} + /* When called from virtmode, this func should be protected by * preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK * can trigger deadlock issue. @@ -842,7 +930,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, unsigned long avpn; __be64 *hpte; unsigned long mask, val; - unsigned long v, r; + unsigned long v, r, orig_v; /* Get page shift, work out hash and AVPN etc. */ mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY; @@ -877,6 +965,8 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, for (i = 0; i < 16; i += 2) { /* Read the PTE racily */ v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + v = hpte_new_to_old_v(v, be64_to_cpu(hpte[i+1])); /* Check valid/absent, hash, segment size and AVPN */ if (!(v & valid) || (v & mask) != val) @@ -885,8 +975,12 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, /* Lock the PTE and read it under the lock */ while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK)) cpu_relax(); - v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; + v = orig_v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; r = be64_to_cpu(hpte[i+1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + v = hpte_new_to_old_v(v, r); + r = hpte_new_to_old_r(r); + } /* * Check the HPTE again, including base page size @@ -896,7 +990,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, /* Return with the HPTE still locked */ return (hash << 3) + (i >> 1); - __unlock_hpte(&hpte[i], v); + __unlock_hpte(&hpte[i], orig_v); } if (val & HPTE_V_SECONDARY) @@ -924,30 +1018,45 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, { struct kvm *kvm = vcpu->kvm; long int index; - unsigned long v, r, gr; + unsigned long v, r, gr, orig_v; __be64 *hpte; unsigned long valid; struct revmap_entry *rev; unsigned long pp, key; + struct mmio_hpte_cache_entry *cache_entry = NULL; + long mmio_update = 0; /* For protection fault, expect to find a valid HPTE */ valid = HPTE_V_VALID; - if (status & DSISR_NOHPTE) + if (status & DSISR_NOHPTE) { valid |= HPTE_V_ABSENT; - - index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid); - if (index < 0) { - if (status & DSISR_NOHPTE) - return status; /* there really was no HPTE */ - return 0; /* for prot fault, HPTE disappeared */ + mmio_update = atomic64_read(&kvm->arch.mmio_update); + cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update); } - hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); - v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; - r = be64_to_cpu(hpte[1]); - rev = real_vmalloc_addr(&kvm->arch.revmap[index]); - gr = rev->guest_rpte; + if (cache_entry) { + index = cache_entry->pte_index; + v = 
cache_entry->hpte_v; + r = cache_entry->hpte_r; + gr = cache_entry->rpte; + } else { + index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid); + if (index < 0) { + if (status & DSISR_NOHPTE) + return status; /* there really was no HPTE */ + return 0; /* for prot fault, HPTE disappeared */ + } + hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); + v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; + r = be64_to_cpu(hpte[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + v = hpte_new_to_old_v(v, r); + r = hpte_new_to_old_r(r); + } + rev = real_vmalloc_addr(&kvm->arch.revmap[index]); + gr = rev->guest_rpte; - unlock_hpte(hpte, v); + unlock_hpte(hpte, orig_v); + } /* For not found, if the HPTE is valid by now, retry the instruction */ if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID)) @@ -985,12 +1094,32 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, vcpu->arch.pgfault_index = index; vcpu->arch.pgfault_hpte[0] = v; vcpu->arch.pgfault_hpte[1] = r; + vcpu->arch.pgfault_cache = cache_entry; /* Check the storage key to see if it is possibly emulated MMIO */ - if (data && (vcpu->arch.shregs.msr & MSR_IR) && - (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == - (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) - return -2; /* MMIO emulation - load instr word */ + if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == + (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) { + if (!cache_entry) { + unsigned int pshift = 12; + unsigned int pshift_index; + + if (slb_v & SLB_VSID_L) { + pshift_index = ((slb_v & SLB_VSID_LP) >> 4); + pshift = slb_base_page_shift[pshift_index]; + } + cache_entry = next_mmio_cache_entry(vcpu); + cache_entry->eaddr = addr; + cache_entry->slb_base_pshift = pshift; + cache_entry->pte_index = index; + cache_entry->hpte_v = v; + cache_entry->hpte_r = r; + cache_entry->rpte = gr; + cache_entry->slb_v = slb_v; + cache_entry->mmio_update = mmio_update; + } + if (data && (vcpu->arch.shregs.msr & MSR_IR)) + return -2; /* MMIO emulation - load instr word */ + } return -1; /* send fault up to host kernel mode */ } diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index a0ea63ac2b52..06edc4366639 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -70,7 +70,11 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) hcpu = hcore << threads_shift; kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu; smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION); - icp_native_cause_ipi_rm(hcpu); + if (paca[hcpu].kvm_hstate.xics_phys) + icp_native_cause_ipi_rm(hcpu); + else + opal_rm_int_set_mfrr(get_hard_smp_processor_id(hcpu), + IPI_PRIORITY); } #else static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { } @@ -737,7 +741,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) unsigned long eoi_rc; -static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr) +static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again) { unsigned long xics_phys; int64_t rc; @@ -751,7 +755,12 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr) /* EOI it */ xics_phys = local_paca->kvm_hstate.xics_phys; - _stwcix(xics_phys + XICS_XIRR, xirr); + if (xics_phys) { + _stwcix(xics_phys + XICS_XIRR, xirr); + } else { + rc = opal_rm_int_eoi(be32_to_cpu(xirr)); + *again = rc > 0; + } } static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu) @@ -809,9 +818,10 @@ static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc) } long kvmppc_deliver_irq_passthru(struct kvm_vcpu 
*vcpu, - u32 xirr, + __be32 xirr, struct kvmppc_irq_map *irq_map, - struct kvmppc_passthru_irqmap *pimap) + struct kvmppc_passthru_irqmap *pimap, + bool *again) { struct kvmppc_xics *xics; struct kvmppc_icp *icp; @@ -825,7 +835,8 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, icp_rm_deliver_irq(xics, icp, irq); /* EOI the interrupt */ - icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr); + icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr, + again); if (check_too_hard(xics, icp) == H_TOO_HARD) return 2; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c3c1d1bcfc67..9338a818e05c 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -501,17 +501,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) cmpwi r0, 0 beq 57f li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4 - mfspr r4, SPRN_LPCR - rlwimi r4, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1) - mtspr SPRN_LPCR, r4 - isync - std r0, HSTATE_SCRATCH0(r13) - ptesync - ld r0, HSTATE_SCRATCH0(r13) -1: cmpd r0, r0 - bne 1b - nap - b . + mfspr r5, SPRN_LPCR + rlwimi r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1) + b kvm_nap_sequence 57: li r0, 0 stbx r0, r3, r4 @@ -523,6 +515,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * * *****************************************************************************/ +/* Stack frame offsets */ +#define STACK_SLOT_TID (112-16) +#define STACK_SLOT_PSSCR (112-24) + .global kvmppc_hv_entry kvmppc_hv_entry: @@ -581,12 +577,14 @@ kvmppc_hv_entry: ld r9,VCORE_KVM(r5) /* pointer to struct kvm */ cmpwi r6,0 bne 10f - ld r6,KVM_SDR1(r9) lwz r7,KVM_LPID(r9) +BEGIN_FTR_SECTION + ld r6,KVM_SDR1(r9) li r0,LPID_RSVD /* switch to reserved LPID */ mtspr SPRN_LPID,r0 ptesync mtspr SPRN_SDR1,r6 /* switch to partition page table */ +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_LPID,r7 isync @@ -607,12 +605,8 @@ kvmppc_hv_entry: stdcx. 
r7,0,r6 bne 23b /* Flush the TLB of any entries for this LPID */ - /* use arch 2.07S as a proxy for POWER8 */ -BEGIN_FTR_SECTION - li r6,512 /* POWER8 has 512 sets */ -FTR_SECTION_ELSE - li r6,128 /* POWER7 has 128 sets */ -ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S) + lwz r6,KVM_TLB_SETS(r9) + li r0,0 /* RS for P9 version of tlbiel */ mtctr r6 li r7,0x800 /* IS field = 0b10 */ ptesync @@ -698,6 +692,14 @@ kvmppc_got_guest: mtspr SPRN_PURR,r7 mtspr SPRN_SPURR,r8 + /* Save host values of some registers */ +BEGIN_FTR_SECTION + mfspr r5, SPRN_TIDR + mfspr r6, SPRN_PSSCR + std r5, STACK_SLOT_TID(r1) + std r6, STACK_SLOT_PSSCR(r1) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + BEGIN_FTR_SECTION /* Set partition DABR */ /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ @@ -750,14 +752,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) BEGIN_FTR_SECTION ld r5, VCPU_MMCR + 24(r4) ld r6, VCPU_SIER(r4) + mtspr SPRN_MMCR2, r5 + mtspr SPRN_SIER, r6 +BEGIN_FTR_SECTION_NESTED(96) lwz r7, VCPU_PMC + 24(r4) lwz r8, VCPU_PMC + 28(r4) ld r9, VCPU_MMCR + 32(r4) - mtspr SPRN_MMCR2, r5 - mtspr SPRN_SIER, r6 mtspr SPRN_SPMC1, r7 mtspr SPRN_SPMC2, r8 mtspr SPRN_MMCRS, r9 +END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtspr SPRN_MMCR0, r3 isync @@ -813,20 +817,30 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_EBBHR, r8 ld r5, VCPU_EBBRR(r4) ld r6, VCPU_BESCR(r4) - ld r7, VCPU_CSIGR(r4) - ld r8, VCPU_TACR(r4) + lwz r7, VCPU_GUEST_PID(r4) + ld r8, VCPU_WORT(r4) mtspr SPRN_EBBRR, r5 mtspr SPRN_BESCR, r6 - mtspr SPRN_CSIGR, r7 - mtspr SPRN_TACR, r8 + mtspr SPRN_PID, r7 + mtspr SPRN_WORT, r8 +BEGIN_FTR_SECTION + /* POWER8-only registers */ ld r5, VCPU_TCSCR(r4) ld r6, VCPU_ACOP(r4) - lwz r7, VCPU_GUEST_PID(r4) - ld r8, VCPU_WORT(r4) + ld r7, VCPU_CSIGR(r4) + ld r8, VCPU_TACR(r4) mtspr SPRN_TCSCR, r5 mtspr SPRN_ACOP, r6 - mtspr SPRN_PID, r7 - mtspr SPRN_WORT, r8 + mtspr SPRN_CSIGR, r7 + mtspr SPRN_TACR, r8 +FTR_SECTION_ELSE + /* POWER9-only registers */ + ld r5, VCPU_TID(r4) + ld r6, VCPU_PSSCR(r4) + oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */ + mtspr SPRN_TIDR, r5 + mtspr SPRN_PSSCR, r6 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) 8: /* @@ -1341,20 +1355,29 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) std r8, VCPU_EBBHR(r9) mfspr r5, SPRN_EBBRR mfspr r6, SPRN_BESCR - mfspr r7, SPRN_CSIGR - mfspr r8, SPRN_TACR + mfspr r7, SPRN_PID + mfspr r8, SPRN_WORT std r5, VCPU_EBBRR(r9) std r6, VCPU_BESCR(r9) - std r7, VCPU_CSIGR(r9) - std r8, VCPU_TACR(r9) + stw r7, VCPU_GUEST_PID(r9) + std r8, VCPU_WORT(r9) +BEGIN_FTR_SECTION mfspr r5, SPRN_TCSCR mfspr r6, SPRN_ACOP - mfspr r7, SPRN_PID - mfspr r8, SPRN_WORT + mfspr r7, SPRN_CSIGR + mfspr r8, SPRN_TACR std r5, VCPU_TCSCR(r9) std r6, VCPU_ACOP(r9) - stw r7, VCPU_GUEST_PID(r9) - std r8, VCPU_WORT(r9) + std r7, VCPU_CSIGR(r9) + std r8, VCPU_TACR(r9) +FTR_SECTION_ELSE + mfspr r5, SPRN_TIDR + mfspr r6, SPRN_PSSCR + std r5, VCPU_TID(r9) + rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */ + rotldi r6, r6, 60 + std r6, VCPU_PSSCR(r9) +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) /* * Restore various registers to 0, where non-zero values * set by the guest could disrupt the host. 
@@ -1363,12 +1386,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_IAMR, r0 mtspr SPRN_CIABR, r0 mtspr SPRN_DAWRX, r0 - mtspr SPRN_TCSCR, r0 mtspr SPRN_WORT, r0 +BEGIN_FTR_SECTION + mtspr SPRN_TCSCR, r0 /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ li r0, 1 sldi r0, r0, 31 mtspr SPRN_MMCRS, r0 +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) 8: /* Save and reset AMR and UAMOR before turning on the MMU */ @@ -1502,15 +1527,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) stw r8, VCPU_PMC + 20(r9) BEGIN_FTR_SECTION mfspr r5, SPRN_SIER + std r5, VCPU_SIER(r9) +BEGIN_FTR_SECTION_NESTED(96) mfspr r6, SPRN_SPMC1 mfspr r7, SPRN_SPMC2 mfspr r8, SPRN_MMCRS - std r5, VCPU_SIER(r9) stw r6, VCPU_PMC + 24(r9) stw r7, VCPU_PMC + 28(r9) std r8, VCPU_MMCR + 32(r9) lis r4, 0x8000 mtspr SPRN_MMCRS, r4 +END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 22: /* Clear out SLB */ @@ -1519,6 +1546,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) slbia ptesync + /* Restore host values of some registers */ +BEGIN_FTR_SECTION + ld r5, STACK_SLOT_TID(r1) + ld r6, STACK_SLOT_PSSCR(r1) + mtspr SPRN_TIDR, r5 + mtspr SPRN_PSSCR, r6 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + /* * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do @@ -1552,12 +1587,14 @@ kvmhv_switch_to_host: beq 19f /* Primary thread switches back to host partition */ - ld r6,KVM_HOST_SDR1(r4) lwz r7,KVM_HOST_LPID(r4) +BEGIN_FTR_SECTION + ld r6,KVM_HOST_SDR1(r4) li r8,LPID_RSVD /* switch to reserved LPID */ mtspr SPRN_LPID,r8 ptesync - mtspr SPRN_SDR1,r6 /* switch to partition page table */ + mtspr SPRN_SDR1,r6 /* switch to host page table */ +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_LPID,r7 isync @@ -2211,6 +2248,21 @@ BEGIN_FTR_SECTION ori r5, r5, LPCR_PECEDH rlwimi r5, r3, 0, LPCR_PECEDP END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + +kvm_nap_sequence: /* desired LPCR value in r5 */ +BEGIN_FTR_SECTION + /* + * PSSCR bits: exit criterion = 1 (wakeup based on LPCR at sreset) + * enable state loss = 1 (allow SMT mode switch) + * requested level = 0 (just stop dispatching) + */ + lis r3, (PSSCR_EC | PSSCR_ESL)@h + mtspr SPRN_PSSCR, r3 + /* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */ + li r4, LPCR_PECE_HVEE@higher + sldi r4, r4, 32 + or r5, r5, r4 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) mtspr SPRN_LPCR,r5 isync li r0, 0 @@ -2219,7 +2271,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ld r0, HSTATE_SCRATCH0(r13) 1: cmpd r0, r0 bne 1b +BEGIN_FTR_SECTION nap +FTR_SECTION_ELSE + PPC_STOP +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) b . 33: mr r4, r3 @@ -2600,11 +2656,13 @@ kvmppc_save_tm: mfctr r7 mfspr r8, SPRN_AMR mfspr r10, SPRN_TAR + mfxer r11 std r5, VCPU_LR_TM(r9) stw r6, VCPU_CR_TM(r9) std r7, VCPU_CTR_TM(r9) std r8, VCPU_AMR_TM(r9) std r10, VCPU_TAR_TM(r9) + std r11, VCPU_XER_TM(r9) /* Restore r12 as trap number. 
*/ lwz r12, VCPU_TRAP(r9) @@ -2697,11 +2755,13 @@ kvmppc_restore_tm: ld r7, VCPU_CTR_TM(r4) ld r8, VCPU_AMR_TM(r4) ld r9, VCPU_TAR_TM(r4) + ld r10, VCPU_XER_TM(r4) mtlr r5 mtcr r6 mtctr r7 mtspr SPRN_AMR, r8 mtspr SPRN_TAR, r9 + mtxer r10 /* * Load up PPR and DSCR values but don't put them in the actual SPRs diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 70963c845e96..efd1183a6b16 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -536,7 +536,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: case KVM_CAP_SPAPR_TCE_64: - case KVM_CAP_PPC_ALLOC_HTAB: case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_FIXUP_HCALL: case KVM_CAP_PPC_ENABLE_HCALL: @@ -545,13 +544,20 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #endif r = 1; break; + + case KVM_CAP_PPC_ALLOC_HTAB: + r = hv_enabled; + break; #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE case KVM_CAP_PPC_SMT: - if (hv_enabled) - r = threads_per_subcore; - else - r = 0; + r = 0; + if (hv_enabled) { + if (cpu_has_feature(CPU_FTR_ARCH_300)) + r = 1; + else + r = threads_per_subcore; + } break; case KVM_CAP_PPC_RMA: r = 0; diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index fb21990c0fb4..ebc6dd449556 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -449,7 +449,7 @@ TRACE_EVENT(kvmppc_vcore_wakeup, __entry->tgid = current->tgid; ), - TP_printk("%s time %lld ns, tgid=%d", + TP_printk("%s time %llu ns, tgid=%d", __entry->waited ? "wait" : "poll", __entry->ns, __entry->tgid) ); diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index ea29a5d67743..9a671c774b22 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -103,17 +103,14 @@ EXPORT_SYMBOL(__csum_partial) adde r12,r12,r10 #define CSUM_COPY_16_BYTES_EXCODE(n) \ -.section __ex_table,"a"; \ - .align 2; \ - .long 8 ## n ## 0b,src_error; \ - .long 8 ## n ## 1b,src_error; \ - .long 8 ## n ## 2b,src_error; \ - .long 8 ## n ## 3b,src_error; \ - .long 8 ## n ## 4b,dst_error; \ - .long 8 ## n ## 5b,dst_error; \ - .long 8 ## n ## 6b,dst_error; \ - .long 8 ## n ## 7b,dst_error; \ - .text + EX_TABLE(8 ## n ## 0b, src_error); \ + EX_TABLE(8 ## n ## 1b, src_error); \ + EX_TABLE(8 ## n ## 2b, src_error); \ + EX_TABLE(8 ## n ## 3b, src_error); \ + EX_TABLE(8 ## n ## 4b, dst_error); \ + EX_TABLE(8 ## n ## 5b, dst_error); \ + EX_TABLE(8 ## n ## 6b, dst_error); \ + EX_TABLE(8 ## n ## 7b, dst_error); .text .stabs "arch/powerpc/lib/",N_SO,0,0,0f @@ -263,14 +260,11 @@ dst_error: stw r0,0(r8) blr - .section __ex_table,"a" - .align 2 - .long 70b,src_error - .long 71b,dst_error - .long 72b,src_error - .long 73b,dst_error - .long 54b,dst_error - .text + EX_TABLE(70b, src_error); + EX_TABLE(71b, dst_error); + EX_TABLE(72b, src_error); + EX_TABLE(73b, dst_error); + EX_TABLE(54b, dst_error); /* * this stuff handles faults in the cacheline loop and branches to either @@ -291,12 +285,11 @@ dst_error: #endif #endif - .section __ex_table,"a" - .align 2 - .long 30b,src_error - .long 31b,dst_error - .long 40b,src_error - .long 41b,dst_error - .long 50b,src_error - .long 51b,dst_error + EX_TABLE(30b, src_error); + EX_TABLE(31b, dst_error); + EX_TABLE(40b, src_error); + EX_TABLE(41b, dst_error); + EX_TABLE(50b, src_error); + EX_TABLE(51b, dst_error); + EXPORT_SYMBOL(csum_partial_copy_generic) diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index 
fd9176671f9f..d0d311e108ff 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -182,34 +182,22 @@ EXPORT_SYMBOL(__csum_partial) .macro srcnr 100: - .section __ex_table,"a" - .align 3 - .llong 100b,.Lsrc_error_nr - .previous + EX_TABLE(100b,.Lsrc_error_nr) .endm .macro source 150: - .section __ex_table,"a" - .align 3 - .llong 150b,.Lsrc_error - .previous + EX_TABLE(150b,.Lsrc_error) .endm .macro dstnr 200: - .section __ex_table,"a" - .align 3 - .llong 200b,.Ldest_error_nr - .previous + EX_TABLE(200b,.Ldest_error_nr) .endm .macro dest 250: - .section __ex_table,"a" - .align 3 - .llong 250b,.Ldest_error - .previous + EX_TABLE(250b,.Ldest_error) .endm /* diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 40cce33b08d6..ff0d894d7ff9 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -49,17 +49,14 @@ 9 ## n ## 1: \ addi r5,r5,-(16 * n); \ b 105f; \ -.section __ex_table,"a"; \ - .align 2; \ - .long 8 ## n ## 0b,9 ## n ## 0b; \ - .long 8 ## n ## 1b,9 ## n ## 0b; \ - .long 8 ## n ## 2b,9 ## n ## 0b; \ - .long 8 ## n ## 3b,9 ## n ## 0b; \ - .long 8 ## n ## 4b,9 ## n ## 1b; \ - .long 8 ## n ## 5b,9 ## n ## 1b; \ - .long 8 ## n ## 6b,9 ## n ## 1b; \ - .long 8 ## n ## 7b,9 ## n ## 1b; \ - .text + EX_TABLE(8 ## n ## 0b,9 ## n ## 0b); \ + EX_TABLE(8 ## n ## 1b,9 ## n ## 0b); \ + EX_TABLE(8 ## n ## 2b,9 ## n ## 0b); \ + EX_TABLE(8 ## n ## 3b,9 ## n ## 0b); \ + EX_TABLE(8 ## n ## 4b,9 ## n ## 1b); \ + EX_TABLE(8 ## n ## 5b,9 ## n ## 1b); \ + EX_TABLE(8 ## n ## 6b,9 ## n ## 1b); \ + EX_TABLE(8 ## n ## 7b,9 ## n ## 1b) .text .stabs "arch/powerpc/lib/",N_SO,0,0,0f @@ -323,13 +320,10 @@ _GLOBAL(__copy_tofrom_user) 73: stwu r9,4(r6) bdnz 72b - .section __ex_table,"a" - .align 2 - .long 70b,100f - .long 71b,101f - .long 72b,102f - .long 73b,103f - .text + EX_TABLE(70b,100f) + EX_TABLE(71b,101f) + EX_TABLE(72b,102f) + EX_TABLE(73b,103f) 58: srwi. 
r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ clrlwi r5,r5,32-LG_CACHELINE_BYTES @@ -364,10 +358,7 @@ _GLOBAL(__copy_tofrom_user) 53: dcbt r3,r4 54: dcbz r11,r6 - .section __ex_table,"a" - .align 2 - .long 54b,105f - .text + EX_TABLE(54b,105f) /* the main body of the cacheline loop */ COPY_16_BYTES_WITHEX(0) #if L1_CACHE_BYTES >= 32 @@ -500,15 +491,13 @@ _GLOBAL(__copy_tofrom_user) bdnz 114b 120: blr - .section __ex_table,"a" - .align 2 - .long 30b,108b - .long 31b,109b - .long 40b,110b - .long 41b,111b - .long 130b,132b - .long 131b,120b - .long 112b,120b - .long 114b,120b - .text + EX_TABLE(30b,108b) + EX_TABLE(31b,109b) + EX_TABLE(40b,110b) + EX_TABLE(41b,111b) + EX_TABLE(130b,132b) + EX_TABLE(131b,120b) + EX_TABLE(112b,120b) + EX_TABLE(114b,120b) + EXPORT_SYMBOL(__copy_tofrom_user) diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index 60386b2c99bb..aee6e24e81ab 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -394,70 +394,66 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 192: blr /* #bytes not copied in r3 */ - .section __ex_table,"a" - .align 3 - .llong 20b,120b - .llong 220b,320b - .llong 21b,121b - .llong 221b,321b - .llong 70b,170b - .llong 270b,370b - .llong 22b,122b - .llong 222b,322b - .llong 71b,171b - .llong 271b,371b - .llong 72b,172b - .llong 272b,372b - .llong 244b,344b - .llong 245b,345b - .llong 23b,123b - .llong 73b,173b - .llong 44b,144b - .llong 74b,174b - .llong 45b,145b - .llong 75b,175b - .llong 24b,124b - .llong 25b,125b - .llong 26b,126b - .llong 27b,127b - .llong 28b,128b - .llong 29b,129b - .llong 30b,130b - .llong 31b,131b - .llong 32b,132b - .llong 76b,176b - .llong 33b,133b - .llong 77b,177b - .llong 78b,178b - .llong 79b,179b - .llong 80b,180b - .llong 34b,134b - .llong 94b,194b - .llong 95b,195b - .llong 96b,196b - .llong 35b,135b - .llong 81b,181b - .llong 36b,136b - .llong 82b,182b - .llong 37b,137b - .llong 83b,183b - .llong 38b,138b - .llong 39b,139b - .llong 84b,184b - .llong 85b,185b - .llong 40b,140b - .llong 86b,186b - .llong 41b,141b - .llong 87b,187b - .llong 42b,142b - .llong 88b,188b - .llong 43b,143b - .llong 89b,189b - .llong 90b,190b - .llong 91b,191b - .llong 92b,192b - - .text + EX_TABLE(20b,120b) + EX_TABLE(220b,320b) + EX_TABLE(21b,121b) + EX_TABLE(221b,321b) + EX_TABLE(70b,170b) + EX_TABLE(270b,370b) + EX_TABLE(22b,122b) + EX_TABLE(222b,322b) + EX_TABLE(71b,171b) + EX_TABLE(271b,371b) + EX_TABLE(72b,172b) + EX_TABLE(272b,372b) + EX_TABLE(244b,344b) + EX_TABLE(245b,345b) + EX_TABLE(23b,123b) + EX_TABLE(73b,173b) + EX_TABLE(44b,144b) + EX_TABLE(74b,174b) + EX_TABLE(45b,145b) + EX_TABLE(75b,175b) + EX_TABLE(24b,124b) + EX_TABLE(25b,125b) + EX_TABLE(26b,126b) + EX_TABLE(27b,127b) + EX_TABLE(28b,128b) + EX_TABLE(29b,129b) + EX_TABLE(30b,130b) + EX_TABLE(31b,131b) + EX_TABLE(32b,132b) + EX_TABLE(76b,176b) + EX_TABLE(33b,133b) + EX_TABLE(77b,177b) + EX_TABLE(78b,178b) + EX_TABLE(79b,179b) + EX_TABLE(80b,180b) + EX_TABLE(34b,134b) + EX_TABLE(94b,194b) + EX_TABLE(95b,195b) + EX_TABLE(96b,196b) + EX_TABLE(35b,135b) + EX_TABLE(81b,181b) + EX_TABLE(36b,136b) + EX_TABLE(82b,182b) + EX_TABLE(37b,137b) + EX_TABLE(83b,183b) + EX_TABLE(38b,138b) + EX_TABLE(39b,139b) + EX_TABLE(84b,184b) + EX_TABLE(85b,185b) + EX_TABLE(40b,140b) + EX_TABLE(86b,186b) + EX_TABLE(41b,141b) + EX_TABLE(87b,187b) + EX_TABLE(42b,142b) + EX_TABLE(88b,188b) + EX_TABLE(43b,143b) + EX_TABLE(89b,189b) + EX_TABLE(90b,190b) + EX_TABLE(91b,191b) + EX_TABLE(92b,192b) /* * Routine to copy a whole page of data, 
optimized for POWER4. @@ -598,78 +594,77 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) li r5,4096 b .Ldst_aligned - .section __ex_table,"a" - .align 3 - .llong 20b,100b - .llong 21b,100b - .llong 22b,100b - .llong 23b,100b - .llong 24b,100b - .llong 25b,100b - .llong 26b,100b - .llong 27b,100b - .llong 28b,100b - .llong 29b,100b - .llong 30b,100b - .llong 31b,100b - .llong 32b,100b - .llong 33b,100b - .llong 34b,100b - .llong 35b,100b - .llong 36b,100b - .llong 37b,100b - .llong 38b,100b - .llong 39b,100b - .llong 40b,100b - .llong 41b,100b - .llong 42b,100b - .llong 43b,100b - .llong 44b,100b - .llong 45b,100b - .llong 46b,100b - .llong 47b,100b - .llong 48b,100b - .llong 49b,100b - .llong 50b,100b - .llong 51b,100b - .llong 52b,100b - .llong 53b,100b - .llong 54b,100b - .llong 55b,100b - .llong 56b,100b - .llong 57b,100b - .llong 58b,100b - .llong 59b,100b - .llong 60b,100b - .llong 61b,100b - .llong 62b,100b - .llong 63b,100b - .llong 64b,100b - .llong 65b,100b - .llong 66b,100b - .llong 67b,100b - .llong 68b,100b - .llong 69b,100b - .llong 70b,100b - .llong 71b,100b - .llong 72b,100b - .llong 73b,100b - .llong 74b,100b - .llong 75b,100b - .llong 76b,100b - .llong 77b,100b - .llong 78b,100b - .llong 79b,100b - .llong 80b,100b - .llong 81b,100b - .llong 82b,100b - .llong 83b,100b - .llong 84b,100b - .llong 85b,100b - .llong 86b,100b - .llong 87b,100b - .llong 88b,100b - .llong 89b,100b - .llong 90b,100b - .llong 91b,100b + EX_TABLE(20b,100b) + EX_TABLE(21b,100b) + EX_TABLE(22b,100b) + EX_TABLE(23b,100b) + EX_TABLE(24b,100b) + EX_TABLE(25b,100b) + EX_TABLE(26b,100b) + EX_TABLE(27b,100b) + EX_TABLE(28b,100b) + EX_TABLE(29b,100b) + EX_TABLE(30b,100b) + EX_TABLE(31b,100b) + EX_TABLE(32b,100b) + EX_TABLE(33b,100b) + EX_TABLE(34b,100b) + EX_TABLE(35b,100b) + EX_TABLE(36b,100b) + EX_TABLE(37b,100b) + EX_TABLE(38b,100b) + EX_TABLE(39b,100b) + EX_TABLE(40b,100b) + EX_TABLE(41b,100b) + EX_TABLE(42b,100b) + EX_TABLE(43b,100b) + EX_TABLE(44b,100b) + EX_TABLE(45b,100b) + EX_TABLE(46b,100b) + EX_TABLE(47b,100b) + EX_TABLE(48b,100b) + EX_TABLE(49b,100b) + EX_TABLE(50b,100b) + EX_TABLE(51b,100b) + EX_TABLE(52b,100b) + EX_TABLE(53b,100b) + EX_TABLE(54b,100b) + EX_TABLE(55b,100b) + EX_TABLE(56b,100b) + EX_TABLE(57b,100b) + EX_TABLE(58b,100b) + EX_TABLE(59b,100b) + EX_TABLE(60b,100b) + EX_TABLE(61b,100b) + EX_TABLE(62b,100b) + EX_TABLE(63b,100b) + EX_TABLE(64b,100b) + EX_TABLE(65b,100b) + EX_TABLE(66b,100b) + EX_TABLE(67b,100b) + EX_TABLE(68b,100b) + EX_TABLE(69b,100b) + EX_TABLE(70b,100b) + EX_TABLE(71b,100b) + EX_TABLE(72b,100b) + EX_TABLE(73b,100b) + EX_TABLE(74b,100b) + EX_TABLE(75b,100b) + EX_TABLE(76b,100b) + EX_TABLE(77b,100b) + EX_TABLE(78b,100b) + EX_TABLE(79b,100b) + EX_TABLE(80b,100b) + EX_TABLE(81b,100b) + EX_TABLE(82b,100b) + EX_TABLE(83b,100b) + EX_TABLE(84b,100b) + EX_TABLE(85b,100b) + EX_TABLE(86b,100b) + EX_TABLE(87b,100b) + EX_TABLE(88b,100b) + EX_TABLE(89b,100b) + EX_TABLE(90b,100b) + EX_TABLE(91b,100b) + EXPORT_SYMBOL(__copy_tofrom_user) diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index da0c568d18c4..a24b4039352c 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -29,35 +29,23 @@ .macro err1 100: - .section __ex_table,"a" - .align 3 - .llong 100b,.Ldo_err1 - .previous + EX_TABLE(100b,.Ldo_err1) .endm .macro err2 200: - .section __ex_table,"a" - .align 3 - .llong 200b,.Ldo_err2 - .previous + EX_TABLE(200b,.Ldo_err2) .endm #ifdef CONFIG_ALTIVEC .macro err3 300: - .section __ex_table,"a" - .align 3 - 
.llong 300b,.Ldo_err3 - .previous + EX_TABLE(300b,.Ldo_err3) .endm .macro err4 400: - .section __ex_table,"a" - .align 3 - .llong 400b,.Ldo_err4 - .previous + EX_TABLE(400b,.Ldo_err4) .endm diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S index 5d0cdbfbe3f2..a58777c1b2cb 100644 --- a/arch/powerpc/lib/ldstfp.S +++ b/arch/powerpc/lib/ldstfp.S @@ -21,18 +21,12 @@ #define STKFRM (PPC_MIN_STKFRM + 16) - .macro extab instr,handler - .section __ex_table,"a" - PPC_LONG \instr,\handler - .previous - .endm - .macro inst32 op reg = 0 .rept 32 20: \op reg,0,r4 b 3f - extab 20b,99f + EX_TABLE(20b,99f) reg = reg + 1 .endr .endm @@ -100,7 +94,7 @@ _GLOBAL(do_lfs) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) /* Load FP reg N from double at *p. N is in r3, p in r4. */ _GLOBAL(do_lfd) @@ -127,7 +121,7 @@ _GLOBAL(do_lfd) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) /* Store FP reg N to float at *p. N is in r3, p in r4. */ _GLOBAL(do_stfs) @@ -154,7 +148,7 @@ _GLOBAL(do_stfs) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) /* Store FP reg N to double at *p. N is in r3, p in r4. */ _GLOBAL(do_stfd) @@ -181,7 +175,7 @@ _GLOBAL(do_stfd) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) #ifdef CONFIG_ALTIVEC /* Get the contents of vrN into v0; N is in r3. */ @@ -248,7 +242,7 @@ _GLOBAL(do_lvx) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) /* Store vector reg N to *p. N is in r3, p in r4. */ _GLOBAL(do_stvx) @@ -276,7 +270,7 @@ _GLOBAL(do_stvx) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX @@ -344,7 +338,7 @@ _GLOBAL(do_lxvd2x) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) /* Store VSX reg N to vector doubleword *p. N is in r3, p in r4. 
*/ _GLOBAL(do_stxvd2x) @@ -372,7 +366,7 @@ _GLOBAL(do_stxvd2x) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) #endif /* CONFIG_VSX */ diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 3362299b1859..9c78a9c102c3 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -15,6 +15,7 @@ #include <asm/sstep.h> #include <asm/processor.h> #include <asm/uaccess.h> +#include <asm/cpu_has_feature.h> #include <asm/cputable.h> extern char system_call_common[]; @@ -493,10 +494,7 @@ static int __kprobes do_vsx_store(int rn, int (*func)(int, unsigned long), "3: li %0,%4\n" \ " b 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".previous" \ + EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (cr) \ : "r" (x), "r" (addr), "i" (-EFAULT), "0" (err)) @@ -508,10 +506,7 @@ static int __kprobes do_vsx_store(int rn, int (*func)(int, unsigned long), "3: li %0,%3\n" \ " b 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".previous" \ + EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (x) \ : "r" (addr), "i" (-EFAULT), "0" (err)) @@ -523,10 +518,7 @@ static int __kprobes do_vsx_store(int rn, int (*func)(int, unsigned long), "3: li %0,%3\n" \ " b 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".previous" \ + EX_TABLE(1b, 3b) \ : "=r" (err) \ : "r" (addr), "i" (-EFAULT), "0" (err)) diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S index d13e07603519..a787776822d8 100644 --- a/arch/powerpc/lib/string.S +++ b/arch/powerpc/lib/string.S @@ -13,8 +13,6 @@ #include <asm/ppc_asm.h> #include <asm/export.h> - .section __ex_table,"a" - PPC_LONG_ALIGN .text /* This clears out any unused part of the destination buffer, @@ -125,10 +123,9 @@ _GLOBAL(__clear_user) 92: mfctr r3 blr - .section __ex_table,"a" - PPC_LONG 11b,90b - PPC_LONG 1b,91b - PPC_LONG 8b,92b - .text + EX_TABLE(11b, 90b) + EX_TABLE(1b, 91b) + EX_TABLE(8b, 92b) + EXPORT_SYMBOL(__clear_user) #endif diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index 57ace356c949..c100f4d5d5d0 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -19,6 +19,7 @@ */ #include <asm/ppc_asm.h> +#include <asm/linkage.h> #include <asm/asm-offsets.h> #include <asm/export.h> @@ -41,26 +42,17 @@ PPC64_CACHES: .macro err1 100: - .section __ex_table,"a" - .align 3 - .llong 100b,.Ldo_err1 - .previous + EX_TABLE(100b,.Ldo_err1) .endm .macro err2 200: - .section __ex_table,"a" - .align 3 - .llong 200b,.Ldo_err2 - .previous + EX_TABLE(200b,.Ldo_err2) .endm .macro err3 300: - .section __ex_table,"a" - .align 3 - .llong 300b,.Ldo_err3 - .previous + EX_TABLE(300b,.Ldo_err3) .endm .Ldo_err1: diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 1a4e570f7894..7414034df1c3 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -7,7 +7,8 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y := fault.o mem.o pgtable.o mmap.o \ - init_$(BITS).o pgtable_$(BITS).o + init_$(BITS).o pgtable_$(BITS).o \ + init-common.o obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ tlb_nohash_low.o obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o @@ -42,3 +43,5 @@ obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o 
+obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o +obj-$(CONFIG_PPC_HTDUMP) += dump_hashpagetable.o diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 362954f98029..aaa7ec6788b9 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -134,6 +134,9 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) pr_debug("%s: invalid region access at %016llx\n", __func__, ea); return 1; } + /* Bad address */ + if (!vsid) + return 1; vsid = (vsid << slb_vsid_shift(ssize)) | vsidkey; diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c new file mode 100644 index 000000000000..d979709a0239 --- /dev/null +++ b/arch/powerpc/mm/dump_hashpagetable.c @@ -0,0 +1,551 @@ +/* + * Copyright 2016, Rashmica Gupta, IBM Corp. + * + * This traverses the kernel virtual memory and dumps the pages that are in + * the hash pagetable, along with their flags to + * /sys/kernel/debug/kernel_hash_pagetable. + * + * If radix is enabled then there is no hash page table and so no debugfs file + * is generated. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include <linux/debugfs.h> +#include <linux/fs.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <asm/fixmap.h> +#include <asm/pgtable.h> +#include <linux/const.h> +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/plpar_wrappers.h> +#include <linux/memblock.h> +#include <asm/firmware.h> + +struct pg_state { + struct seq_file *seq; + const struct addr_marker *marker; + unsigned long start_address; + unsigned int level; + u64 current_flags; +}; + +struct addr_marker { + unsigned long start_address; + const char *name; +}; + +static struct addr_marker address_markers[] = { + { 0, "Start of kernel VM" }, + { 0, "vmalloc() Area" }, + { 0, "vmalloc() End" }, + { 0, "isa I/O start" }, + { 0, "isa I/O end" }, + { 0, "phb I/O start" }, + { 0, "phb I/O end" }, + { 0, "I/O remap start" }, + { 0, "I/O remap end" }, + { 0, "vmemmap start" }, + { -1, NULL }, +}; + +struct flag_info { + u64 mask; + u64 val; + const char *set; + const char *clear; + bool is_val; + int shift; +}; + +static const struct flag_info v_flag_array[] = { + { + .mask = SLB_VSID_B, + .val = SLB_VSID_B_256M, + .set = "ssize: 256M", + .clear = "ssize: 1T ", + }, { + .mask = HPTE_V_SECONDARY, + .val = HPTE_V_SECONDARY, + .set = "secondary", + .clear = "primary ", + }, { + .mask = HPTE_V_VALID, + .val = HPTE_V_VALID, + .set = "valid ", + .clear = "invalid", + }, { + .mask = HPTE_V_BOLTED, + .val = HPTE_V_BOLTED, + .set = "bolted", + .clear = "", + } +}; + +static const struct flag_info r_flag_array[] = { + { + .mask = HPTE_R_PP0 | HPTE_R_PP, + .val = PP_RWXX, + .set = "prot:RW--", + }, { + .mask = HPTE_R_PP0 | HPTE_R_PP, + .val = PP_RWRX, + .set = "prot:RWR-", + }, { + .mask = HPTE_R_PP0 | HPTE_R_PP, + .val = PP_RWRW, + .set = "prot:RWRW", + }, { + .mask = HPTE_R_PP0 | HPTE_R_PP, + .val = PP_RXRX, + .set = "prot:R-R-", + }, { + .mask = HPTE_R_PP0 | HPTE_R_PP, + .val = PP_RXXX, + .set = "prot:R---", + }, { + .mask = HPTE_R_KEY_HI | HPTE_R_KEY_LO, + .val = HPTE_R_KEY_HI | HPTE_R_KEY_LO, + .set = "key", + .clear = "", + .is_val = true, + }, { + .mask = HPTE_R_R, + .val = HPTE_R_R, + .set = "ref", + .clear = " ", + }, { + .mask = HPTE_R_C, + .val = HPTE_R_C, + 
.set = "changed", + .clear = " ", + }, { + .mask = HPTE_R_N, + .val = HPTE_R_N, + .set = "no execute", + }, { + .mask = HPTE_R_WIMG, + .val = HPTE_R_W, + .set = "writethru", + }, { + .mask = HPTE_R_WIMG, + .val = HPTE_R_I, + .set = "no cache", + }, { + .mask = HPTE_R_WIMG, + .val = HPTE_R_G, + .set = "guarded", + } +}; + +static int calculate_pagesize(struct pg_state *st, int ps, char s[]) +{ + static const char units[] = "BKMGTPE"; + const char *unit = units; + + while (ps > 9 && unit[1]) { + ps -= 10; + unit++; + } + seq_printf(st->seq, " %s_ps: %i%c\t", s, 1<<ps, *unit); + return ps; +} + +static void dump_flag_info(struct pg_state *st, const struct flag_info + *flag, u64 pte, int num) +{ + unsigned int i; + + for (i = 0; i < num; i++, flag++) { + const char *s = NULL; + u64 val; + + /* flag not defined so don't check it */ + if (flag->mask == 0) + continue; + /* Some 'flags' are actually values */ + if (flag->is_val) { + val = pte & flag->val; + if (flag->shift) + val = val >> flag->shift; + seq_printf(st->seq, " %s:%llx", flag->set, val); + } else { + if ((pte & flag->mask) == flag->val) + s = flag->set; + else + s = flag->clear; + if (s) + seq_printf(st->seq, " %s", s); + } + } +} + +static void dump_hpte_info(struct pg_state *st, unsigned long ea, u64 v, u64 r, + unsigned long rpn, int bps, int aps, unsigned long lp) +{ + int aps_index; + + while (ea >= st->marker[1].start_address) { + st->marker++; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + seq_printf(st->seq, "0x%lx:\t", ea); + seq_printf(st->seq, "AVPN:%llx\t", HPTE_V_AVPN_VAL(v)); + dump_flag_info(st, v_flag_array, v, ARRAY_SIZE(v_flag_array)); + seq_printf(st->seq, " rpn: %lx\t", rpn); + dump_flag_info(st, r_flag_array, r, ARRAY_SIZE(r_flag_array)); + + calculate_pagesize(st, bps, "base"); + aps_index = calculate_pagesize(st, aps, "actual"); + if (aps_index != 2) + seq_printf(st->seq, "LP enc: %lx", lp); + seq_puts(st->seq, "\n"); +} + + +static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64 + *r) +{ + struct hash_pte *hptep; + unsigned long hash, vsid, vpn, hpte_group, want_v, hpte_v; + int i, ssize = mmu_kernel_ssize; + unsigned long shift = mmu_psize_defs[psize].shift; + + /* calculate hash */ + vsid = get_kernel_vsid(ea, ssize); + vpn = hpt_vpn(ea, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + want_v = hpte_encode_avpn(vpn, psize, ssize); + + /* to check in the secondary hash table, we invert the hash */ + if (!primary) + hash = ~hash; + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; + for (i = 0; i < HPTES_PER_GROUP; i++) { + hptep = htab_address + hpte_group; + hpte_v = be64_to_cpu(hptep->v); + + if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) { + /* HPTE matches */ + *v = be64_to_cpu(hptep->v); + *r = be64_to_cpu(hptep->r); + return 0; + } + ++hpte_group; + } + return -1; +} + +#ifdef CONFIG_PPC_PSERIES +static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r) +{ + struct hash_pte ptes[4]; + unsigned long vsid, vpn, hash, hpte_group, want_v; + int i, j, ssize = mmu_kernel_ssize; + long lpar_rc = 0; + unsigned long shift = mmu_psize_defs[psize].shift; + + /* calculate hash */ + vsid = get_kernel_vsid(ea, ssize); + vpn = hpt_vpn(ea, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + want_v = hpte_encode_avpn(vpn, psize, ssize); + + /* to check in the secondary hash table, we invert the hash */ + if (!primary) + hash = ~hash; + hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + /* see if we can find 
an entry in the hpte with this hash */ + for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) { + lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes); + + if (lpar_rc != H_SUCCESS) + continue; + for (j = 0; j < 4; j++) { + if (HPTE_V_COMPARE(ptes[j].v, want_v) && + (ptes[j].v & HPTE_V_VALID)) { + /* HPTE matches */ + *v = ptes[j].v; + *r = ptes[j].r; + return 0; + } + } + } + return -1; +} +#endif +
+static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps, + unsigned long *lp_bits) +{ + struct mmu_psize_def entry; + unsigned long arpn, mask, lp; + int penc = -2, idx = 0, shift; + + /* + * The LP field has 8 bits. Depending on the actual page size, some of + * these bits are concatenated with the ARPN to get the RPN. The rest + * of the bits in the LP field are the LP value, an encoding for + * the base page size and the actual page size. + * + * - find the mmu entry for our base page size + * - go through all page encodings and use the associated mask to + * find an encoding that matches our encoding in the LP field. + */ + arpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT; + lp = arpn & 0xff; + + entry = mmu_psize_defs[bps]; + while (idx < MMU_PAGE_COUNT) { + penc = entry.penc[idx]; + if ((penc != -1) && (mmu_psize_defs[idx].shift)) { + shift = mmu_psize_defs[idx].shift - HPTE_R_RPN_SHIFT; + mask = (0x1 << (shift)) - 1; + if ((lp & mask) == penc) { + *aps = mmu_psize_to_shift(idx); + *lp_bits = lp & mask; + *rpn = arpn >> shift; + return; + } + } + idx++; + } +} +
+static int base_hpte_find(unsigned long ea, int psize, bool primary, u64 *v, + u64 *r) +{ +#ifdef CONFIG_PPC_PSERIES + if (firmware_has_feature(FW_FEATURE_LPAR)) + return pseries_find(ea, psize, primary, v, r); +#endif + return native_find(ea, psize, primary, v, r); +} +
+static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize) +{ + unsigned long slot; + u64 v = 0, r = 0; + unsigned long rpn, lp_bits; + int base_psize = 0, actual_psize = -1; + + if (ea < PAGE_OFFSET) + return -1; + + /* Look in primary table */ + slot = base_hpte_find(ea, psize, true, &v, &r); + + /* Look in secondary table */ + if (slot == -1) + slot = base_hpte_find(ea, psize, false, &v, &r); + + /* No entry found */ + if (slot == -1) + return -1; + + /* + * We found an entry in the hash page table: + * - check that this has the same base page size + * - find the actual page size + * - find the RPN + */ + base_psize = mmu_psize_to_shift(psize); + + if ((v & HPTE_V_LARGE) == HPTE_V_LARGE) { + decode_r(psize, r, &rpn, &actual_psize, &lp_bits); + } else { + /* 4K actual page size */ + actual_psize = 12; + rpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT; + /* In this case there are no LP bits */ + lp_bits = -1; + } + /* + * We didn't find a matching encoding, so the PTE we found isn't for + * this address.
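+ * (For instance, an HPTE whose LP bits encode a different base page
+ * size will fail to decode above, leaving actual_psize at -1.)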
+ */ + if (actual_psize == -1) + return -1; + + dump_hpte_info(st, ea, v, r, rpn, base_psize, actual_psize, lp_bits); + return 0; +} + +static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) +{ + pte_t *pte = pte_offset_kernel(pmd, 0); + unsigned long addr, pteval, psize; + int i, status; + + for (i = 0; i < PTRS_PER_PTE; i++, pte++) { + addr = start + i * PAGE_SIZE; + pteval = pte_val(*pte); + + if (addr < VMALLOC_END) + psize = mmu_vmalloc_psize; + else + psize = mmu_io_psize; +#ifdef CONFIG_PPC_64K_PAGES + /* check for secret 4K mappings */ + if (((pteval & H_PAGE_COMBO) == H_PAGE_COMBO) || + ((pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN)) + psize = mmu_io_psize; +#endif + /* check for hashpte */ + status = hpte_find(st, addr, psize); + + if (((pteval & H_PAGE_HASHPTE) != H_PAGE_HASHPTE) + && (status != -1)) { + /* found a hpte that is not in the linux page tables */ + seq_printf(st->seq, "page probably bolted before linux" + " pagetables were set: addr:%lx, pteval:%lx\n", + addr, pteval); + } + } +} + +static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) +{ + pmd_t *pmd = pmd_offset(pud, 0); + unsigned long addr; + unsigned int i; + + for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { + addr = start + i * PMD_SIZE; + if (!pmd_none(*pmd)) + /* pmd exists */ + walk_pte(st, pmd, addr); + } +} + +static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +{ + pud_t *pud = pud_offset(pgd, 0); + unsigned long addr; + unsigned int i; + + for (i = 0; i < PTRS_PER_PUD; i++, pud++) { + addr = start + i * PUD_SIZE; + if (!pud_none(*pud)) + /* pud exists */ + walk_pmd(st, pud, addr); + } +} + +static void walk_pagetables(struct pg_state *st) +{ + pgd_t *pgd = pgd_offset_k(0UL); + unsigned int i; + unsigned long addr; + + /* + * Traverse the linux pagetable structure and dump pages that are in + * the hash pagetable. + */ + for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { + addr = KERN_VIRT_START + i * PGDIR_SIZE; + if (!pgd_none(*pgd)) + /* pgd exists */ + walk_pud(st, pgd, addr); + } +} + + +static void walk_linearmapping(struct pg_state *st) +{ + unsigned long addr; + + /* + * Traverse the linear mapping section of virtual memory and dump pages + * that are in the hash pagetable. + */ + unsigned long psize = 1 << mmu_psize_defs[mmu_linear_psize].shift; + + for (addr = PAGE_OFFSET; addr < PAGE_OFFSET + + memblock_phys_mem_size(); addr += psize) + hpte_find(st, addr, mmu_linear_psize); +} + +static void walk_vmemmap(struct pg_state *st) +{ +#ifdef CONFIG_SPARSEMEM_VMEMMAP + struct vmemmap_backing *ptr = vmemmap_list; + + /* + * Traverse the vmemmaped memory and dump pages that are in the hash + * pagetable. 
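+ * (vmemmap_list is a singly linked chain of vmemmap_backing entries;
+ * each entry's virt_addr is probed in the hash table using
+ * mmu_vmemmap_psize.)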
+ */ + while (ptr->list) { + hpte_find(st, ptr->virt_addr, mmu_vmemmap_psize); + ptr = ptr->list; + } + seq_puts(st->seq, "---[ vmemmap end ]---\n"); +#endif +} + +static void populate_markers(void) +{ + address_markers[0].start_address = PAGE_OFFSET; + address_markers[1].start_address = VMALLOC_START; + address_markers[2].start_address = VMALLOC_END; + address_markers[3].start_address = ISA_IO_BASE; + address_markers[4].start_address = ISA_IO_END; + address_markers[5].start_address = PHB_IO_BASE; + address_markers[6].start_address = PHB_IO_END; + address_markers[7].start_address = IOREMAP_BASE; + address_markers[8].start_address = IOREMAP_END; +#ifdef CONFIG_PPC_STD_MMU_64 + address_markers[9].start_address = H_VMEMMAP_BASE; +#else + address_markers[9].start_address = VMEMMAP_BASE; +#endif +} + +static int ptdump_show(struct seq_file *m, void *v) +{ + struct pg_state st = { + .seq = m, + .start_address = PAGE_OFFSET, + .marker = address_markers, + }; + /* + * Traverse the 0xc, 0xd and 0xf areas of the kernel virtual memory and + * dump pages that are in the hash pagetable. + */ + walk_linearmapping(&st); + walk_pagetables(&st); + walk_vmemmap(&st); + return 0; +} + +static int ptdump_open(struct inode *inode, struct file *file) +{ + return single_open(file, ptdump_show, NULL); +} + +static const struct file_operations ptdump_fops = { + .open = ptdump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ptdump_init(void) +{ + struct dentry *debugfs_file; + + if (!radix_enabled()) { + populate_markers(); + debugfs_file = debugfs_create_file("kernel_hash_pagetable", + 0400, NULL, NULL, &ptdump_fops); + return debugfs_file ? 0 : -ENOMEM; + } + return 0; +} +device_initcall(ptdump_init); diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c new file mode 100644 index 000000000000..49abaf4dc8e3 --- /dev/null +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -0,0 +1,442 @@ +/* + * Copyright 2016, Rashmica Gupta, IBM Corp. + * + * This traverses the kernel pagetables and dumps the + * information about the used sections of memory to + * /sys/kernel/debug/kernel_pagetables. + * + * Derived from the arm64 implementation: + * Copyright (c) 2014, The Linux Foundation, Laura Abbott. + * (C) Copyright 2008 Intel Corporation, Arjan van de Ven. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include <linux/debugfs.h> +#include <linux/fs.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <asm/fixmap.h> +#include <asm/pgtable.h> +#include <linux/const.h> +#include <asm/page.h> +#include <asm/pgalloc.h> + +/* + * To visualise what is happening, + * + * - PTRS_PER_P** = how many entries there are in the corresponding P** + * - P**_SHIFT = how many bits of the address we use to index into the + * corresponding P** + * - P**_SIZE is how much memory we can access through the table - not the + * size of the table itself. + * P**={PGD, PUD, PMD, PTE} + * + * + * Each entry of the PGD points to a PUD. Each entry of a PUD points to a + * PMD. Each entry of a PMD points to a PTE. And every PTE entry points to + * a page. + * + * In the case where there are only 3 levels, the PUD is folded into the + * PGD: every PUD has only one entry which points to the PMD. 
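+ * For example, one PTE page maps PTRS_PER_PTE * PAGE_SIZE bytes of
+ * virtual address space, which is the same amount a single PMD entry
+ * covers (PMD_SIZE, i.e. 1 << PMD_SHIFT).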
+ * + * The page dumper groups page table entries of the same type into a single + * description. It uses pg_state to track the range information while + * iterating over the PTE entries. When the continuity is broken it then + * dumps out a description of the range - ie PTEs that are virtually contiguous + * with the same PTE flags are chunked together. This is to make it clear how + * different areas of the kernel virtual memory are used. + * + */ +struct pg_state { + struct seq_file *seq; + const struct addr_marker *marker; + unsigned long start_address; + unsigned int level; + u64 current_flags; +}; + +struct addr_marker { + unsigned long start_address; + const char *name; +}; + +static struct addr_marker address_markers[] = { + { 0, "Start of kernel VM" }, + { 0, "vmalloc() Area" }, + { 0, "vmalloc() End" }, + { 0, "isa I/O start" }, + { 0, "isa I/O end" }, + { 0, "phb I/O start" }, + { 0, "phb I/O end" }, + { 0, "I/O remap start" }, + { 0, "I/O remap end" }, + { 0, "vmemmap start" }, + { -1, NULL }, +}; + +struct flag_info { + u64 mask; + u64 val; + const char *set; + const char *clear; + bool is_val; + int shift; +}; + +static const struct flag_info flag_array[] = { + { +#ifdef CONFIG_PPC_STD_MMU_64 + .mask = _PAGE_PRIVILEGED, + .val = 0, +#else + .mask = _PAGE_USER, + .val = _PAGE_USER, +#endif + .set = "user", + .clear = " ", + }, { + .mask = _PAGE_RW, + .val = _PAGE_RW, + .set = "rw", + .clear = "ro", + }, { + .mask = _PAGE_EXEC, + .val = _PAGE_EXEC, + .set = " X ", + .clear = " ", + }, { + .mask = _PAGE_PTE, + .val = _PAGE_PTE, + .set = "pte", + .clear = " ", + }, { + .mask = _PAGE_PRESENT, + .val = _PAGE_PRESENT, + .set = "present", + .clear = " ", + }, { +#ifdef CONFIG_PPC_STD_MMU_64 + .mask = H_PAGE_HASHPTE, + .val = H_PAGE_HASHPTE, +#else + .mask = _PAGE_HASHPTE, + .val = _PAGE_HASHPTE, +#endif + .set = "hpte", + .clear = " ", + }, { +#ifndef CONFIG_PPC_STD_MMU_64 + .mask = _PAGE_GUARDED, + .val = _PAGE_GUARDED, + .set = "guarded", + .clear = " ", + }, { +#endif + .mask = _PAGE_DIRTY, + .val = _PAGE_DIRTY, + .set = "dirty", + .clear = " ", + }, { + .mask = _PAGE_ACCESSED, + .val = _PAGE_ACCESSED, + .set = "accessed", + .clear = " ", + }, { +#ifndef CONFIG_PPC_STD_MMU_64 + .mask = _PAGE_WRITETHRU, + .val = _PAGE_WRITETHRU, + .set = "write through", + .clear = " ", + }, { +#endif + .mask = _PAGE_NO_CACHE, + .val = _PAGE_NO_CACHE, + .set = "no cache", + .clear = " ", + }, { +#ifdef CONFIG_PPC_BOOK3S_64 + .mask = H_PAGE_BUSY, + .val = H_PAGE_BUSY, + .set = "busy", + }, { +#ifdef CONFIG_PPC_64K_PAGES + .mask = H_PAGE_COMBO, + .val = H_PAGE_COMBO, + .set = "combo", + }, { + .mask = H_PAGE_4K_PFN, + .val = H_PAGE_4K_PFN, + .set = "4K_pfn", + }, { +#endif + .mask = H_PAGE_F_GIX, + .val = H_PAGE_F_GIX, + .set = "f_gix", + .is_val = true, + .shift = H_PAGE_F_GIX_SHIFT, + }, { + .mask = H_PAGE_F_SECOND, + .val = H_PAGE_F_SECOND, + .set = "f_second", + }, { +#endif + .mask = _PAGE_SPECIAL, + .val = _PAGE_SPECIAL, + .set = "special", + } +}; + +struct pgtable_level { + const struct flag_info *flag; + size_t num; + u64 mask; +}; + +static struct pgtable_level pg_level[] = { + { + }, { /* pgd */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* pud */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* pmd */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* pte */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, +}; + +static void dump_flag_info(struct pg_state *st, const struct flag_info + *flag, u64 pte, int num) +{ + unsigned int i; 
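+ /*
+ * Each flag_info entry is either a single bit (print .set when
+ * (pte & mask) == val, else .clear) or, with is_val set, a multi-bit
+ * field whose masked and shifted value is printed in hex.
+ */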
+ + for (i = 0; i < num; i++, flag++) { + const char *s = NULL; + u64 val; + + /* flag not defined so don't check it */ + if (flag->mask == 0) + continue; + /* Some 'flags' are actually values */ + if (flag->is_val) { + val = pte & flag->val; + if (flag->shift) + val = val >> flag->shift; + seq_printf(st->seq, " %s:%llx", flag->set, val); + } else { + if ((pte & flag->mask) == flag->val) + s = flag->set; + else + s = flag->clear; + if (s) + seq_printf(st->seq, " %s", s); + } + st->current_flags &= ~flag->mask; + } + if (st->current_flags != 0) + seq_printf(st->seq, " unknown flags:%llx", st->current_flags); +} + +static void dump_addr(struct pg_state *st, unsigned long addr) +{ + static const char units[] = "KMGTPE"; + const char *unit = units; + unsigned long delta; + + seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr-1); + delta = (addr - st->start_address) >> 10; + /* Work out what appropriate unit to use */ + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; + } + seq_printf(st->seq, "%9lu%c", delta, *unit); + +} + +static void note_page(struct pg_state *st, unsigned long addr, + unsigned int level, u64 val) +{ + u64 flag = val & pg_level[level].mask; + /* At first no level is set */ + if (!st->level) { + st->level = level; + st->current_flags = flag; + st->start_address = addr; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + /* + * Dump the section of virtual memory when: + * - the PTE flags from one entry to the next differs. + * - we change levels in the tree. + * - the address is in a different section of memory and is thus + * used for a different purpose, regardless of the flags. + */ + } else if (flag != st->current_flags || level != st->level || + addr >= st->marker[1].start_address) { + + /* Check the PTE flags */ + if (st->current_flags) { + dump_addr(st, addr); + + /* Dump all the flags */ + if (pg_level[st->level].flag) + dump_flag_info(st, pg_level[st->level].flag, + st->current_flags, + pg_level[st->level].num); + + seq_puts(st->seq, "\n"); + } + + /* + * Address indicates we have passed the end of the + * current section of virtual memory + */ + while (addr >= st->marker[1].start_address) { + st->marker++; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + st->start_address = addr; + st->current_flags = flag; + st->level = level; + } +} + +static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) +{ + pte_t *pte = pte_offset_kernel(pmd, 0); + unsigned long addr; + unsigned int i; + + for (i = 0; i < PTRS_PER_PTE; i++, pte++) { + addr = start + i * PAGE_SIZE; + note_page(st, addr, 4, pte_val(*pte)); + + } +} + +static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) +{ + pmd_t *pmd = pmd_offset(pud, 0); + unsigned long addr; + unsigned int i; + + for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { + addr = start + i * PMD_SIZE; + if (!pmd_none(*pmd)) + /* pmd exists */ + walk_pte(st, pmd, addr); + else + note_page(st, addr, 3, pmd_val(*pmd)); + } +} + +static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +{ + pud_t *pud = pud_offset(pgd, 0); + unsigned long addr; + unsigned int i; + + for (i = 0; i < PTRS_PER_PUD; i++, pud++) { + addr = start + i * PUD_SIZE; + if (!pud_none(*pud)) + /* pud exists */ + walk_pmd(st, pud, addr); + else + note_page(st, addr, 2, pud_val(*pud)); + } +} + +static void walk_pagetables(struct pg_state *st) +{ + pgd_t *pgd = pgd_offset_k(0UL); + unsigned int i; + unsigned long addr; + + /* + * Traverse the linux pagetable structure and dump 
the pages that are in use. + */ + for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { + addr = KERN_VIRT_START + i * PGDIR_SIZE; + if (!pgd_none(*pgd)) + /* pgd exists */ + walk_pud(st, pgd, addr); + else + note_page(st, addr, 1, pgd_val(*pgd)); + } +} + +static void populate_markers(void) +{ + address_markers[0].start_address = PAGE_OFFSET; + address_markers[1].start_address = VMALLOC_START; + address_markers[2].start_address = VMALLOC_END; + address_markers[3].start_address = ISA_IO_BASE; + address_markers[4].start_address = ISA_IO_END; + address_markers[5].start_address = PHB_IO_BASE; + address_markers[6].start_address = PHB_IO_END; + address_markers[7].start_address = IOREMAP_BASE; + address_markers[8].start_address = IOREMAP_END; +#ifdef CONFIG_PPC_STD_MMU_64 + address_markers[9].start_address = H_VMEMMAP_BASE; +#else + address_markers[9].start_address = VMEMMAP_BASE; +#endif +} + +static int ptdump_show(struct seq_file *m, void *v) +{ + struct pg_state st = { + .seq = m, + .start_address = KERN_VIRT_START, + .marker = address_markers, + }; + /* Traverse kernel page tables */ + walk_pagetables(&st); + note_page(&st, 0, 0, 0); + return 0; +} + + +static int ptdump_open(struct inode *inode, struct file *file) +{ + return single_open(file, ptdump_show, NULL); +} + +static const struct file_operations ptdump_fops = { + .open = ptdump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void build_pgtable_complete_mask(void) +{ + unsigned int i, j; + + for (i = 0; i < ARRAY_SIZE(pg_level); i++) + if (pg_level[i].flag) + for (j = 0; j < pg_level[i].num; j++) + pg_level[i].mask |= pg_level[i].flag[j].mask; +} + +static int ptdump_init(void) +{ + struct dentry *debugfs_file; + + populate_markers(); + build_pgtable_complete_mask(); + debugfs_file = debugfs_create_file("kernel_pagetables", 0400, NULL, + NULL, &ptdump_fops); + return debugfs_file ? 0 : -ENOMEM; +} +device_initcall(ptdump_init); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index d0b137d96df1..6fd30ac7d14a 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -391,6 +391,20 @@ good_area: if (is_exec) { /* + * An execution fault + no execute ? + * + * On CPUs that don't have CPU_FTR_COHERENT_ICACHE we + * deliberately create NX mappings, and use the fault to do the + * cache flush. This is usually handled in hash_page_do_lazy_icache() + * but we could end up here if that races with a concurrent PTE + * update. In that case we need to fall through here to the VMA + * check below. + */ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && + (regs->msr & SRR1_ISI_N_OR_G)) + goto bad_area; + + /* * Allow execution from readable areas if the MMU does not * provide separate controls over reading and executing. * @@ -404,6 +418,7 @@ good_area: (cpu_has_feature(CPU_FTR_NOEXECUTE) || !(vma->vm_flags & (VM_READ | VM_WRITE)))) goto bad_area; + #ifdef CONFIG_PPC_STD_MMU /* * protfault should only happen due to us @@ -512,7 +527,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) /* Are we prepared to handle this fault?
*/ if ((entry = search_exception_tables(regs->nip)) != NULL) { - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return; } diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c index 42c702b3be1f..6fa450c12d6d 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/hash64_4k.c @@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, */ rflags = htab_convert_pte_flags(new_pte); - if (!cpu_has_feature(CPU_FTR_NOEXECUTE) && + if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index 3bbbea07378c..1a68cb19b0e3 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -87,7 +87,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, subpg_pte = new_pte & ~subpg_prot; rflags = htab_convert_pte_flags(subpg_pte); - if (!cpu_has_feature(CPU_FTR_NOEXECUTE) && + if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { /* @@ -258,7 +258,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, rflags = htab_convert_pte_flags(new_pte); - if (!cpu_has_feature(CPU_FTR_NOEXECUTE) && + if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 83ddc0e171b0..cc332608e656 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -123,8 +123,9 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) va |= ssize << 8; sllp = get_sllp_encoding(apsize); va |= sllp << 5; - asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)" - : : "r"(va) : "memory"); + asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,0", %1) + : : "r" (va), "i" (CPU_FTR_ARCH_206) + : "memory"); break; default: /* We need 14 to 14 + i bits of va */ @@ -141,8 +142,9 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) */ va |= (vpn & 0xfe); va |= 1; /* L */ - asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)" - : : "r"(va) : "memory"); + asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,1", %1) + : : "r" (va), "i" (CPU_FTR_ARCH_206) + : "memory"); break; } @@ -221,13 +223,18 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn, return -1; hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags; + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; if (!(vflags & HPTE_V_BOLTED)) { DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n", i, hpte_v, hpte_r); } + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hpte_r = hpte_old_to_new_r(hpte_v, hpte_r); + hpte_v = hpte_old_to_new_v(hpte_v); + } + hptep->r = cpu_to_be64(hpte_r); /* Guarantee the second dword is visible before the valid bit */ eieio(); @@ -295,6 +302,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, vpn, want_v & HPTE_V_AVPN, slot, newpp); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); /* * We need to invalidate the TLB always because hpte_remove doesn't do * a tlb invalidate. 
If a hash bucket gets full, we "evict" a more/less @@ -309,6 +318,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, native_lock_hpte(hptep); /* recheck with locks held */ hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))) { ret = -1; @@ -350,6 +361,8 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize) for (i = 0; i < HPTES_PER_GROUP; i++) { hptep = htab_address + slot; hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) /* HPTE matches */ @@ -409,6 +422,8 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, want_v = hpte_encode_avpn(vpn, bpsize, ssize); native_lock_hpte(hptep); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); /* * We need to invalidate the TLB always because hpte_remove doesn't do @@ -467,6 +482,8 @@ static void native_hugepage_invalidate(unsigned long vsid, want_v = hpte_encode_avpn(vpn, psize, ssize); native_lock_hpte(hptep); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); /* Even if we miss, we need to invalidate the TLB */ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) @@ -504,6 +521,10 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, /* Look at the 8 bit LP value */ unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hpte_v = hpte_new_to_old_v(hpte_v, hpte_r); + hpte_r = hpte_new_to_old_r(hpte_r); + } if (!(hpte_v & HPTE_V_LARGE)) { size = MMU_PAGE_4K; a_size = MMU_PAGE_4K; @@ -512,11 +533,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, a_size = hpte_page_sizes[lp] >> 4; } /* This works for all page sizes, and for 256M and 1T segments */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) - *ssize = hpte_r >> HPTE_R_3_0_SSIZE_SHIFT; - else - *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; - + *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; shift = mmu_psize_defs[size].shift; avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm); @@ -639,6 +656,9 @@ static void native_flush_hash_range(unsigned long number, int local) want_v = hpte_encode_avpn(vpn, psize, ssize); native_lock_hpte(hptep); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, + be64_to_cpu(hptep->r)); if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) native_unlock_hpte(hptep); diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 44d3c3a38e3e..8410b4bb36ed 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -193,8 +193,12 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) /* * Kernel read only mapped with ppp bits 0b110 */ - if (!(pteflags & _PAGE_WRITE)) - rflags |= (HPTE_R_PP0 | 0x2); + if (!(pteflags & _PAGE_WRITE)) { + if (mmu_has_feature(MMU_FTR_KERNEL_RO)) + rflags |= (HPTE_R_PP0 | 0x2); + else + rflags |= 0x3; + } } else { if (pteflags & _PAGE_RWX) rflags |= 0x2; @@ -792,37 +796,17 @@ static void update_hid_for_hash(void) static void __init hash_init_partition_table(phys_addr_t hash_table, unsigned long htab_size) { - 
unsigned long ps_field; - unsigned long patb_size = 1UL << PATB_SIZE_SHIFT; + mmu_partition_table_init(); /* - * slb llp encoding for the page size used in VPM real mode. - * We can ignore that for lpid 0 + * PS field (VRMA page size) is not used for LPID 0, hence set to 0. + * For now, UPRT is 0 and we have no segment table. */ - ps_field = 0; htab_size = __ilog2(htab_size) - 18; - - BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); - partition_tb = __va(memblock_alloc_base(patb_size, patb_size, - MEMBLOCK_ALLOC_ANYWHERE)); - - /* Initialize the Partition Table with no entries */ - memset((void *)partition_tb, 0, patb_size); - partition_tb->patb0 = cpu_to_be64(ps_field | hash_table | htab_size); - /* - * FIXME!! This should be done via update_partition table - * For now UPRT is 0 for us. - */ - partition_tb->patb1 = 0; + mmu_partition_table_set_entry(0, hash_table | htab_size, 0); pr_info("Partition table %p\n", partition_tb); if (cpu_has_feature(CPU_FTR_POWER9_DD1)) update_hid_for_hash(); - /* - * update partition table control register, - * 64 K size. - */ - mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); - } static void __init htab_initialize(void) @@ -1029,6 +1013,10 @@ void hash__early_init_mmu_secondary(void) { /* Initialize hash table for that CPU */ if (!firmware_has_feature(FW_FEATURE_LPAR)) { + + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + update_hid_for_hash(); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) mtspr(SPRN_SDR1, _SDR1); else diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index a5d3ecdabc44..289df38fb7e0 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -26,6 +26,8 @@ #ifdef CONFIG_HUGETLB_PAGE #define PAGE_SHIFT_64K 16 +#define PAGE_SHIFT_512K 19 +#define PAGE_SHIFT_8M 23 #define PAGE_SHIFT_16M 24 #define PAGE_SHIFT_16G 34 @@ -38,7 +40,7 @@ unsigned int HPAGE_SHIFT; * implementations may have more than one gpage size, so we need multiple * arrays */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) #define MAX_NUMBER_GPAGES 128 struct psize_gpages { u64 gpage_list[MAX_NUMBER_GPAGES]; @@ -64,14 +66,16 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, { struct kmem_cache *cachep; pte_t *new; - -#ifdef CONFIG_PPC_FSL_BOOK3E int i; - int num_hugepd = 1 << (pshift - pdshift); - cachep = hugepte_cache; -#else - cachep = PGT_CACHE(pdshift - pshift); -#endif + int num_hugepd; + + if (pshift >= pdshift) { + cachep = hugepte_cache; + num_hugepd = 1 << (pshift - pdshift); + } else { + cachep = PGT_CACHE(pdshift - pshift); + num_hugepd = 1; + } new = kmem_cache_zalloc(cachep, GFP_KERNEL); @@ -89,7 +93,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, smp_wmb(); spin_lock(&mm->page_table_lock); -#ifdef CONFIG_PPC_FSL_BOOK3E + /* * We have multiple higher-level entries that point to the same * actual pte location. Fill in each as we go and backtrack on error. @@ -100,8 +104,18 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (unlikely(!hugepd_none(*hpdp))) break; else +#ifdef CONFIG_PPC_BOOK3S_64 + hpdp->pd = __pa(new) | + (shift_to_mmu_psize(pshift) << 2); +#elif defined(CONFIG_PPC_8xx) + hpdp->pd = __pa(new) | + (pshift == PAGE_SHIFT_8M ? 
_PMD_PAGE_8M : + _PMD_PAGE_512K) | + _PMD_PRESENT; +#else /* We use the old format for PPC_FSL_BOOK3E */ hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; +#endif } /* If we bailed from the for loop early, an error occurred, clean up */ if (i < num_hugepd) { @@ -109,17 +123,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, hpdp->pd = 0; kmem_cache_free(cachep, new); } -#else - if (!hugepd_none(*hpdp)) - kmem_cache_free(cachep, new); - else { -#ifdef CONFIG_PPC_BOOK3S_64 - hpdp->pd = __pa(new) | (shift_to_mmu_psize(pshift) << 2); -#else - hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; -#endif - } -#endif spin_unlock(&mm->page_table_lock); return 0; } @@ -128,7 +131,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, * These macros define how to determine which level of the page table holds * the hpdp. */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) #define HUGEPD_PGD_SHIFT PGDIR_SHIFT #define HUGEPD_PUD_SHIFT PUD_SHIFT #else @@ -136,7 +139,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, #define HUGEPD_PUD_SHIFT PMD_SHIFT #endif -#ifdef CONFIG_PPC_BOOK3S_64 /* * At this point we do the placement change only for BOOK3S 64. This would * possibly work on other subarchs. @@ -153,6 +155,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz addr &= ~(sz-1); pg = pgd_offset(mm, addr); +#ifdef CONFIG_PPC_BOOK3S_64 if (pshift == PGDIR_SHIFT) /* 16GB huge page */ return (pte_t *) pg; @@ -178,32 +181,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz hpdp = (hugepd_t *)pm; } } - if (!hpdp) - return NULL; - - BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); - - if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) - return NULL; - - return hugepte_offset(*hpdp, addr, pdshift); -} - #else - -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) -{ - pgd_t *pg; - pud_t *pu; - pmd_t *pm; - hugepd_t *hpdp = NULL; - unsigned pshift = __ffs(sz); - unsigned pdshift = PGDIR_SHIFT; - - addr &= ~(sz-1); - - pg = pgd_offset(mm, addr); - if (pshift >= HUGEPD_PGD_SHIFT) { hpdp = (hugepd_t *)pg; } else { @@ -217,7 +195,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz hpdp = (hugepd_t *)pm; } } - +#endif if (!hpdp) return NULL; @@ -228,9 +206,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz return hugepte_offset(*hpdp, addr, pdshift); } -#endif -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) /* Build list of addresses of gigantic pages. This function is used in early * boot before the buddy allocator is setup. 
*/ @@ -310,7 +287,11 @@ static int __init do_gpage_early_setup(char *param, char *val, npages = 0; if (npages > MAX_NUMBER_GPAGES) { pr_warn("MMU: %lu pages requested for page " +#ifdef CONFIG_PHYS_ADDR_T_64BIT "size %llu KB, limiting to " +#else + "size %u KB, limiting to " +#endif __stringify(MAX_NUMBER_GPAGES) "\n", npages, size / 1024); npages = MAX_NUMBER_GPAGES; @@ -392,7 +373,7 @@ int alloc_bootmem_huge_page(struct hstate *hstate) } #endif -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) #define HUGEPD_FREELIST_SIZE \ ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t)) @@ -442,6 +423,8 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) } put_cpu_var(hugepd_freelist_cur); } +#else +static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {} #endif static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, @@ -453,13 +436,11 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif unsigned long pdmask = ~((1UL << pdshift) - 1); unsigned int num_hugepd = 1; + unsigned int shift = hugepd_shift(*hpdp); -#ifdef CONFIG_PPC_FSL_BOOK3E /* Note: On fsl the hpdp may be the first of several */ - num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift)); -#else - unsigned int shift = hugepd_shift(*hpdp); -#endif + if (shift > pdshift) + num_hugepd = 1 << (shift - pdshift); start &= pdmask; if (start < floor) @@ -475,11 +456,10 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif for (i = 0; i < num_hugepd; i++, hpdp++) hpdp->pd = 0; -#ifdef CONFIG_PPC_FSL_BOOK3E - hugepd_free(tlb, hugepte); -#else - pgtable_free_tlb(tlb, hugepte, pdshift - shift); -#endif + if (shift >= pdshift) + hugepd_free(tlb, hugepte); + else + pgtable_free_tlb(tlb, hugepte, pdshift - shift); } static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, @@ -492,6 +472,8 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, start = addr; do { + unsigned long more; + pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); if (!is_hugepd(__hugepd(pmd_val(*pmd)))) { @@ -502,15 +484,16 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, WARN_ON(!pmd_none_or_clear_bad(pmd)); continue; } -#ifdef CONFIG_PPC_FSL_BOOK3E /* * Increment next by the size of the huge mapping since * there may be more than one entry at this level for a * single hugepage, but all of them point to * the same kmem cache that holds the hugepte. */ - next = addr + (1 << hugepd_shift(*(hugepd_t *)pmd)); -#endif + more = addr + (1 << hugepd_shift(*(hugepd_t *)pmd)); + if (more > next) + next = more; + free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT, addr, next, floor, ceiling); } while (addr = next, addr != end); @@ -550,15 +533,17 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); } else { -#ifdef CONFIG_PPC_FSL_BOOK3E + unsigned long more; /* * Increment next by the size of the huge mapping since * there may be more than one entry at this level for a * single hugepage, but all of them point to * the same kmem cache that holds the hugepte. 
*/ - next = addr + (1 << hugepd_shift(*(hugepd_t *)pud)); -#endif + more = addr + (1 << hugepd_shift(*(hugepd_t *)pud)); + if (more > next) + next = more; + free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT, addr, next, floor, ceiling); } @@ -615,15 +600,17 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, continue; hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); } else { -#ifdef CONFIG_PPC_FSL_BOOK3E + unsigned long more; /* * Increment next by the size of the huge mapping since * there may be more than one entry at the pgd level * for a single hugepage, but all of them point to the * same kmem cache that holds the hugepte. */ - next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd)); -#endif + more = addr + (1 << hugepd_shift(*(hugepd_t *)pgd)); + if (more > next) + next = more; + free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, addr, next, floor, ceiling); } @@ -753,12 +740,13 @@ static int __init add_huge_page_size(unsigned long long size) /* Check that it is a page size supported by the hardware and * that it fits within pagetable and slice limits. */ -#ifdef CONFIG_PPC_FSL_BOOK3E - if ((size < PAGE_SIZE) || !is_power_of_4(size)) + if (size <= PAGE_SIZE) return -EINVAL; -#else - if (!is_power_of_2(size) - || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT)) +#if defined(CONFIG_PPC_FSL_BOOK3E) + if (!is_power_of_4(size)) + return -EINVAL; +#elif !defined(CONFIG_PPC_8xx) + if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT)) return -EINVAL; #endif @@ -791,53 +779,15 @@ static int __init hugepage_setup_sz(char *str) } __setup("hugepagesz=", hugepage_setup_sz); -#ifdef CONFIG_PPC_FSL_BOOK3E struct kmem_cache *hugepte_cache; static int __init hugetlbpage_init(void) { int psize; - for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { - unsigned shift; - - if (!mmu_psize_defs[psize].shift) - continue; - - shift = mmu_psize_to_shift(psize); - - /* Don't treat normal page sizes as huge... */ - if (shift != PAGE_SHIFT) - if (add_huge_page_size(1ULL << shift) < 0) - continue; - } - - /* - * Create a kmem cache for hugeptes. The bottom bits in the pte have - * size information encoded in them, so align them to allow this - */ - hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t), - HUGEPD_SHIFT_MASK + 1, 0, NULL); - if (hugepte_cache == NULL) - panic("%s: Unable to create kmem cache for hugeptes\n", - __func__); - - /* Default hpage size = 4M */ - if (mmu_psize_defs[MMU_PAGE_4M].shift) - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift; - else - panic("%s: Unable to set default huge page size\n", __func__); - - - return 0; -} -#else -static int __init hugetlbpage_init(void) -{ - int psize; - +#if !defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_PPC_8xx) if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE)) return -ENODEV; - +#endif for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { unsigned shift; unsigned pdshift; @@ -850,9 +800,9 @@ static int __init hugetlbpage_init(void) if (add_huge_page_size(1ULL << shift) < 0) continue; - if (shift < PMD_SHIFT) + if (shift < HUGEPD_PUD_SHIFT) pdshift = PMD_SHIFT; - else if (shift < PUD_SHIFT) + else if (shift < HUGEPD_PGD_SHIFT) pdshift = PUD_SHIFT; else pdshift = PGDIR_SHIFT; @@ -860,14 +810,38 @@ static int __init hugetlbpage_init(void) * if we have pdshift and shift value same, we don't * use pgt cache for hugepd. 
*/ - if (pdshift != shift) { + if (pdshift > shift) { pgtable_cache_add(pdshift - shift, NULL); if (!PGT_CACHE(pdshift - shift)) panic("hugetlbpage_init(): could not create " "pgtable cache for %d bit pagesize\n", shift); } +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) + else if (!hugepte_cache) { + /* + * Create a kmem cache for hugeptes. The bottom bits in + * the pte have size information encoded in them, so + * align them to allow this + */ + hugepte_cache = kmem_cache_create("hugepte-cache", + sizeof(pte_t), + HUGEPD_SHIFT_MASK + 1, + 0, NULL); + if (hugepte_cache == NULL) + panic("%s: Unable to create kmem cache " + "for hugeptes\n", __func__); + + } +#endif } +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) + /* Default hpage size = 4M on FSL_BOOK3E and 512k on 8xx */ + if (mmu_psize_defs[MMU_PAGE_4M].shift) + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift; + else if (mmu_psize_defs[MMU_PAGE_512K].shift) + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_512K].shift; +#else /* Set default large page size. Currently, we pick 16M or 1M * depending on what is available */ @@ -877,11 +851,13 @@ static int __init hugetlbpage_init(void) HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift; else if (mmu_psize_defs[MMU_PAGE_2M].shift) HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift; - +#endif + else + panic("%s: Unable to set default huge page size\n", __func__); return 0; } -#endif + arch_initcall(hugetlbpage_init); void flush_dcache_icache_hugepage(struct page *page) diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c new file mode 100644 index 000000000000..a175cd82ae8c --- /dev/null +++ b/arch/powerpc/mm/init-common.c @@ -0,0 +1,107 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen <engebret@us.ibm.com> + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#undef DEBUG + +#include <linux/string.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> + +static void pgd_ctor(void *addr) +{ + memset(addr, 0, PGD_TABLE_SIZE); +} + +static void pud_ctor(void *addr) +{ + memset(addr, 0, PUD_TABLE_SIZE); +} + +static void pmd_ctor(void *addr) +{ + memset(addr, 0, PMD_TABLE_SIZE); +} + +struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; + +/* + * Create a kmem_cache() for pagetables. This is not used for PTE + * pages - they're linked to struct page, come from the normal free + * pages pool and have a different entry size (see real_pte_t) to + * everything else. Caches created by this function are used for all + * the higher level pagetables, and for hugepage pagetables. + */ +void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) +{ + char *name; + unsigned long table_size = sizeof(void *) << shift; + unsigned long align = table_size; + + /* When batching pgtable pointers for RCU freeing, we store + * the index size in the low bits. Table alignment must be + * big enough to fit it. + * + * Likewise, hugepage pagetable pointers contain a (different) + * shift value in the low bits.
All tables must be aligned so + * as to leave enough 0 bits in the address to contain it. */ + unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1, + HUGEPD_SHIFT_MASK + 1); + struct kmem_cache *new; + + /* It would be nice if this was a BUILD_BUG_ON(), but at the + * moment, gcc doesn't seem to recognize is_power_of_2 as a + * constant expression, so so much for that. */ + BUG_ON(!is_power_of_2(minalign)); + BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE)); + + if (PGT_CACHE(shift)) + return; /* Already have a cache of this size */ + + align = max_t(unsigned long, align, minalign); + name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); + new = kmem_cache_create(name, table_size, align, 0, ctor); + kfree(name); + pgtable_cache[shift - 1] = new; + pr_debug("Allocated pgtable cache for order %d\n", shift); +} + + +void pgtable_cache_init(void) +{ + pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); + + if (PMD_INDEX_SIZE && !PGT_CACHE(PMD_INDEX_SIZE)) + pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor); + /* + * In all current configs, when the PUD index exists it's the + * same size as either the pgd or pmd index except with THP enabled + * on book3s 64 + */ + if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) + pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); + + if (!PGT_CACHE(PGD_INDEX_SIZE)) + panic("Couldn't allocate pgd cache"); + if (PMD_INDEX_SIZE && !PGT_CACHE(PMD_INDEX_SIZE)) + panic("Couldn't allocate pmd pgtable caches"); + if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) + panic("Couldn't allocate pud pgtable caches"); +} diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 16ada1eb7e26..a000c3585390 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -80,83 +80,6 @@ EXPORT_SYMBOL_GPL(memstart_addr); phys_addr_t kernstart_addr; EXPORT_SYMBOL_GPL(kernstart_addr); -static void pgd_ctor(void *addr) -{ - memset(addr, 0, PGD_TABLE_SIZE); -} - -static void pud_ctor(void *addr) -{ - memset(addr, 0, PUD_TABLE_SIZE); -} - -static void pmd_ctor(void *addr) -{ - memset(addr, 0, PMD_TABLE_SIZE); -} - -struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; - -/* - * Create a kmem_cache() for pagetables. This is not used for PTE - * pages - they're linked to struct page, come from the normal free - * pages pool and have a different entry size (see real_pte_t) to - * everything else. Caches created by this function are used for all - * the higher level pagetables, and for hugepage pagetables. - */ -void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) -{ - char *name; - unsigned long table_size = sizeof(void *) << shift; - unsigned long align = table_size; - - /* When batching pgtable pointers for RCU freeing, we store - * the index size in the low bits. Table alignment must be - * big enough to fit it. - * - * Likewise, hugeapge pagetable pointers contain a (different) - * shift value in the low bits. All tables must be aligned so - * as to leave enough 0 bits in the address to contain it. */ - unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1, - HUGEPD_SHIFT_MASK + 1); - struct kmem_cache *new; - - /* It would be nice if this was a BUILD_BUG_ON(), but at the - * moment, gcc doesn't seem to recognize is_power_of_2 as a - * constant expression, so so much for that. 
*/ - BUG_ON(!is_power_of_2(minalign)); - BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE)); - - if (PGT_CACHE(shift)) - return; /* Already have a cache of this size */ - - align = max_t(unsigned long, align, minalign); - name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); - new = kmem_cache_create(name, table_size, align, 0, ctor); - kfree(name); - pgtable_cache[shift - 1] = new; - pr_debug("Allocated pgtable cache for order %d\n", shift); -} - - -void pgtable_cache_init(void) -{ - pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); - pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor); - /* - * In all current configs, when the PUD index exists it's the - * same size as either the pgd or pmd index except with THP enabled - * on book3s 64 - */ - if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) - pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); - - if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX)) - panic("Couldn't allocate pgtable caches"); - if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) - panic("Couldn't allocate pud pgtable caches"); -} - #ifdef CONFIG_SPARSEMEM_VMEMMAP /* * Given an address within the vmemmap, determine the pfn of the page that diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index b114f8b93ec9..73bf6e14c3aa 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -115,7 +115,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) mm->context.pte_frag = NULL; #endif #ifdef CONFIG_SPAPR_TCE_IOMMU - mm_iommu_init(&mm->context); + mm_iommu_init(mm); #endif return 0; } @@ -156,13 +156,11 @@ static inline void destroy_pagetable_page(struct mm_struct *mm) } #endif - void destroy_context(struct mm_struct *mm) { #ifdef CONFIG_SPAPR_TCE_IOMMU - mm_iommu_cleanup(&mm->context); + WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list)); #endif - #ifdef CONFIG_PPC_ICSWX drop_cop(mm->context.acop, mm); kfree(mm->context.cop_lockp); diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index e0f1c33601dd..104bad029ce9 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -56,7 +56,7 @@ static long mm_iommu_adjust_locked_vm(struct mm_struct *mm, } pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n", - current->pid, + current ? current->pid : 0, incr ? 
'+' : '-', npages << PAGE_SHIFT, mm->locked_vm << PAGE_SHIFT, @@ -66,12 +66,9 @@ static long mm_iommu_adjust_locked_vm(struct mm_struct *mm, return ret; } -bool mm_iommu_preregistered(void) +bool mm_iommu_preregistered(struct mm_struct *mm) { - if (!current || !current->mm) - return false; - - return !list_empty(&current->mm->context.iommu_group_mem_list); + return !list_empty(&mm->context.iommu_group_mem_list); } EXPORT_SYMBOL_GPL(mm_iommu_preregistered); @@ -124,19 +121,16 @@ static int mm_iommu_move_page_from_cma(struct page *page) return 0; } -long mm_iommu_get(unsigned long ua, unsigned long entries, +long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, struct mm_iommu_table_group_mem_t **pmem) { struct mm_iommu_table_group_mem_t *mem; long i, j, ret = 0, locked_entries = 0; struct page *page = NULL; - if (!current || !current->mm) - return -ESRCH; /* process exited */ - mutex_lock(&mem_list_mutex); - list_for_each_entry_rcu(mem, &current->mm->context.iommu_group_mem_list, + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua == ua) && (mem->entries == entries)) { ++mem->used; @@ -154,7 +148,7 @@ long mm_iommu_get(unsigned long ua, unsigned long entries, } - ret = mm_iommu_adjust_locked_vm(current->mm, entries, true); + ret = mm_iommu_adjust_locked_vm(mm, entries, true); if (ret) goto unlock_exit; @@ -215,11 +209,11 @@ populate: mem->entries = entries; *pmem = mem; - list_add_rcu(&mem->next, &current->mm->context.iommu_group_mem_list); + list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list); unlock_exit: if (locked_entries && ret) - mm_iommu_adjust_locked_vm(current->mm, locked_entries, false); + mm_iommu_adjust_locked_vm(mm, locked_entries, false); mutex_unlock(&mem_list_mutex); @@ -264,17 +258,13 @@ static void mm_iommu_free(struct rcu_head *head) static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem) { list_del_rcu(&mem->next); - mm_iommu_adjust_locked_vm(current->mm, mem->entries, false); call_rcu(&mem->rcu, mm_iommu_free); } -long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem) +long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) { long ret = 0; - if (!current || !current->mm) - return -ESRCH; /* process exited */ - mutex_lock(&mem_list_mutex); if (mem->used == 0) { @@ -297,6 +287,8 @@ long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem) /* @mapped became 0 so now mappings are disabled, release the region */ mm_iommu_release(mem); + mm_iommu_adjust_locked_vm(mm, mem->entries, false); + unlock_exit: mutex_unlock(&mem_list_mutex); @@ -304,14 +296,12 @@ unlock_exit: } EXPORT_SYMBOL_GPL(mm_iommu_put); -struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, - unsigned long size) +struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, + unsigned long ua, unsigned long size) { struct mm_iommu_table_group_mem_t *mem, *ret = NULL; - list_for_each_entry_rcu(mem, - &current->mm->context.iommu_group_mem_list, - next) { + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua <= ua) && (ua + size <= mem->ua + (mem->entries << PAGE_SHIFT))) { @@ -324,14 +314,12 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, } EXPORT_SYMBOL_GPL(mm_iommu_lookup); -struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, - unsigned long entries) +struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, + unsigned long ua, unsigned long entries) { struct mm_iommu_table_group_mem_t *mem, *ret = NULL; -
list_for_each_entry_rcu(mem, - &current->mm->context.iommu_group_mem_list, - next) { + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua == ua) && (mem->entries == entries)) { ret = mem; break; @@ -373,17 +361,7 @@ void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem) } EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec); -void mm_iommu_init(mm_context_t *ctx) +void mm_iommu_init(struct mm_struct *mm) { - INIT_LIST_HEAD_RCU(&ctx->iommu_group_mem_list); -} - -void mm_iommu_cleanup(mm_context_t *ctx) -{ - struct mm_iommu_table_group_mem_t *mem, *tmp; - - list_for_each_entry_safe(mem, tmp, &ctx->iommu_group_mem_list, next) { - list_del_rcu(&mem->next); - mm_iommu_do_free(mem); - } + INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list); } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index a51c188b81f3..0cb6bd8bfccf 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1085,7 +1085,7 @@ static int hot_add_node_scn_to_nid(unsigned long scn_addr) int hot_add_scn_to_nid(unsigned long scn_addr) { struct device_node *memory = NULL; - int nid, found = 0; + int nid; if (!numa_enabled || (min_common_depth < 0)) return first_online_node; @@ -1101,17 +1101,6 @@ int hot_add_scn_to_nid(unsigned long scn_addr) if (nid < 0 || !node_online(nid)) nid = first_online_node; - if (NODE_DATA(nid)->node_spanned_pages) - return nid; - - for_each_online_node(nid) { - if (NODE_DATA(nid)->node_spanned_pages) { - found = 1; - break; - } - } - - BUG_ON(!found); return nid; } diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index f4f437cbabf1..ebf9782bacf9 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -35,7 +35,8 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, #endif changed = !pmd_same(*(pmdp), entry); if (changed) { - __ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), pmd_pte(entry)); + __ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), + pmd_pte(entry), address); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } return changed; } diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index ed7bddc456b7..cfa53ccc8baf 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -159,7 +159,7 @@ redo: * Allocate Partition table and process table for the * host. */ - BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 23), "Process table size too large."); + BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 36), "Process table size too large."); process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT); /* * Fill in the process table. @@ -177,23 +177,15 @@ redo: static void __init radix_init_partition_table(void) { - unsigned long rts_field; + unsigned long rts_field, dw0; + mmu_partition_table_init(); rts_field = radix__get_tree_size(); + dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR; + mmu_partition_table_set_entry(0, dw0, 0); - BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); - partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT); - partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | - RADIX_PGD_INDEX_SIZE | PATB_HR); pr_info("Initializing Radix MMU\n"); pr_info("Partition table %p\n", partition_tb); - - memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); - /* - * update partition table control register, - * 64 K size.
- */ - mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); } void __init radix_init_native(void) @@ -248,7 +240,7 @@ static int __init radix_dt_scan_page_sizes(unsigned long node, /* top 3 bit is AP encoding */ shift = be32_to_cpu(prop[0]) & ~(0xe << 28); ap = be32_to_cpu(prop[0]) >> 29; - pr_info("Page size sift = %d AP=0x%x\n", shift, ap); + pr_info("Page size shift = %d AP=0x%x\n", shift, ap); idx = get_idx_from_shift(shift); if (idx < 0) @@ -320,6 +312,38 @@ static void update_hid_for_radix(void) cpu_relax(); } +static void radix_init_amor(void) +{ + /* + * In HV mode, we init AMOR (Authority Mask Override Register) so that + * the hypervisor and guest can setup IAMR (Instruction Authority Mask + * Register), enable key 0 and set it to 1. + * + * AMOR = 0b1100 .... 0000 (Mask for key 0 is 11) + */ + mtspr(SPRN_AMOR, (3ul << 62)); +} + +static void radix_init_iamr(void) +{ + unsigned long iamr; + + /* + * The IAMR should be set to 0 on DD1. + */ + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + iamr = 0; + else + iamr = (1ul << 62); + + /* + * Radix always uses key0 of the IAMR to determine if an access is + * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction + * fetch. + */ + mtspr(SPRN_IAMR, iamr); +} + void __init radix__early_init_mmu(void) { unsigned long lpcr; @@ -376,8 +400,12 @@ void __init radix__early_init_mmu(void) lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); radix_init_partition_table(); + radix_init_amor(); } + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); + + radix_init_iamr(); radix_init_pgtable(); } @@ -388,12 +416,18 @@ void radix__early_init_mmu_secondary(void) * update partition table control register and UPRT */ if (!firmware_has_feature(FW_FEATURE_LPAR)) { + + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + update_hid_for_radix(); + lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); + radix_init_amor(); } + radix_init_iamr(); } void radix__mmu_cleanup_all(void) diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 911fdfb63ec1..cb39c8bd2436 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -224,7 +224,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, if (changed) { if (!is_vm_hugetlb_page(vma)) assert_pte_locked(vma->vm_mm, address); - __ptep_set_access_flags(vma->vm_mm, ptep, entry); + __ptep_set_access_flags(vma->vm_mm, ptep, entry, address); flush_tlb_page(vma, address); } return changed; } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 0ae0572bc239..a65c0b4c0669 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -42,43 +42,6 @@ EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ extern char etext[], _stext[], _sinittext[], _einittext[]; -#define PGDIR_ORDER (32 + PGD_T_LOG2 - PGDIR_SHIFT) - -#ifndef CONFIG_PPC_4K_PAGES -static struct kmem_cache *pgtable_cache; - -void pgtable_cache_init(void) -{ - pgtable_cache = kmem_cache_create("PGDIR cache", 1 << PGDIR_ORDER, - 1 << PGDIR_ORDER, 0, NULL); - if (pgtable_cache == NULL) - panic("Couldn't allocate pgtable caches"); -} -#endif - -pgd_t *pgd_alloc(struct mm_struct *mm) -{ - pgd_t *ret; - - /* pgdir take page or two with 4K pages and a page fraction otherwise */ -#ifndef CONFIG_PPC_4K_PAGES - ret = kmem_cache_alloc(pgtable_cache, GFP_KERNEL | __GFP_ZERO); -#else - ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, - PGDIR_ORDER - PAGE_SHIFT); -#endif -
return ret; -} - -void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ -#ifndef CONFIG_PPC_4K_PAGES - kmem_cache_free(pgtable_cache, (void *)pgd); -#else - free_pages((unsigned long)pgd, PGDIR_ORDER - PAGE_SHIFT); -#endif -} - __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pte_t *pte; diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index f5e8d4edb808..8bca7f58afc4 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -431,3 +431,37 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) } } #endif + +#ifdef CONFIG_PPC_BOOK3S_64 +void __init mmu_partition_table_init(void) +{ + unsigned long patb_size = 1UL << PATB_SIZE_SHIFT; + + BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large."); + partition_tb = __va(memblock_alloc_base(patb_size, patb_size, + MEMBLOCK_ALLOC_ANYWHERE)); + + /* Initialize the Partition Table with no entries */ + memset((void *)partition_tb, 0, patb_size); + + /* + * update partition table control register, + * 64 K size. + */ + mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); +} + +void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, + unsigned long dw1) +{ + partition_tb[lpid].patb0 = cpu_to_be64(dw0); + partition_tb[lpid].patb1 = cpu_to_be64(dw1); + + /* Global flush of TLBs and partition table caches for this lpid */ + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); +} +EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); +#endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index bda8c43be78a..61b79119065f 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -50,6 +50,8 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) { __tlbiel_pid(pid, set, ric); } + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); return; } @@ -83,6 +85,8 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid, asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("ptesync": : :"memory"); + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); } static inline void _tlbie_va(unsigned long va, unsigned long pid, @@ -424,3 +428,21 @@ void radix__flush_tlb_all(void) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } + +void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, + unsigned long address) +{ + /* + * We track page size in pte only for DD1, So we can + * call this only on DD1. + */ + if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) { + VM_WARN_ON(1); + return; + } + + if (old_pte & _PAGE_LARGE) + radix__flush_tlb_page_psize(mm, address, MMU_PAGE_2M); + else + radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize); +} diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 050badc0ebd3..ba28fcb98597 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -53,7 +53,7 @@ * other sizes not listed here. The .ind field is only used on MMUs that have * indirect page table entries. 
*/ -#ifdef CONFIG_PPC_BOOK3E_MMU +#if defined(CONFIG_PPC_BOOK3E_MMU) || defined(CONFIG_PPC_8xx) #ifdef CONFIG_PPC_FSL_BOOK3E struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { [MMU_PAGE_4K] = { @@ -85,6 +85,25 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { .enc = BOOK3E_PAGESZ_1GB, }, }; +#elif defined(CONFIG_PPC_8xx) +struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { + /* we only manage 4k and 16k pages as normal pages */ +#ifdef CONFIG_PPC_4K_PAGES + [MMU_PAGE_4K] = { + .shift = 12, + }, +#else + [MMU_PAGE_16K] = { + .shift = 14, + }, +#endif + [MMU_PAGE_512K] = { + .shift = 19, + }, + [MMU_PAGE_8M] = { + .shift = 23, + }, +}; #else struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { [MMU_PAGE_4K] = { diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 0fe98a567125..73a5cf18fd84 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -766,7 +766,7 @@ emit_clear: func = (u8 *) __bpf_call_base + imm; /* Save skb pointer if we need to re-cache skb data */ - if (bpf_helper_changes_skb_data(func)) + if (bpf_helper_changes_pkt_data(func)) PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); bpf_jit_emit_func_call(image, ctx, (u64)func); @@ -775,7 +775,7 @@ emit_clear: PPC_MR(b2p[BPF_REG_0], 3); /* refresh skb cache */ - if (bpf_helper_changes_skb_data(func)) { + if (bpf_helper_changes_pkt_data(func)) { /* reload skb pointer to r3 */ PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); bpf_jit_emit_skb_loads(image, ctx); diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c index 83d2b4ef7f0d..44d67b167e0b 100644 --- a/arch/powerpc/oprofile/cell/spu_task_sync.c +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c @@ -295,7 +295,7 @@ out: * dcookie user still being registered (namely, the reader * of the event buffer). 
*/ -static inline unsigned long fast_get_dcookie(struct path *path) +static inline unsigned long fast_get_dcookie(const struct path *path) { unsigned long cookie; diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 6143c99f3ec5..50e598cf644b 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -12,6 +12,40 @@ */ #include "isa207-common.h" +PMU_FORMAT_ATTR(event, "config:0-49"); +PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); +PMU_FORMAT_ATTR(mark, "config:8"); +PMU_FORMAT_ATTR(combine, "config:11"); +PMU_FORMAT_ATTR(unit, "config:12-15"); +PMU_FORMAT_ATTR(pmc, "config:16-19"); +PMU_FORMAT_ATTR(cache_sel, "config:20-23"); +PMU_FORMAT_ATTR(sample_mode, "config:24-28"); +PMU_FORMAT_ATTR(thresh_sel, "config:29-31"); +PMU_FORMAT_ATTR(thresh_stop, "config:32-35"); +PMU_FORMAT_ATTR(thresh_start, "config:36-39"); +PMU_FORMAT_ATTR(thresh_cmp, "config:40-49"); + +struct attribute *isa207_pmu_format_attr[] = { + &format_attr_event.attr, + &format_attr_pmcxsel.attr, + &format_attr_mark.attr, + &format_attr_combine.attr, + &format_attr_unit.attr, + &format_attr_pmc.attr, + &format_attr_cache_sel.attr, + &format_attr_sample_mode.attr, + &format_attr_thresh_sel.attr, + &format_attr_thresh_stop.attr, + &format_attr_thresh_start.attr, + &format_attr_thresh_cmp.attr, + NULL, +}; + +struct attribute_group isa207_pmu_format_group = { + .name = "format", + .attrs = isa207_pmu_format_attr, +}; + static inline bool event_is_fab_match(u64 event) { /* Only check pmc, unit and pmcxsel, ignore the edge bit (0) */ @@ -21,6 +55,48 @@ static inline bool event_is_fab_match(u64 event) return (event == 0x30056 || event == 0x4f052); } +static bool is_event_valid(u64 event) +{ + u64 valid_mask = EVENT_VALID_MASK; + + if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1)) + valid_mask = p9_EVENT_VALID_MASK; + + return !(event & ~valid_mask); +} + +static u64 mmcra_sdar_mode(u64 event) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1)) + return p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; + + return MMCRA_SDAR_MODE_TLB; +} + +static u64 thresh_cmp_val(u64 value) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1)) + return value << p9_MMCRA_THR_CMP_SHIFT; + + return value << MMCRA_THR_CMP_SHIFT; +} + +static unsigned long combine_from_event(u64 event) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1)) + return p9_EVENT_COMBINE(event); + + return EVENT_COMBINE(event); +} + +static unsigned long combine_shift(unsigned long pmc) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1)) + return p9_MMCR1_COMBINE_SHIFT(pmc); + + return MMCR1_COMBINE_SHIFT(pmc); +} + int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) { unsigned int unit, pmc, cache, ebb; @@ -28,7 +104,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) mask = value = 0; - if (event & ~EVENT_VALID_MASK) + if (!is_event_valid(event)) return -1; pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; @@ -155,15 +231,13 @@ int isa207_compute_mmcr(u64 event[], int n_ev, pmc_inuse |= 1 << pmc; } - /* In continuous sampling mode, update SDAR on TLB miss */ - mmcra = MMCRA_SDAR_MODE_TLB; - mmcr1 = mmcr2 = 0; + mmcra = mmcr1 = mmcr2 = 0; /* Second pass: assign PMCs, set all MMCR1 fields */ for (i = 0; i < n_ev; ++i) { pmc = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; unit = (event[i] >> 
EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; - combine = (event[i] >> EVENT_COMBINE_SHIFT) & EVENT_COMBINE_MASK; + combine = combine_from_event(event[i]); psel = event[i] & EVENT_PSEL_MASK; if (!pmc) { @@ -177,10 +251,13 @@ int isa207_compute_mmcr(u64 event[], int n_ev, if (pmc <= 4) { mmcr1 |= unit << MMCR1_UNIT_SHIFT(pmc); - mmcr1 |= combine << MMCR1_COMBINE_SHIFT(pmc); + mmcr1 |= combine << combine_shift(pmc); mmcr1 |= psel << MMCR1_PMCSEL_SHIFT(pmc); } + /* In continuous sampling mode, update SDAR on TLB miss */ + mmcra |= mmcra_sdar_mode(event[i]); + if (event[i] & EVENT_IS_L1) { cache = event[i] >> EVENT_CACHE_SEL_SHIFT; mmcr1 |= (cache & 1) << MMCR1_IC_QUAL_SHIFT; @@ -211,7 +288,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev, val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK; mmcra |= val << MMCRA_THR_SEL_SHIFT; val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; - mmcra |= val << MMCRA_THR_CMP_SHIFT; + mmcra |= thresh_cmp_val(val); } if (event[i] & EVENT_WANTS_BHRB) { diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 4d0a4e5017c2..90495f1580c7 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -107,6 +107,7 @@ #define EVENT_UNIT_MASK 0xf #define EVENT_COMBINE_SHIFT 11 /* Combine bit */ #define EVENT_COMBINE_MASK 0x1 +#define EVENT_COMBINE(v) (((v) >> EVENT_COMBINE_SHIFT) & EVENT_COMBINE_MASK) #define EVENT_MARKED_SHIFT 8 /* Marked bit */ #define EVENT_MARKED_MASK 0x1 #define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) @@ -134,6 +135,26 @@ PERF_SAMPLE_BRANCH_KERNEL |\ PERF_SAMPLE_BRANCH_HV) +/* Constants to support power9 raw encoding format */ +#define p9_EVENT_COMBINE_SHIFT 10 /* Combine bit */ +#define p9_EVENT_COMBINE_MASK 0x3ull +#define p9_EVENT_COMBINE(v) (((v) >> p9_EVENT_COMBINE_SHIFT) & p9_EVENT_COMBINE_MASK) +#define p9_SDAR_MODE_SHIFT 50 +#define p9_SDAR_MODE_MASK 0x3ull +#define p9_SDAR_MODE(v) (((v) >> p9_SDAR_MODE_SHIFT) & p9_SDAR_MODE_MASK) + +#define p9_EVENT_VALID_MASK \ + ((p9_SDAR_MODE_MASK << p9_SDAR_MODE_SHIFT | \ + (EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \ + (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \ + (EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \ + (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \ + (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \ + (p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT) | \ + (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ + EVENT_LINUX_MASK | \ + EVENT_PSEL_MASK)) + /* * Layout of constraint bits: * @@ -210,15 +231,22 @@ #define MMCR1_DC_QUAL_SHIFT 47 #define MMCR1_IC_QUAL_SHIFT 46 +/* MMCR1 Combine bits macro for power9 */ +#define p9_MMCR1_COMBINE_SHIFT(pmc) (38 - ((pmc - 1) * 2)) + /* Bits in MMCRA for PowerISA v2.07 */ #define MMCRA_SAMP_MODE_SHIFT 1 #define MMCRA_SAMP_ELIG_SHIFT 4 #define MMCRA_THR_CTL_SHIFT 8 #define MMCRA_THR_SEL_SHIFT 16 #define MMCRA_THR_CMP_SHIFT 32 -#define MMCRA_SDAR_MODE_TLB (1ull << 42) +#define MMCRA_SDAR_MODE_SHIFT 42 +#define MMCRA_SDAR_MODE_TLB (1ull << MMCRA_SDAR_MODE_SHIFT) #define MMCRA_IFM_SHIFT 30 +/* MMCRA Threshold Compare bit constant for power9 */ +#define p9_MMCRA_THR_CMP_SHIFT 45 + /* Bits in MMCR2 for PowerISA v2.07 */ #define MMCR2_FCS(pmc) (1ull << (63 - (((pmc) - 1) * 9))) #define MMCR2_FCP(pmc) (1ull << (62 - (((pmc) - 1) * 9))) diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index ab830d106ec5..d07186382f3a 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -30,6 +30,9 @@ enum { #define POWER8_MMCRA_IFM2 
0x0000000080000000UL #define POWER8_MMCRA_IFM3 0x00000000C0000000UL +/* PowerISA v2.07 format attribute structure*/ +extern struct attribute_group isa207_pmu_format_group; + /* Table of alternatives, sorted by column 0 */ static const unsigned int event_alternatives[][MAX_ALT] = { { PM_MRK_ST_CMPL, PM_MRK_ST_CMPL_ALT }, @@ -175,42 +178,8 @@ static struct attribute_group power8_pmu_events_group = { .attrs = power8_events_attr, }; -PMU_FORMAT_ATTR(event, "config:0-49"); -PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); -PMU_FORMAT_ATTR(mark, "config:8"); -PMU_FORMAT_ATTR(combine, "config:11"); -PMU_FORMAT_ATTR(unit, "config:12-15"); -PMU_FORMAT_ATTR(pmc, "config:16-19"); -PMU_FORMAT_ATTR(cache_sel, "config:20-23"); -PMU_FORMAT_ATTR(sample_mode, "config:24-28"); -PMU_FORMAT_ATTR(thresh_sel, "config:29-31"); -PMU_FORMAT_ATTR(thresh_stop, "config:32-35"); -PMU_FORMAT_ATTR(thresh_start, "config:36-39"); -PMU_FORMAT_ATTR(thresh_cmp, "config:40-49"); - -static struct attribute *power8_pmu_format_attr[] = { - &format_attr_event.attr, - &format_attr_pmcxsel.attr, - &format_attr_mark.attr, - &format_attr_combine.attr, - &format_attr_unit.attr, - &format_attr_pmc.attr, - &format_attr_cache_sel.attr, - &format_attr_sample_mode.attr, - &format_attr_thresh_sel.attr, - &format_attr_thresh_stop.attr, - &format_attr_thresh_start.attr, - &format_attr_thresh_cmp.attr, - NULL, -}; - -static struct attribute_group power8_pmu_format_group = { - .name = "format", - .attrs = power8_pmu_format_attr, -}; - static const struct attribute_group *power8_pmu_attr_groups[] = { - &power8_pmu_format_group, + &isa207_pmu_format_group, &power8_pmu_events_group, NULL, }; diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 8e9a81967ff8..346010e8d463 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -16,6 +16,78 @@ #include "isa207-common.h" /* + * Raw event encoding for Power9: + * + * 60 56 52 48 44 40 36 32 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * | | [ ] [ ] [ thresh_cmp ] [ thresh_ctl ] + * | | | | | + * | | *- IFM (Linux) | thresh start/stop OR FAB match -* + * | *- BHRB (Linux) *sm + * *- EBB (Linux) + * + * 28 24 20 16 12 8 4 0 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * [ ] [ sample ] [cache] [ pmc ] [unit ] [] m [ pmcxsel ] + * | | | | | + * | | | | *- mark + * | | *- L1/L2/L3 cache_sel | + * | | | + * | *- sampling mode for marked events *- combine + * | + * *- thresh_sel + * + * Below uses IBM bit numbering. 
+ * + * MMCR1[x:y] = unit (PMCxUNIT) + * MMCR1[24] = pmc1combine[0] + * MMCR1[25] = pmc1combine[1] + * MMCR1[26] = pmc2combine[0] + * MMCR1[27] = pmc2combine[1] + * MMCR1[28] = pmc3combine[0] + * MMCR1[29] = pmc3combine[1] + * MMCR1[30] = pmc4combine[0] + * MMCR1[31] = pmc4combine[1] + * + * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011 + * # PM_MRK_FAB_RSP_MATCH + * MMCR1[20:27] = thresh_ctl (FAB_CRESP_MATCH / FAB_TYPE_MATCH) + * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001 + * # PM_MRK_FAB_RSP_MATCH_CYC + * MMCR1[20:27] = thresh_ctl (FAB_CRESP_MATCH / FAB_TYPE_MATCH) + * else + * MMCRA[48:55] = thresh_ctl (THRESH START/END) + * + * if thresh_sel: + * MMCRA[45:47] = thresh_sel + * + * if thresh_cmp: + * MMCRA[9:11] = thresh_cmp[0:2] + * MMCRA[12:18] = thresh_cmp[3:9] + * + * if unit == 6 or unit == 7 + * MMCRC[53:55] = cache_sel[1:3] (L2EVENT_SEL) + * else if unit == 8 or unit == 9: + * if cache_sel[0] == 0: # L3 bank + * MMCRC[47:49] = cache_sel[1:3] (L3EVENT_SEL0) + * else if cache_sel[0] == 1: + * MMCRC[50:51] = cache_sel[2:3] (L3EVENT_SEL1) + * else if cache_sel[1]: # L1 event + * MMCR1[16] = cache_sel[2] + * MMCR1[17] = cache_sel[3] + * + * if mark: + * MMCRA[63] = 1 (SAMPLE_ENABLE) + * MMCRA[57:59] = sample[0:2] (RAND_SAMP_ELIG) + * MMCRA[61:62] = sample[3:4] (RAND_SAMP_MODE) + * + * if EBB and BHRB: + * MMCRA[32:33] = IFM + * + * MMCRA[SDAR_MODE] = sm + */ + +/* * Some power9 event codes. */ #define EVENT(_name, _code) _name = _code, @@ -31,6 +103,9 @@ enum { #define POWER9_MMCRA_IFM2 0x0000000080000000UL #define POWER9_MMCRA_IFM3 0x00000000C0000000UL +/* PowerISA v2.07 format attribute structure*/ +extern struct attribute_group isa207_pmu_format_group; + GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC); GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_ICT_NOSLOT_CYC); GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL); @@ -90,10 +165,16 @@ static struct attribute_group power9_pmu_events_group = { .attrs = power9_events_attr, }; -PMU_FORMAT_ATTR(event, "config:0-49"); +static const struct attribute_group *power9_isa207_pmu_attr_groups[] = { + &isa207_pmu_format_group, + &power9_pmu_events_group, + NULL, +}; + +PMU_FORMAT_ATTR(event, "config:0-51"); PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); PMU_FORMAT_ATTR(mark, "config:8"); -PMU_FORMAT_ATTR(combine, "config:11"); +PMU_FORMAT_ATTR(combine, "config:10-11"); PMU_FORMAT_ATTR(unit, "config:12-15"); PMU_FORMAT_ATTR(pmc, "config:16-19"); PMU_FORMAT_ATTR(cache_sel, "config:20-23"); @@ -102,6 +183,7 @@ PMU_FORMAT_ATTR(thresh_sel, "config:29-31"); PMU_FORMAT_ATTR(thresh_stop, "config:32-35"); PMU_FORMAT_ATTR(thresh_start, "config:36-39"); PMU_FORMAT_ATTR(thresh_cmp, "config:40-49"); +PMU_FORMAT_ATTR(sdar_mode, "config:50-51"); static struct attribute *power9_pmu_format_attr[] = { &format_attr_event.attr, @@ -116,6 +198,7 @@ static struct attribute *power9_pmu_format_attr[] = { &format_attr_thresh_stop.attr, &format_attr_thresh_start.attr, &format_attr_thresh_cmp.attr, + &format_attr_sdar_mode.attr, NULL, }; @@ -291,6 +374,24 @@ static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { #undef C +static struct power_pmu power9_isa207_pmu = { + .name = "POWER9", + .n_counter = MAX_PMU_COUNTERS, + .add_fields = ISA207_ADD_FIELDS, + .test_adder = ISA207_TEST_ADDER, + .compute_mmcr = isa207_compute_mmcr, + .config_bhrb = power9_config_bhrb, + .bhrb_filter_map = power9_bhrb_filter_map, + .get_constraint = isa207_get_constraint, + .disable_pmc = isa207_disable_pmc, + .flags = PPMU_HAS_SIER | PPMU_ARCH_207S, + 
.n_generic = ARRAY_SIZE(power9_generic_events), + .generic_events = power9_generic_events, + .cache_events = &power9_cache_events, + .attr_groups = power9_isa207_pmu_attr_groups, + .bhrb_nr = 32, +}; + static struct power_pmu power9_pmu = { .name = "POWER9", .n_counter = MAX_PMU_COUNTERS, @@ -311,14 +412,19 @@ static struct power_pmu power9_pmu = { static int __init init_power9_pmu(void) { - int rc; + int rc = 0; /* Comes from cpu_specs[] */ if (!cur_cpu_spec->oprofile_cpu_type || strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power9")) return -ENODEV; - rc = register_power_pmu(&power9_pmu); + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { + rc = register_power_pmu(&power9_isa207_pmu); + } else { + rc = register_power_pmu(&power9_pmu); + } + if (rc) return rc; diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig index e3257f24a8a1..abc24501c4c0 100644 --- a/arch/powerpc/platforms/40x/Kconfig +++ b/arch/powerpc/platforms/40x/Kconfig @@ -64,6 +64,7 @@ config XILINX_VIRTEX_GENERIC_BOARD default n select XILINX_VIRTEX_II_PRO select XILINX_VIRTEX_4_FX + select XILINX_INTC help This option enables generic support for Xilinx Virtex based boards. @@ -102,18 +103,18 @@ config 405GP bool select IBM405_ERR77 select IBM405_ERR51 - select IBM_EMAC_ZMII + select IBM_EMAC_ZMII if IBM_EMAC config 405EX bool - select IBM_EMAC_EMAC4 - select IBM_EMAC_RGMII + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC config 405EZ bool - select IBM_EMAC_NO_FLOW_CTRL - select IBM_EMAC_MAL_CLR_ICINTSTAT - select IBM_EMAC_MAL_COMMON_ERR + select IBM_EMAC_NO_FLOW_CTRL if IBM_EMAC + select IBM_EMAC_MAL_CLR_ICINTSTAT if IBM_EMAC + select IBM_EMAC_MAL_COMMON_ERR if IBM_EMAC config XILINX_VIRTEX bool diff --git a/arch/powerpc/platforms/40x/virtex.c b/arch/powerpc/platforms/40x/virtex.c index 91a08ea758a8..e3d5e095846b 100644 --- a/arch/powerpc/platforms/40x/virtex.c +++ b/arch/powerpc/platforms/40x/virtex.c @@ -48,7 +48,7 @@ define_machine(virtex) { .probe = virtex_probe, .setup_arch = xilinx_pci_init, .init_IRQ = xilinx_intc_init_tree, - .get_irq = xilinx_intc_get_irq, + .get_irq = xintc_get_irq, .restart = ppc4xx_reset_system, .calibrate_decr = generic_calibrate_decr, }; diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 48fc18041ff6..9b0afe935cc1 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -26,7 +26,7 @@ config BLUESTONE select PCI_MSI select PPC4xx_MSI select PPC4xx_PCI_EXPRESS - select IBM_EMAC_RGMII + select IBM_EMAC_RGMII if IBM_EMAC help This option enables support for the APM APM821xx Evaluation board. @@ -125,8 +125,8 @@ config CANYONLANDS select PPC4xx_PCI_EXPRESS select PCI_MSI select PPC4xx_MSI - select IBM_EMAC_RGMII - select IBM_EMAC_ZMII + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC help This option enables support for the AMCC PPC460EX evaluation board. @@ -138,8 +138,8 @@ config GLACIER select 460EX # Odd since it uses 460GT but the effects are the same select PCI select PPC4xx_PCI_EXPRESS - select IBM_EMAC_RGMII - select IBM_EMAC_ZMII + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC help This option enables support for the AMCC PPC460GT evaluation board. @@ -164,7 +164,7 @@ config EIGER select 460SX select PCI select PPC4xx_PCI_EXPRESS - select IBM_EMAC_RGMII + select IBM_EMAC_RGMII if IBM_EMAC help This option enables support for the AMCC PPC460SX evaluation board. 
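The power9-pmu.c hunks above widen the raw event encoding relative to POWER8: the event field grows to config:0-51, combine becomes a two-bit field at config:10-11, and a new two-bit sdar_mode field appears at config:50-51 (see the PMU_FORMAT_ATTR changes). As a quick illustration of what those format ranges mean for a raw perf config value, here is a minimal userspace-style decode sketch; the shift/mask constants mirror the p9_EVENT_COMBINE()/p9_SDAR_MODE() macros added in isa207-common.h, and the sample value is made up purely for illustration:

#include <stdint.h>
#include <stdio.h>

/* Mirrors p9_EVENT_COMBINE()/p9_SDAR_MODE() from isa207-common.h */
#define P9_EVENT_COMBINE_SHIFT	10
#define P9_EVENT_COMBINE_MASK	0x3ULL
#define P9_SDAR_MODE_SHIFT	50
#define P9_SDAR_MODE_MASK	0x3ULL

int main(void)
{
	/* hypothetical raw event value, e.g. as passed via perf's rXXXX syntax */
	uint64_t config = (1ULL << P9_SDAR_MODE_SHIFT) |
			  (2ULL << P9_EVENT_COMBINE_SHIFT) | 0x2eULL;

	printf("pmcxsel   = 0x%02x\n", (unsigned)(config & 0xff));
	printf("combine   = %u\n",
	       (unsigned)((config >> P9_EVENT_COMBINE_SHIFT) & P9_EVENT_COMBINE_MASK));
	printf("sdar_mode = %u\n",
	       (unsigned)((config >> P9_SDAR_MODE_SHIFT) & P9_SDAR_MODE_MASK));
	return 0;
}

This only sketches the bit layout; in the kernel the equivalent extraction is done by combine_from_event() and mmcra_sdar_mode() in isa207-common.c, gated on CPU_FTR_ARCH_300 (and not DD1) so the POWER8 layout keeps working unchanged.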
@@ -213,7 +213,7 @@ config AKEBONO select NETDEVICES select ETHERNET select NET_VENDOR_IBM - select IBM_EMAC_EMAC4 + select IBM_EMAC_EMAC4 if IBM_EMAC select USB if USB_SUPPORT select USB_OHCI_HCD_PLATFORM if USB_OHCI_HCD select USB_EHCI_HCD_PLATFORM if USB_EHCI_HCD @@ -241,6 +241,7 @@ config XILINX_VIRTEX440_GENERIC_BOARD depends on 44x default n select XILINX_VIRTEX_5_FXT + select XILINX_INTC help This option enables generic support for Xilinx Virtex based boards that use a 440 based processor in the Virtex 5 FXT FPGA architecture. @@ -290,54 +291,54 @@ config 440EP bool select PPC_FPU select IBM440EP_ERR42 - select IBM_EMAC_ZMII + select IBM_EMAC_ZMII if IBM_EMAC config 440EPX bool select PPC_FPU - select IBM_EMAC_EMAC4 - select IBM_EMAC_RGMII - select IBM_EMAC_ZMII + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC select USB_EHCI_BIG_ENDIAN_MMIO select USB_EHCI_BIG_ENDIAN_DESC config 440GRX bool - select IBM_EMAC_EMAC4 - select IBM_EMAC_RGMII - select IBM_EMAC_ZMII + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC config 440GP bool - select IBM_EMAC_ZMII + select IBM_EMAC_ZMII if IBM_EMAC config 440GX bool - select IBM_EMAC_EMAC4 - select IBM_EMAC_RGMII - select IBM_EMAC_ZMII #test only - select IBM_EMAC_TAH #test only + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC #test only + select IBM_EMAC_TAH if IBM_EMAC #test only config 440SP bool config 440SPe bool - select IBM_EMAC_EMAC4 + select IBM_EMAC_EMAC4 if IBM_EMAC config 460EX bool select PPC_FPU - select IBM_EMAC_EMAC4 - select IBM_EMAC_TAH + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_TAH if IBM_EMAC config 460SX bool select PPC_FPU - select IBM_EMAC_EMAC4 - select IBM_EMAC_RGMII - select IBM_EMAC_ZMII - select IBM_EMAC_TAH + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC + select IBM_EMAC_TAH if IBM_EMAC config 476FPE bool @@ -346,8 +347,8 @@ config 476FPE config APM821xx bool select PPC_FPU - select IBM_EMAC_EMAC4 - select IBM_EMAC_TAH + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_TAH if IBM_EMAC config 476FPE_ERR46 depends on 476FPE diff --git a/arch/powerpc/platforms/44x/virtex.c b/arch/powerpc/platforms/44x/virtex.c index a7e08026097a..3eb13ed926ee 100644 --- a/arch/powerpc/platforms/44x/virtex.c +++ b/arch/powerpc/platforms/44x/virtex.c @@ -54,7 +54,7 @@ define_machine(virtex) { .probe = virtex_probe, .setup_arch = xilinx_pci_init, .init_IRQ = xilinx_intc_init_tree, - .get_irq = xilinx_intc_get_irq, + .get_irq = xintc_get_irq, .calibrate_decr = generic_calibrate_decr, .restart = ppc4xx_reset_system, }; diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 24717d060008..08f92f6ed228 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -441,8 +441,4 @@ static struct platform_driver pmc_driver = { .remove = pmc_remove }; -static int pmc_init(void) -{ - return platform_driver_register(&pmc_driver); -} -device_initcall(pmc_init); +builtin_platform_driver(pmc_driver); diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 9dc1d28975b9..47b389dc4938 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -253,6 +253,8 @@ endif # PPC32 config PPC_QEMU_E500 bool "QEMU generic e500 platform" select DEFAULT_UIMAGE + select E500 + 
select PPC_E500MC if PPC64 help This option enables support for running as a QEMU guest using QEMU's generic e500 machine. This is not required if you're diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 1179115a4b5c..3803b0addf65 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -220,7 +220,7 @@ define_machine(corenet_generic) { * * Likewise, problems have been seen with kexec when coreint is enabled. */ -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC) +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC_CORE) .get_irq = mpic_get_irq, #else .get_irq = mpic_get_coreint_irq, diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index fe9f19e5e935..a83a6d26090d 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -349,13 +349,13 @@ struct smp_ops_t smp_85xx_ops = { .cpu_disable = generic_cpu_disable, .cpu_die = generic_cpu_die, #endif -#if defined(CONFIG_KEXEC) && !defined(CONFIG_PPC64) +#if defined(CONFIG_KEXEC_CORE) && !defined(CONFIG_PPC64) .give_timebase = smp_generic_give_timebase, .take_timebase = smp_generic_take_timebase, #endif }; -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE #ifdef CONFIG_PPC32 atomic_t kexec_down_cpus = ATOMIC_INIT(0); @@ -458,7 +458,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image) default_machine_kexec(image); } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ static void smp_85xx_basic_setup(int cpu_nr) { @@ -512,7 +512,7 @@ void __init mpc85xx_smp_init(void) #endif smp_ops = &smp_85xx_ops; -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE ppc_md.kexec_cpu_down = mpc85xx_smp_kexec_cpu_down; ppc_md.machine_kexec = mpc85xx_smp_machine_kexec; #endif diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index 564d99bb2a26..80cbcb0ad9b1 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -130,6 +130,7 @@ config 8xx_CPU6 config 8xx_CPU15 bool "CPU15 Silicon Errata" + depends on !HUGETLB_PAGE default y help This enables a workaround for erratum CPU15 on MPC8xx chips. diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index fbdae8377b71..7e3a2ebba29b 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -168,17 +168,6 @@ config MPIC_BROKEN_REGREAD well, but enabling it uses about 8KB of memory to keep copies of the register contents in software. -config IBMVIO - depends on PPC_PSERIES - bool - default y - -config IBMEBUS - depends on PPC_PSERIES - bool "Support for GX bus based adapters" - help - Bus device driver for GX bus based adapters. 
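The CONFIG_KEXEC to CONFIG_KEXEC_CORE conversions in the hunks above and below track the kernel-wide split of kexec into a shared core plus two user-facing syscalls: CONFIG_KEXEC_CORE is selected by both the classic kexec_load configuration and the file-based kexec_file_load one, so arch hooks reachable from either path have to be guarded by the _CORE symbol. A compilable sketch of the guard convention, with a hypothetical hook name; defining the macro on the compiler command line stands in for the Kconfig-generated define:

#include <stdio.h>

/* build with -DCONFIG_KEXEC_CORE to mimic CONFIG_KEXEC_CORE=y */
#ifdef CONFIG_KEXEC_CORE
static void example_kexec_cpu_down(int crash_shutdown, int secondary)
{
	/* a real hook would quiesce this CPU before entering the new kernel */
	printf("kexec_cpu_down(crash=%d, secondary=%d)\n",
	       crash_shutdown, secondary);
}
#endif

int main(void)
{
#ifdef CONFIG_KEXEC_CORE
	example_kexec_cpu_down(0, 1);
#else
	puts("built without kexec core support");
#endif
	return 0;
}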
- config EEH bool depends on (PPC_POWERNV || PPC_PSERIES) && PCI diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index ca2da30ad2ab..6e89e5a8d4fb 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -34,6 +34,7 @@ config PPC_8xx select FSL_SOC select 8xx select PPC_LIB_RHEAP + select SYS_SUPPORTS_HUGETLBFS config 40x bool "AMCC 40x" diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index d9088f0b8fcc..a4522f09d65e 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -17,10 +17,10 @@ config PPC_CELL_NATIVE select PPC_CELL_COMMON select MPIC select PPC_IO_WORKAROUNDS - select IBM_EMAC_EMAC4 - select IBM_EMAC_RGMII - select IBM_EMAC_ZMII #test only - select IBM_EMAC_TAH #test only + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC #test only + select IBM_EMAC_TAH if IBM_EMAC #test only default n config PPC_IBM_CELL_BLADE @@ -46,7 +46,6 @@ config SPU_FS default m depends on PPC_CELL select SPU_BASE - select MEMORY_HOTPLUG help The SPU file system is used to access Synergistic Processing Units on machines implementing the Broadband Processor diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index e84d8fbc2e21..96c2b8a40630 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -676,7 +676,7 @@ static ssize_t spu_stat_show(struct device *dev, static DEVICE_ATTR(stat, 0444, spu_stat_show, NULL); -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE struct crash_spu_info { struct spu *spu; diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 06254467e4dd..3a147122bc98 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -236,7 +236,6 @@ static int spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct spu_context *ctx = vma->vm_file->private_data; - unsigned long address = (unsigned long)vmf->virtual_address; unsigned long pfn, offset; offset = vmf->pgoff << PAGE_SHIFT; @@ -244,7 +243,7 @@ spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_SIGBUS; pr_debug("spufs_mem_mmap_fault address=0x%lx, offset=0x%lx\n", - address, offset); + vmf->address, offset); if (spu_acquire(ctx)) return VM_FAULT_NOPAGE; @@ -256,7 +255,7 @@ spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); pfn = (ctx->spu->local_store_phys + offset) >> PAGE_SHIFT; } - vm_insert_pfn(vma, address, pfn); + vm_insert_pfn(vma, vmf->address, pfn); spu_release(ctx); @@ -355,8 +354,7 @@ static int spufs_ps_fault(struct vm_area_struct *vma, down_read(¤t->mm->mmap_sem); } else { area = ctx->spu->problem_phys + ps_offs; - vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, - (area + offset) >> PAGE_SHIFT); + vm_insert_pfn(vma, vmf->address, (area + offset) >> PAGE_SHIFT); spu_context_trace(spufs_ps_fault__insert, ctx, ctx->spu); } diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index dfd310031549..0409714e8070 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -263,7 +263,7 @@ static int ppc750_machine_check_exception(struct pt_regs *regs) if ((entry = search_exception_tables(regs->nip)) != NULL) 
{ tsi108_clear_pci_cfg_error(); regs->msr |= MSR_RI; - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return 1; } return 0; diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index f97bab8e37a2..9de100e22bf3 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -174,7 +174,7 @@ static int mpc7448_machine_check_exception(struct pt_regs *regs) if ((entry = search_exception_tables(regs->nip)) != NULL) { tsi108_clear_pci_cfg_error(); regs->msr |= MSR_RI; - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return 1; } return 0; diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index c8c217b7dd33..f627c9fd7b48 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -90,6 +90,7 @@ struct pmac_i2c_bus int opened; int polled; /* open mode */ struct platform_device *platform_dev; + struct lock_class_key lock_key; /* ops */ int (*open)(struct pmac_i2c_bus *bus); @@ -587,6 +588,7 @@ static void __init kw_i2c_add(struct pmac_i2c_host_kw *host, bus->close = kw_i2c_close; bus->xfer = kw_i2c_xfer; mutex_init(&bus->mutex); + lockdep_set_class(&bus->mutex, &bus->lock_key); if (controller == busnode) bus->flags = pmac_i2c_multibus; list_add(&bus->link, &pmac_i2c_busses); @@ -815,6 +817,7 @@ static void __init pmu_i2c_probe(void) bus->hostdata = bus + 1; bus->xfer = pmu_i2c_xfer; mutex_init(&bus->mutex); + lockdep_set_class(&bus->mutex, &bus->lock_key); bus->flags = pmac_i2c_multibus; list_add(&bus->link, &pmac_i2c_busses); @@ -938,6 +941,7 @@ static void __init smu_i2c_probe(void) bus->hostdata = bus + 1; bus->xfer = smu_i2c_xfer; mutex_init(&bus->mutex); + lockdep_set_class(&bus->mutex, &bus->lock_key); bus->flags = 0; list_add(&bus->link, &pmac_i2c_busses); diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 2354ea51e871..6fb5522acd70 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -393,7 +393,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) /* Create PE */ ret = eeh_add_to_parent_pe(edev); if (ret) { - pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%d)\n", + pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%x)\n", __func__, hose->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn), ret); return NULL; @@ -1097,7 +1097,7 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) bus = eeh_pe_bus_get(pe); if (!bus) { - pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n", + pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); return -EIO; } diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index aec85e778028..73b155fd4481 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -263,7 +263,7 @@ static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe) /* Enable the bypass window */ top = roundup_pow_of_two(top); - dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n", + dev_info(&npe->pdev->dev, "Enabling bypass for PE %x\n", npe->pe_number); rc = opal_pci_map_pe_dma_window_real(phb->opal_id, npe->pe_number, npe->pe_number, diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c 
b/arch/powerpc/platforms/powernv/opal-tracepoints.c index 1e496b780efd..3c447002edff 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -6,9 +6,10 @@ #ifdef HAVE_JUMP_LABEL struct static_key opal_tracepoint_key = STATIC_KEY_INIT; -void opal_tracepoint_regfunc(void) +int opal_tracepoint_regfunc(void) { static_key_slow_inc(&opal_tracepoint_key); + return 0; } void opal_tracepoint_unregfunc(void) @@ -25,9 +26,10 @@ void opal_tracepoint_unregfunc(void) /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ extern long opal_tracepoint_refcount; -void opal_tracepoint_regfunc(void) +int opal_tracepoint_regfunc(void) { opal_tracepoint_refcount++; + return 0; } void opal_tracepoint_unregfunc(void) diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 44d2d842cee7..3aa40f1b20f5 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -304,8 +304,11 @@ OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE); OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE); OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE); OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR); +OPAL_CALL_REAL(opal_rm_int_get_xirr, OPAL_INT_GET_XIRR); OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR); OPAL_CALL(opal_int_eoi, OPAL_INT_EOI); +OPAL_CALL_REAL(opal_rm_int_eoi, OPAL_INT_EOI); OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR); +OPAL_CALL_REAL(opal_rm_int_set_mfrr, OPAL_INT_SET_MFRR); OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL); OPAL_CALL_REAL(opal_rm_pci_tce_kill, OPAL_PCI_TCE_KILL); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 6c9a65b52e63..282293572dc8 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -632,21 +632,11 @@ static void __init opal_dump_region_init(void) "rc = %d\n", rc); } -static void opal_pdev_init(struct device_node *opal_node, - const char *compatible) +static void opal_pdev_init(const char *compatible) { struct device_node *np; - for_each_child_of_node(opal_node, np) - if (of_device_is_compatible(np, compatible)) - of_platform_device_create(np, NULL, NULL); -} - -static void opal_i2c_create_devs(void) -{ - struct device_node *np; - - for_each_compatible_node(np, NULL, "ibm,opal-i2c") + for_each_compatible_node(np, NULL, compatible) of_platform_device_create(np, NULL, NULL); } @@ -718,7 +708,7 @@ static int __init opal_init(void) opal_hmi_handler_init(); /* Create i2c platform devices */ - opal_i2c_create_devs(); + opal_pdev_init("ibm,opal-i2c"); /* Setup a heatbeat thread if requested by OPAL */ opal_init_heartbeat(); @@ -753,12 +743,12 @@ static int __init opal_init(void) } /* Initialize platform devices: IPMI backend, PRD & flash interface */ - opal_pdev_init(opal_node, "ibm,opal-ipmi"); - opal_pdev_init(opal_node, "ibm,opal-flash"); - opal_pdev_init(opal_node, "ibm,opal-prd"); + opal_pdev_init("ibm,opal-ipmi"); + opal_pdev_init("ibm,opal-flash"); + opal_pdev_init("ibm,opal-prd"); /* Initialise platform device: oppanel interface */ - opal_pdev_init(opal_node, "ibm,opal-oppanel"); + opal_pdev_init("ibm,opal-oppanel"); /* Initialise OPAL kmsg dumper for flushing console on panic */ opal_kmsg_init(); @@ -896,3 +886,5 @@ EXPORT_SYMBOL_GPL(opal_leds_get_ind); EXPORT_SYMBOL_GPL(opal_leds_set_ind); /* Export this symbol for PowerNV Operator Panel class driver */ 
EXPORT_SYMBOL_GPL(opal_write_oppanel_async); +/* Export this for KVM */ +EXPORT_SYMBOL_GPL(opal_int_set_mfrr); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d4b33dd2d9e7..b07680cd2518 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -83,7 +83,7 @@ void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, PCI_SLOT(pe->rid), PCI_FUNC(pe->rid)); #endif /* CONFIG_PCI_IOV*/ - printk("%spci %s: [PE# %.3d] %pV", + printk("%spci %s: [PE# %.2x] %pV", level, pfix, pe->pe_number, &vaf); va_end(args); @@ -145,8 +145,8 @@ static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) */ rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); - if (rc != OPAL_SUCCESS) - pr_warn("%s: Error %lld unfreezing PHB#%d-PE#%d\n", + if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED) + pr_warn("%s: Error %lld unfreezing PHB#%x-PE#%x\n", __func__, rc, phb->hose->global_number, pe_no); return &phb->ioda.pe_array[pe_no]; @@ -155,13 +155,13 @@ static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) { if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe_num)) { - pr_warn("%s: Invalid PE %d on PHB#%x\n", + pr_warn("%s: Invalid PE %x on PHB#%x\n", __func__, pe_no, phb->hose->global_number); return; } if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) - pr_debug("%s: PE %d was reserved on PHB#%x\n", + pr_debug("%s: PE %x was reserved on PHB#%x\n", __func__, pe_no, phb->hose->global_number); pnv_ioda_init_pe(phb, pe_no); @@ -229,7 +229,7 @@ static int pnv_ioda2_init_m64(struct pnv_phb *phb) else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) r->end -= (2 * phb->ioda.m64_segsize); else - pr_warn(" Cannot strip M64 segment for reserved PE#%d\n", + pr_warn(" Cannot strip M64 segment for reserved PE#%x\n", phb->ioda.reserved_pe_idx); return 0; @@ -291,7 +291,7 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb) OPAL_M64_WINDOW_TYPE, index, base, 0, PNV_IODA1_M64_SEGS * segsz); if (rc != OPAL_SUCCESS) { - pr_warn(" Error %lld setting M64 PHB#%d-BAR#%d\n", + pr_warn(" Error %lld setting M64 PHB#%x-BAR#%d\n", rc, phb->hose->global_number, index); goto fail; } @@ -300,7 +300,7 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb) OPAL_M64_WINDOW_TYPE, index, OPAL_ENABLE_M64_SPLIT); if (rc != OPAL_SUCCESS) { - pr_warn(" Error %lld enabling M64 PHB#%d-BAR#%d\n", + pr_warn(" Error %lld enabling M64 PHB#%x-BAR#%d\n", rc, phb->hose->global_number, index); goto fail; } @@ -316,7 +316,7 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb) else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) r->end -= (2 * phb->ioda.m64_segsize); else - WARN(1, "Wrong reserved PE#%d on PHB#%d\n", + WARN(1, "Wrong reserved PE#%x on PHB#%x\n", phb->ioda.reserved_pe_idx, phb->hose->global_number); return 0; @@ -414,7 +414,7 @@ static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) pe->pe_number / PNV_IODA1_M64_SEGS, pe->pe_number % PNV_IODA1_M64_SEGS); if (rc != OPAL_SUCCESS) - pr_warn("%s: Error %lld mapping M64 for PHB#%d-PE#%d\n", + pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n", __func__, rc, phb->hose->global_number, pe->pe_number); } @@ -941,14 +941,14 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) pe->mve_number = pe->pe_number; rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, pe->pe_number); if (rc != 
OPAL_SUCCESS) { - pe_err(pe, "OPAL error %ld setting up MVE %d\n", + pe_err(pe, "OPAL error %ld setting up MVE %x\n", rc, pe->mve_number); pe->mve_number = -1; } else { rc = opal_pci_set_mve_enable(phb->opal_id, pe->mve_number, OPAL_ENABLE_MVE); if (rc) { - pe_err(pe, "OPAL error %ld enabling MVE %d\n", + pe_err(pe, "OPAL error %ld enabling MVE %x\n", rc, pe->mve_number); pe->mve_number = -1; } @@ -1159,10 +1159,10 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) pe->rid = bus->busn_res.start << 8; if (all) - pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n", + pe_info(pe, "Secondary bus %d..%d associated with PE#%x\n", bus->busn_res.start, bus->busn_res.end, pe->pe_number); else - pe_info(pe, "Secondary bus %d associated with PE#%d\n", + pe_info(pe, "Secondary bus %d associated with PE#%x\n", bus->busn_res.start, pe->pe_number); if (pnv_ioda_configure_pe(phb, pe)) { @@ -1213,7 +1213,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) * peer NPU. */ dev_info(&npu_pdev->dev, - "Associating to existing PE %d\n", pe_num); + "Associating to existing PE %x\n", pe_num); pci_dev_get(npu_pdev); npu_pdn = pci_get_pdn(npu_pdev); rid = npu_pdev->bus->number << 8 | npu_pdn->devfn; @@ -1539,7 +1539,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) | pci_iov_virtfn_devfn(pdev, vf_index); - pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%d\n", + pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n", hose->global_number, pdev->bus->number, PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)), PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num); @@ -2844,7 +2844,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, pnv_set_msi_irq_chip(phb, virq); pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," - " address=%x_%08x data=%x PE# %d\n", + " address=%x_%08x data=%x PE# %x\n", pci_name(dev), is_64 ? 
"64" : "32", hwirq, xive_num, msg->address_hi, msg->address_lo, data, pe->pe_number); @@ -2993,7 +2993,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index); if (rc != OPAL_SUCCESS) { - pr_err("%s: Error %lld mapping IO segment#%d to PE#%d\n", + pr_err("%s: Error %lld mapping IO segment#%d to PE#%x\n", __func__, rc, index, pe->pe_number); break; } @@ -3017,7 +3017,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index); if (rc != OPAL_SUCCESS) { - pr_err("%s: Error %lld mapping M32 segment#%d to PE#%d", + pr_err("%s: Error %lld mapping M32 segment#%d to PE#%x", __func__, rc, index, pe->pe_number); break; } @@ -3281,7 +3281,7 @@ static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type) pnv_pci_ioda2_setup_dma_pe(phb, pe); break; default: - pr_warn("%s: No DMA for PHB#%d (type %d)\n", + pr_warn("%s: No DMA for PHB#%x (type %d)\n", __func__, phb->hose->global_number, phb->type); } } diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index db7b8020f68e..c6d554fe585c 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -234,7 +234,7 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose, int i; data = (struct OpalIoP7IOCPhbErrorData *)common; - pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n", + pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n", hose->global_number, be32_to_cpu(common->version)); if (data->brdgCtl) @@ -326,7 +326,7 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose, int i; data = (struct OpalIoPhb3ErrorData*)common; - pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n", + pr_info("PHB3 PHB#%x Diag-data (Version: %d)\n", hose->global_number, be32_to_cpu(common->version)); if (data->brdgCtl) pr_info("brdgCtl: %08x\n", @@ -516,7 +516,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn) } } - pr_devel(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n", + pr_devel(" -> EEH check, bdfn=%04x PE#%x fstate=%x\n", (pdn->busno << 8) | (pdn->devfn), pe_no, fstate); /* Clear the frozen state if applicable */ diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index efe8b6bb168b..d50c7d99baaf 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -174,7 +174,7 @@ static void pnv_shutdown(void) opal_shutdown(); } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE static void pnv_kexec_wait_secondaries_down(void) { int my_cpu, i, notified = -1; @@ -245,7 +245,7 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE); } } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE static unsigned long pnv_memory_block_size(void) @@ -311,7 +311,7 @@ define_machine(powernv) { .machine_shutdown = pnv_shutdown, .power_save = NULL, .calibrate_decr = generic_calibrate_decr, -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_cpu_down = pnv_kexec_cpu_down, #endif #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index cb3c50328de8..cc2b281a3766 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -63,7 +63,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn, vflags &= 
~HPTE_V_SECONDARY; hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize, ssize) | rflags; + hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags; spin_lock_irqsave(&ps3_htab_lock, flags); diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 3a487e7f4a5e..6244bc849469 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -250,7 +250,7 @@ static int __init ps3_probe(void) return 1; } -#if defined(CONFIG_KEXEC) +#if defined(CONFIG_KEXEC_CORE) static void ps3_kexec_cpu_down(int crash_shutdown, int secondary) { int cpu = smp_processor_id(); @@ -276,7 +276,7 @@ define_machine(ps3) { .progress = ps3_progress, .restart = ps3_restart, .halt = ps3_halt, -#if defined(CONFIG_KEXEC) +#if defined(CONFIG_KEXEC_CORE) .kexec_cpu_down = ps3_kexec_cpu_down, #endif }; diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index bec90fb30425..e1c280a95d58 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -127,3 +127,14 @@ config HV_PERF_CTRS systems. 24x7 is available on Power 8 systems. If unsure, select Y. + +config IBMVIO + depends on PPC_PSERIES + bool + default y + +config IBMEBUS + depends on PPC_PSERIES && !CPU_LITTLE_ENDIAN + bool "Support for GX bus based adapters" + help + Bus device driver for GX bus based adapters. diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index fedc2ccf029d..8f4ba089e802 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -8,7 +8,7 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ pci.o pci_dlpar.o eeh_pseries.o msi.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SCANLOG) += scanlog.o -obj-$(CONFIG_KEXEC) += kexec.o +obj-$(CONFIG_KEXEC_CORE) += kexec.o obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o @@ -21,6 +21,8 @@ obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_DTL) += dtl.o obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o obj-$(CONFIG_LPARCFG) += lparcfg.o +obj-$(CONFIG_IBMVIO) += vio.o +obj-$(CONFIG_IBMEBUS) += ibmebus.o ifeq ($(CONFIG_PPC_PSERIES),y) obj-$(CONFIG_SUSPEND) += suspend.o diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 66e7227469b8..972328829387 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -41,6 +41,8 @@ #include <linux/memory.h> #include <asm/plpar_wrappers.h> +#include "pseries.h" + #define CMM_DRIVER_VERSION "1.0.0" #define CMM_DEFAULT_DELAY 1 #define CMM_HOTPLUG_DELAY 5 @@ -109,6 +111,38 @@ static int hotplug_occurred; /* protected by the hotplug mutex */ static struct task_struct *cmm_thread_ptr; +static long plpar_page_set_loaned(unsigned long vpa) +{ + unsigned long cmo_page_sz = cmo_get_page_size(); + long rc = 0; + int i; + + for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) + rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0); + + for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) + plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, + vpa + i - cmo_page_sz, 0); + + return rc; +} + +static long plpar_page_set_active(unsigned long vpa) +{ + unsigned long cmo_page_sz = cmo_get_page_size(); + long rc = 0; + int i; + + for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) + rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0); + + for 
(i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) + plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, + vpa + i - cmo_page_sz, 0); + + return rc; +} + /** * cmm_alloc_pages - Allocate pages and mark them as loaned * @nr: number of pages to allocate diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 423e450efe07..76caa4a45ccd 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -418,84 +418,136 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog, } } -static ssize_t dlpar_store(struct class *class, struct class_attribute *attr, - const char *buf, size_t count) +static int dlpar_parse_resource(char **cmd, struct pseries_hp_errorlog *hp_elog) { - struct pseries_hp_errorlog *hp_elog; - struct completion hotplug_done; - const char *arg; - int rc; + char *arg; - hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL); - if (!hp_elog) { - rc = -ENOMEM; - goto dlpar_store_out; - } + arg = strsep(cmd, " "); + if (!arg) + return -EINVAL; - /* Parse out the request from the user, this will be in the form - * <resource> <action> <id_type> <id> - */ - arg = buf; - if (!strncmp(arg, "memory", 6)) { + if (sysfs_streq(arg, "memory")) { hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM; - arg += strlen("memory "); - } else if (!strncmp(arg, "cpu", 3)) { + } else if (sysfs_streq(arg, "cpu")) { hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_CPU; - arg += strlen("cpu "); } else { - pr_err("Invalid resource specified: \"%s\"\n", buf); - rc = -EINVAL; - goto dlpar_store_out; + pr_err("Invalid resource specified.\n"); + return -EINVAL; } - if (!strncmp(arg, "add", 3)) { + return 0; +} + +static int dlpar_parse_action(char **cmd, struct pseries_hp_errorlog *hp_elog) +{ + char *arg; + + arg = strsep(cmd, " "); + if (!arg) + return -EINVAL; + + if (sysfs_streq(arg, "add")) { hp_elog->action = PSERIES_HP_ELOG_ACTION_ADD; - arg += strlen("add "); - } else if (!strncmp(arg, "remove", 6)) { + } else if (sysfs_streq(arg, "remove")) { hp_elog->action = PSERIES_HP_ELOG_ACTION_REMOVE; - arg += strlen("remove "); } else { - pr_err("Invalid action specified: \"%s\"\n", buf); - rc = -EINVAL; - goto dlpar_store_out; + pr_err("Invalid action specified.\n"); + return -EINVAL; } - if (!strncmp(arg, "index", 5)) { - u32 index; + return 0; +} +static int dlpar_parse_id_type(char **cmd, struct pseries_hp_errorlog *hp_elog) +{ + char *arg; + u32 count, index; + + arg = strsep(cmd, " "); + if (!arg) + return -EINVAL; + + if (sysfs_streq(arg, "index")) { hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX; - arg += strlen("index "); + arg = strsep(cmd, " "); + if (!arg) { + pr_err("No DRC Index specified.\n"); + return -EINVAL; + } + if (kstrtou32(arg, 0, &index)) { - rc = -EINVAL; - pr_err("Invalid drc_index specified: \"%s\"\n", buf); - goto dlpar_store_out; + pr_err("Invalid DRC Index specified.\n"); + return -EINVAL; } hp_elog->_drc_u.drc_index = cpu_to_be32(index); - } else if (!strncmp(arg, "count", 5)) { - u32 count; - + } else if (sysfs_streq(arg, "count")) { hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_COUNT; - arg += strlen("count "); + arg = strsep(cmd, " "); + if (!arg) { + pr_err("No DRC count specified.\n"); + return -EINVAL; + } + if (kstrtou32(arg, 0, &count)) { - rc = -EINVAL; - pr_err("Invalid count specified: \"%s\"\n", buf); - goto dlpar_store_out; + pr_err("Invalid DRC count specified.\n"); + return -EINVAL; } hp_elog->_drc_u.drc_count = cpu_to_be32(count); } else { - pr_err("Invalid id_type specified: \"%s\"\n", buf); - rc 
= -EINVAL; - goto dlpar_store_out; + pr_err("Invalid id_type specified.\n"); + return -EINVAL; + } + + return 0; +} + +static ssize_t dlpar_store(struct class *class, struct class_attribute *attr, + const char *buf, size_t count) +{ + struct pseries_hp_errorlog *hp_elog; + struct completion hotplug_done; + char *argbuf; + char *args; + int rc; + + args = argbuf = kstrdup(buf, GFP_KERNEL); + hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL); + if (!hp_elog || !argbuf) { + pr_info("Could not allocate resources for DLPAR operation\n"); + kfree(argbuf); + kfree(hp_elog); + return -ENOMEM; } + /* + * Parse out the request from the user, this will be in the form: + * <resource> <action> <id_type> <id> + */ + rc = dlpar_parse_resource(&args, hp_elog); + if (rc) + goto dlpar_store_out; + + rc = dlpar_parse_action(&args, hp_elog); + if (rc) + goto dlpar_store_out; + + rc = dlpar_parse_id_type(&args, hp_elog); + if (rc) + goto dlpar_store_out; + init_completion(&hotplug_done); queue_hotplug_event(hp_elog, &hotplug_done, &rc); wait_for_completion(&hotplug_done); dlpar_store_out: + kfree(argbuf); kfree(hp_elog); + + if (rc) + pr_err("Could not handle DLPAR request \"%s\"\n", buf); + return rc ? rc : count; } diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 1c428f06b14c..1eef46d9cf30 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -270,7 +270,7 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data) eeh_add_flag(EEH_ENABLED); eeh_add_to_parent_pe(edev); - pr_debug("%s: EEH enabled on %02x:%02x.%01x PHB#%d-PE#%x\n", + pr_debug("%s: EEH enabled on %02x:%02x.%01x PHB#%x-PE#%x\n", __func__, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn), pe.phb->global_number, pe.addr); @@ -371,7 +371,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) pe->config_addr, BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid), 0); if (ret) { - pr_warn("%s: Failed to get address for PHB#%d-PE#%x\n", + pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->config_addr); return 0; } @@ -384,7 +384,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) pe->config_addr, BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid), 0); if (ret) { - pr_warn("%s: Failed to get address for PHB#%d-PE#%x\n", + pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->config_addr); return 0; } @@ -653,7 +653,7 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe) rtas_busy_delay(ret); } - pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n", + pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n", __func__, pe->phb->global_number, pe->addr, ret); return ret; } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 76ec104e88be..2617f9f356bd 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -472,12 +472,15 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove, /* Validate that there are enough LMBs to satisfy the request */ for (i = 0; i < num_lmbs; i++) { - if (lmbs[i].flags & DRCONF_MEM_ASSIGNED) + if (lmb_is_removable(&lmbs[i])) lmbs_available++; } - if (lmbs_available < lmbs_to_remove) + if (lmbs_available < lmbs_to_remove) { + pr_info("Not enough LMBs available (%d of %d) to satisfy request\n", + lmbs_available, lmbs_to_remove); return -EINVAL; + } for (i = 0; i < num_lmbs 
&& lmbs_removed < lmbs_to_remove; i++) { rc = dlpar_remove_lmb(&lmbs[i]); diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index 6ca9a2ffaac7..614c28537141 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -180,6 +180,7 @@ static int ibmebus_create_device(struct device_node *dn) static int ibmebus_create_devices(const struct of_device_id *matches) { struct device_node *root, *child; + struct device *dev; int ret = 0; root = of_find_node_by_path("/"); @@ -188,9 +189,12 @@ static int ibmebus_create_devices(const struct of_device_id *matches) if (!of_match_node(matches, child)) continue; - if (bus_find_device(&ibmebus_bus_type, NULL, child, - ibmebus_match_node)) + dev = bus_find_device(&ibmebus_bus_type, NULL, child, + ibmebus_match_node); + if (dev) { + put_device(dev); continue; + } ret = ibmebus_create_device(child); if (ret) { @@ -262,6 +266,7 @@ static ssize_t ibmebus_store_probe(struct bus_type *bus, const char *buf, size_t count) { struct device_node *dn = NULL; + struct device *dev; char *path; ssize_t rc = 0; @@ -269,8 +274,10 @@ static ssize_t ibmebus_store_probe(struct bus_type *bus, if (!path) return -ENOMEM; - if (bus_find_device(&ibmebus_bus_type, NULL, path, - ibmebus_match_path)) { + dev = bus_find_device(&ibmebus_bus_type, NULL, path, + ibmebus_match_path); + if (dev) { + put_device(dev); printk(KERN_WARNING "%s: %s has already been probed\n", __func__, path); rc = -EEXIST; @@ -307,6 +314,7 @@ static ssize_t ibmebus_store_remove(struct bus_type *bus, if ((dev = bus_find_device(&ibmebus_bus_type, NULL, path, ibmebus_match_path))) { of_device_unregister(to_platform_device(dev)); + put_device(dev); kfree(path); return count; @@ -415,303 +423,6 @@ static struct device_attribute ibmebus_bus_device_attrs[] = { __ATTR_NULL }; -#ifdef CONFIG_PM_SLEEP -static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg) -{ - struct platform_device *of_dev = to_platform_device(dev); - struct platform_driver *drv = to_platform_driver(dev->driver); - int ret = 0; - - if (dev->driver && drv->suspend) - ret = drv->suspend(of_dev, mesg); - return ret; -} - -static int ibmebus_bus_legacy_resume(struct device *dev) -{ - struct platform_device *of_dev = to_platform_device(dev); - struct platform_driver *drv = to_platform_driver(dev->driver); - int ret = 0; - - if (dev->driver && drv->resume) - ret = drv->resume(of_dev); - return ret; -} - -static int ibmebus_bus_pm_prepare(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (drv && drv->pm && drv->pm->prepare) - ret = drv->pm->prepare(dev); - - return ret; -} - -static void ibmebus_bus_pm_complete(struct device *dev) -{ - struct device_driver *drv = dev->driver; - - if (drv && drv->pm && drv->pm->complete) - drv->pm->complete(dev); -} - -#ifdef CONFIG_SUSPEND - -static int ibmebus_bus_pm_suspend(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->suspend) - ret = drv->pm->suspend(dev); - } else { - ret = ibmebus_bus_legacy_suspend(dev, PMSG_SUSPEND); - } - - return ret; -} - -static int ibmebus_bus_pm_suspend_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->suspend_noirq) - ret = drv->pm->suspend_noirq(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_resume(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int 
ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->resume) - ret = drv->pm->resume(dev); - } else { - ret = ibmebus_bus_legacy_resume(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_resume_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->resume_noirq) - ret = drv->pm->resume_noirq(dev); - } - - return ret; -} - -#else /* !CONFIG_SUSPEND */ - -#define ibmebus_bus_pm_suspend NULL -#define ibmebus_bus_pm_resume NULL -#define ibmebus_bus_pm_suspend_noirq NULL -#define ibmebus_bus_pm_resume_noirq NULL - -#endif /* !CONFIG_SUSPEND */ - -#ifdef CONFIG_HIBERNATE_CALLBACKS - -static int ibmebus_bus_pm_freeze(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->freeze) - ret = drv->pm->freeze(dev); - } else { - ret = ibmebus_bus_legacy_suspend(dev, PMSG_FREEZE); - } - - return ret; -} - -static int ibmebus_bus_pm_freeze_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->freeze_noirq) - ret = drv->pm->freeze_noirq(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_thaw(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->thaw) - ret = drv->pm->thaw(dev); - } else { - ret = ibmebus_bus_legacy_resume(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_thaw_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->thaw_noirq) - ret = drv->pm->thaw_noirq(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_poweroff(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->poweroff) - ret = drv->pm->poweroff(dev); - } else { - ret = ibmebus_bus_legacy_suspend(dev, PMSG_HIBERNATE); - } - - return ret; -} - -static int ibmebus_bus_pm_poweroff_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->poweroff_noirq) - ret = drv->pm->poweroff_noirq(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_restore(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->restore) - ret = drv->pm->restore(dev); - } else { - ret = ibmebus_bus_legacy_resume(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_restore_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->restore_noirq) - ret = drv->pm->restore_noirq(dev); - } - - return ret; -} - -#else /* !CONFIG_HIBERNATE_CALLBACKS */ - -#define ibmebus_bus_pm_freeze NULL -#define ibmebus_bus_pm_thaw NULL -#define ibmebus_bus_pm_poweroff NULL -#define ibmebus_bus_pm_restore NULL -#define ibmebus_bus_pm_freeze_noirq NULL -#define ibmebus_bus_pm_thaw_noirq NULL -#define ibmebus_bus_pm_poweroff_noirq NULL -#define ibmebus_bus_pm_restore_noirq NULL - -#endif /* !CONFIG_HIBERNATE_CALLBACKS */ - -static struct dev_pm_ops ibmebus_bus_dev_pm_ops = { - .prepare = ibmebus_bus_pm_prepare, - .complete = ibmebus_bus_pm_complete, - .suspend = ibmebus_bus_pm_suspend, - .resume = ibmebus_bus_pm_resume, - .freeze = ibmebus_bus_pm_freeze, 
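The put_device() additions earlier in this ibmebus diff all follow the same driver-core contract: bus_find_device() returns its match with the device reference count raised, so even a pure existence check must drop that reference. A minimal sketch of the pattern, where my_bus_type and the match callback are stand-ins rather than symbols from this patch:

#include <linux/device.h>

extern struct bus_type my_bus_type;	/* hypothetical bus, for illustration only */

/*
 * Returns true if a device on my_bus_type matches @data.  The reference
 * taken by bus_find_device() is dropped before returning, mirroring the
 * ibmebus fixes above.
 */
static bool my_bus_has_device(void *data,
			      int (*match)(struct device *dev, void *data))
{
	struct device *dev;

	dev = bus_find_device(&my_bus_type, NULL, data, match);
	if (!dev)
		return false;

	put_device(dev);	/* drop the reference bus_find_device() took */
	return true;
}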
- .thaw = ibmebus_bus_pm_thaw, - .poweroff = ibmebus_bus_pm_poweroff, - .restore = ibmebus_bus_pm_restore, - .suspend_noirq = ibmebus_bus_pm_suspend_noirq, - .resume_noirq = ibmebus_bus_pm_resume_noirq, - .freeze_noirq = ibmebus_bus_pm_freeze_noirq, - .thaw_noirq = ibmebus_bus_pm_thaw_noirq, - .poweroff_noirq = ibmebus_bus_pm_poweroff_noirq, - .restore_noirq = ibmebus_bus_pm_restore_noirq, -}; - -#define IBMEBUS_BUS_PM_OPS_PTR (&ibmebus_bus_dev_pm_ops) - -#else /* !CONFIG_PM_SLEEP */ - -#define IBMEBUS_BUS_PM_OPS_PTR NULL - -#endif /* !CONFIG_PM_SLEEP */ - struct bus_type ibmebus_bus_type = { .name = "ibmebus", .uevent = of_device_uevent_modalias, @@ -721,7 +432,6 @@ struct bus_type ibmebus_bus_type = { .remove = ibmebus_bus_device_remove, .shutdown = ibmebus_bus_device_shutdown, .dev_attrs = ibmebus_bus_device_attrs, - .pm = IBMEBUS_BUS_PM_OPS_PTR, }; EXPORT_SYMBOL(ibmebus_bus_type); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index aa35245d8d6d..5dc1c3c6e716 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -145,7 +145,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, hpte_group, vpn, pa, rflags, vflags, psize); hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags; + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; if (!(vflags & HPTE_V_BOLTED)) pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); @@ -221,7 +221,7 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group) return -1; } -static void pSeries_lpar_hptab_clear(void) +static void manual_hpte_clear_all(void) { unsigned long size_bytes = 1UL << ppc64_pft_size; unsigned long hpte_count = size_bytes >> 4; @@ -249,6 +249,26 @@ static void pSeries_lpar_hptab_clear(void) &(ptes[j].pteh), &(ptes[j].ptel)); } } +} + +static int hcall_hpte_clear_all(void) +{ + int rc; + + do { + rc = plpar_hcall_norets(H_CLEAR_HPT); + } while (rc == H_CONTINUE); + + return rc; +} + +static void pseries_hpte_clear_all(void) +{ + int rc; + + rc = hcall_hpte_clear_all(); + if (rc != H_SUCCESS) + manual_hpte_clear_all(); #ifdef __LITTLE_ENDIAN__ /* @@ -598,7 +618,7 @@ void __init hpte_init_pseries(void) mmu_hash_ops.hpte_remove = pSeries_lpar_hpte_remove; mmu_hash_ops.hpte_removebolted = pSeries_lpar_hpte_removebolted; mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; - mmu_hash_ops.hpte_clear_all = pSeries_lpar_hptab_clear; + mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; } @@ -661,9 +681,10 @@ EXPORT_SYMBOL(arch_free_page); #ifdef HAVE_JUMP_LABEL struct static_key hcall_tracepoint_key = STATIC_KEY_INIT; -void hcall_tracepoint_regfunc(void) +int hcall_tracepoint_regfunc(void) { static_key_slow_inc(&hcall_tracepoint_key); + return 0; } void hcall_tracepoint_unregfunc(void) @@ -680,9 +701,10 @@ void hcall_tracepoint_unregfunc(void) /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ extern long hcall_tracepoint_refcount; -void hcall_tracepoint_regfunc(void) +int hcall_tracepoint_regfunc(void) { hcall_tracepoint_refcount++; + return 0; } void hcall_tracepoint_unregfunc(void) diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index afa05a2cb702..e6397976060e 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -37,6 +37,7 @@ #include <asm/mmu.h> 
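The hcall_tracepoint_regfunc() change just above follows a tree-wide convention in which tracepoint registration hooks return int, so a failed registration can be reported instead of silently ignored. A hedged sketch of that convention, using a stand-in static key rather than the real hcall_tracepoint_key:

#include <linux/jump_label.h>

static struct static_key my_trace_key = STATIC_KEY_INIT;

/* Returning non-zero here would make the tracepoint core abort the
 * probe registration; 0 means the fast-path key was armed. */
int my_trace_regfunc(void)
{
	static_key_slow_inc(&my_trace_key);
	return 0;
}

void my_trace_unregfunc(void)
{
	static_key_slow_dec(&my_trace_key);
}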
#include <asm/machdep.h> +#include "pseries.h" /* * This isn't a module but we expose that to userspace diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index b1be7b713fe6..1361a9db534b 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -79,4 +79,23 @@ extern struct pci_controller_ops pseries_pci_controller_ops; unsigned long pseries_memory_block_size(void); +extern int CMO_PrPSP; +extern int CMO_SecPSP; +extern unsigned long CMO_PageSize; + +static inline int cmo_get_primary_psp(void) +{ + return CMO_PrPSP; +} + +static inline int cmo_get_secondary_psp(void) +{ + return CMO_SecPSP; +} + +static inline unsigned long cmo_get_page_size(void) +{ + return CMO_PageSize; +} + #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 97aa3f332f24..7736352f7279 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -367,7 +367,7 @@ void pseries_disable_reloc_on_exc(void) } EXPORT_SYMBOL(pseries_disable_reloc_on_exc); -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE static void pSeries_machine_kexec(struct kimage *image) { if (firmware_has_feature(FW_FEATURE_SET_MODE)) @@ -725,7 +725,7 @@ define_machine(pseries) { .progress = rtas_progress, .system_reset_exception = pSeries_system_reset_exception, .machine_check_exception = pSeries_machine_check_exception, -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .machine_kexec = pSeries_machine_kexec, .kexec_cpu_down = pseries_kexec_cpu_down, #endif diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/platforms/pseries/vio.c index b3813ddb2fb4..2c8fb3ec989e 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1648,6 +1648,9 @@ static struct vio_dev *vio_find_name(const char *name) /** * vio_find_node - find an already-registered vio_dev * @vnode: device_node of the virtual device we're looking for + * + * Takes a reference to the embedded struct device which needs to be dropped + * after use. */ struct vio_dev *vio_find_node(struct device_node *vnode) { diff --git a/arch/powerpc/purgatory/.gitignore b/arch/powerpc/purgatory/.gitignore new file mode 100644 index 000000000000..e9e66f178a6d --- /dev/null +++ b/arch/powerpc/purgatory/.gitignore @@ -0,0 +1,2 @@ +kexec-purgatory.c +purgatory.ro diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile new file mode 100644 index 000000000000..ac8793c13348 --- /dev/null +++ b/arch/powerpc/purgatory/Makefile @@ -0,0 +1,15 @@ +targets += trampoline.o purgatory.ro kexec-purgatory.c + +LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined + +$(obj)/purgatory.ro: $(obj)/trampoline.o FORCE + $(call if_changed,ld) + +CMD_BIN2C = $(objtree)/scripts/basic/bin2c +quiet_cmd_bin2c = BIN2C $@ + cmd_bin2c = $(CMD_BIN2C) kexec_purgatory < $< > $@ + +$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE + $(call if_changed,bin2c) + +obj-y += kexec-purgatory.o diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S new file mode 100644 index 000000000000..f9760ccf4032 --- /dev/null +++ b/arch/powerpc/purgatory/trampoline.S @@ -0,0 +1,128 @@ +/* + * kexec trampoline + * + * Based on code taken from kexec-tools and kexec-lite. 
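The kernel-doc added to vio_find_node() above spells out the same rule: the returned vio_dev carries a reference on its embedded struct device that the caller must drop after use. A short usage sketch, with the surrounding driver context assumed rather than taken from this patch:

#include <linux/device.h>
#include <asm/vio.h>

static void my_inspect_vio_node(struct device_node *vnode)
{
	struct vio_dev *vdev = vio_find_node(vnode);

	if (!vdev)
		return;

	dev_info(&vdev->dev, "found matching vio device\n");
	put_device(&vdev->dev);	/* balance the reference from the lookup */
}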
+ * + * Copyright (C) 2004 - 2005, Milton D Miller II, IBM Corporation + * Copyright (C) 2006, Mohan Kumar M, IBM Corporation + * Copyright (C) 2013, Anton Blanchard, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation (version 2 of the License). + */ + +#if defined(__LITTLE_ENDIAN__) +#define STWX_BE stwbrx +#define LWZX_BE lwbrx +#elif defined(__BIG_ENDIAN__) +#define STWX_BE stwx +#define LWZX_BE lwzx +#else +#error no endianness defined! +#endif + + .machine ppc64 + .balign 256 + .globl purgatory_start +purgatory_start: + b master + + /* ABI: possible run_at_load flag at 0x5c */ + .org purgatory_start + 0x5c + .globl run_at_load +run_at_load: + .long 0 + .size run_at_load, . - run_at_load + + /* ABI: slaves start at 60 with r3=phys */ + .org purgatory_start + 0x60 +slave: + b . + /* ABI: end of copied region */ + .org purgatory_start + 0x100 + .size purgatory_start, . - purgatory_start + +/* + * The above 0x100 bytes at purgatory_start are replaced with the + * code from the kernel (or next stage) by setup_purgatory(). + */ + +master: + or %r1,%r1,%r1 /* low priority to let other threads catchup */ + isync + mr %r17,%r3 /* save cpu id to r17 */ + mr %r15,%r4 /* save physical address in reg15 */ + + or %r3,%r3,%r3 /* ok now to high priority, lets boot */ + lis %r6,0x1 + mtctr %r6 /* delay a bit for slaves to catch up */ + bdnz . /* before we overwrite 0-100 again */ + + bl 0f /* Work out where we're running */ +0: mflr %r18 + + /* load device-tree address */ + ld %r3, (dt_offset - 0b)(%r18) + mr %r16,%r3 /* save dt address in reg16 */ + li %r4,20 + LWZX_BE %r6,%r3,%r4 /* fetch __be32 version number at byte 20 */ + cmpwi %r0,%r6,2 /* v2 or later? */ + blt 1f + li %r4,28 + STWX_BE %r17,%r3,%r4 /* Store my cpu as __be32 at byte 28 */ +1: + /* load the kernel address */ + ld %r4,(kernel - 0b)(%r18) + + /* load the run_at_load flag */ + /* possibly patched by kexec */ + ld %r6,(run_at_load - 0b)(%r18) + /* and patch it into the kernel */ + stw %r6,(0x5c)(%r4) + + mr %r3,%r16 /* restore dt address */ + + li %r5,0 /* r5 will be 0 for kernel */ + + mfmsr %r11 + andi. %r10,%r11,1 /* test MSR_LE */ + bne .Little_endian + + mtctr %r4 /* prepare branch to */ + bctr /* start kernel */ + +.Little_endian: + mtsrr0 %r4 /* prepare branch to */ + + clrrdi %r11,%r11,1 /* clear MSR_LE */ + mtsrr1 %r11 + + rfid /* update MSR and start kernel */ + + + .balign 8 + .globl kernel +kernel: + .llong 0x0 + .size kernel, . - kernel + + .balign 8 + .globl dt_offset +dt_offset: + .llong 0x0 + .size dt_offset, . - dt_offset + + + .data + .balign 8 +.globl sha256_digest +sha256_digest: + .skip 32 + .size sha256_digest, . - sha256_digest + + .balign 8 +.globl sha_regions +sha_regions: + .skip 8 * 2 * 16 + .size sha_regions, . 
- sha_regions diff --git a/arch/powerpc/sysdev/fsl_lbc.c b/arch/powerpc/sysdev/fsl_lbc.c index 424b67fdb57f..5340a483cf55 100644 --- a/arch/powerpc/sysdev/fsl_lbc.c +++ b/arch/powerpc/sysdev/fsl_lbc.c @@ -31,7 +31,7 @@ #include <asm/prom.h> #include <asm/fsl_lbc.h> -static spinlock_t fsl_lbc_lock = __SPIN_LOCK_UNLOCKED(fsl_lbc_lock); +static DEFINE_SPINLOCK(fsl_lbc_lock); struct fsl_lbc_ctrl *fsl_lbc_ctrl_dev; EXPORT_SYMBOL(fsl_lbc_ctrl_dev); diff --git a/arch/powerpc/sysdev/fsl_pmc.c b/arch/powerpc/sysdev/fsl_pmc.c index 1d6fd7c59fe9..232225e7f863 100644 --- a/arch/powerpc/sysdev/fsl_pmc.c +++ b/arch/powerpc/sysdev/fsl_pmc.c @@ -85,8 +85,4 @@ static struct platform_driver pmc_driver = { .probe = pmc_probe, }; -static int __init pmc_init(void) -{ - return platform_driver_register(&pmc_driver); -} -device_initcall(pmc_init); +builtin_platform_driver(pmc_driver); diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 3cc7cace194a..1c41c51f22cb 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -80,10 +80,8 @@ "3: li %1,-1\n" \ " li %0,%3\n" \ " b 2b\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".text" \ + ".previous\n" \ + EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (x) \ : "b" (addr), "i" (-EFAULT), "0" (err)) @@ -113,7 +111,7 @@ int fsl_rio_mcheck_exception(struct pt_regs *regs) out_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR), 0); regs->msr |= MSR_RI; - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return 1; } } diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index d93056eedcb0..19101f9cfcfc 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -77,13 +77,10 @@ phys_addr_t get_immrbase(void) EXPORT_SYMBOL(get_immrbase); -static u32 sysfreq = -1; - u32 fsl_get_sys_freq(void) { + static u32 sysfreq = -1; struct device_node *soc; - const u32 *prop; - int size; if (sysfreq != -1) return sysfreq; @@ -92,12 +89,9 @@ u32 fsl_get_sys_freq(void) if (!soc) return -1; - prop = of_get_property(soc, "clock-frequency", &size); - if (!prop || size != sizeof(*prop) || *prop == 0) - prop = of_get_property(soc, "bus-frequency", &size); - - if (prop && size == sizeof(*prop)) - sysfreq = *prop; + of_property_read_u32(soc, "clock-frequency", &sysfreq); + if (sysfreq == -1 || !sysfreq) + of_property_read_u32(soc, "bus-frequency", &sysfreq); of_node_put(soc); return sysfreq; @@ -106,23 +100,17 @@ EXPORT_SYMBOL(fsl_get_sys_freq); #if defined(CONFIG_CPM2) || defined(CONFIG_QUICC_ENGINE) || defined(CONFIG_8xx) -static u32 brgfreq = -1; - u32 get_brgfreq(void) { + static u32 brgfreq = -1; struct device_node *node; - const unsigned int *prop; - int size; if (brgfreq != -1) return brgfreq; node = of_find_compatible_node(NULL, NULL, "fsl,cpm-brg"); if (node) { - prop = of_get_property(node, "clock-frequency", &size); - if (prop && size == 4) - brgfreq = *prop; - + of_property_read_u32(node, "clock-frequency", &brgfreq); of_node_put(node); return brgfreq; } @@ -135,15 +123,11 @@ u32 get_brgfreq(void) node = of_find_node_by_type(NULL, "qe"); if (node) { - prop = of_get_property(node, "brg-frequency", &size); - if (prop && size == 4) - brgfreq = *prop; - - if (brgfreq == -1 || brgfreq == 0) { - prop = of_get_property(node, "bus-frequency", &size); - if (prop && size == 4) - brgfreq = *prop / 2; - } + of_property_read_u32(node, "brg-frequency", &brgfreq); + if (brgfreq == -1 || !brgfreq) + if (!of_property_read_u32(node, "bus-frequency", + &brgfreq)) + brgfreq /= 
2; of_node_put(node); } @@ -152,10 +136,9 @@ u32 get_brgfreq(void) EXPORT_SYMBOL(get_brgfreq); -static u32 fs_baudrate = -1; - u32 get_baudrate(void) { + static u32 fs_baudrate = -1; struct device_node *node; if (fs_baudrate != -1) @@ -163,12 +146,7 @@ u32 get_baudrate(void) node = of_find_node_by_type(NULL, "serial"); if (node) { - int size; - const unsigned int *prop = of_get_property(node, - "current-speed", &size); - - if (prop) - fs_baudrate = *prop; + of_property_read_u32(node, "current-speed", &fs_baudrate); of_node_put(node); } diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c index 57c971b7839c..53a16aa4d384 100644 --- a/arch/powerpc/sysdev/tsi108_pci.c +++ b/arch/powerpc/sysdev/tsi108_pci.c @@ -137,10 +137,8 @@ void tsi108_clear_pci_error(u32 pci_cfg_base) ".section .fixup,\"ax\"\n" \ "3: li %0,-1\n" \ " b 2b\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 2\n" \ - " .long 1b,3b\n" \ - ".text" \ + ".previous\n" \ + EX_TABLE(1b, 3b) \ : "=r"(x) : "r"(addr)) int diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c index 0f52d7955796..4a86dcff3fcd 100644 --- a/arch/powerpc/sysdev/xilinx_intc.c +++ b/arch/powerpc/sysdev/xilinx_intc.c @@ -29,194 +29,7 @@ #include <asm/processor.h> #include <asm/i8259.h> #include <asm/irq.h> - -/* - * INTC Registers - */ -#define XINTC_ISR 0 /* Interrupt Status */ -#define XINTC_IPR 4 /* Interrupt Pending */ -#define XINTC_IER 8 /* Interrupt Enable */ -#define XINTC_IAR 12 /* Interrupt Acknowledge */ -#define XINTC_SIE 16 /* Set Interrupt Enable bits */ -#define XINTC_CIE 20 /* Clear Interrupt Enable bits */ -#define XINTC_IVR 24 /* Interrupt Vector */ -#define XINTC_MER 28 /* Master Enable */ - -static struct irq_domain *master_irqhost; - -#define XILINX_INTC_MAXIRQS (32) - -/* The following table allows the interrupt type, edge or level, - * to be cached after being read from the device tree until the interrupt - * is mapped - */ -static int xilinx_intc_typetable[XILINX_INTC_MAXIRQS]; - -/* Map the interrupt type from the device tree to the interrupt types - * used by the interrupt subsystem - */ -static unsigned char xilinx_intc_map_senses[] = { - IRQ_TYPE_EDGE_RISING, - IRQ_TYPE_EDGE_FALLING, - IRQ_TYPE_LEVEL_HIGH, - IRQ_TYPE_LEVEL_LOW, -}; - -/* - * The interrupt controller is setup such that it doesn't work well with - * the level interrupt handler in the kernel because the handler acks the - * interrupt before calling the application interrupt handler. To deal with - * that, we use 2 different irq chips so that different functions can be - * used for level and edge type interrupts. 
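The fsl_soc.c conversions above lean on a convenient property of of_property_read_u32(): on failure it returns an error and leaves the output word untouched, so a -1 sentinel survives a missing or malformed property. A minimal sketch in the same style, with the compatible string and property name as illustrative placeholders:

#include <linux/of.h>

static u32 my_get_example_freq(void)
{
	static u32 freq = -1;	/* sentinel: not yet looked up */
	struct device_node *np;

	if (freq != -1)
		return freq;

	np = of_find_compatible_node(NULL, NULL, "fsl,example-soc");
	if (!np)
		return -1;

	/* On any failure freq is left alone, so the sentinel persists. */
	of_property_read_u32(np, "clock-frequency", &freq);
	of_node_put(np);

	return freq;
}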
- * - * IRQ Chip common (across level and edge) operations - */ -static void xilinx_intc_mask(struct irq_data *d) -{ - int irq = irqd_to_hwirq(d); - void * regs = irq_data_get_irq_chip_data(d); - pr_debug("mask: %d\n", irq); - out_be32(regs + XINTC_CIE, 1 << irq); -} - -static int xilinx_intc_set_type(struct irq_data *d, unsigned int flow_type) -{ - return 0; -} - -/* - * IRQ Chip level operations - */ -static void xilinx_intc_level_unmask(struct irq_data *d) -{ - int irq = irqd_to_hwirq(d); - void * regs = irq_data_get_irq_chip_data(d); - pr_debug("unmask: %d\n", irq); - out_be32(regs + XINTC_SIE, 1 << irq); - - /* ack level irqs because they can't be acked during - * ack function since the handle_level_irq function - * acks the irq before calling the inerrupt handler - */ - out_be32(regs + XINTC_IAR, 1 << irq); -} - -static struct irq_chip xilinx_intc_level_irqchip = { - .name = "Xilinx Level INTC", - .irq_mask = xilinx_intc_mask, - .irq_mask_ack = xilinx_intc_mask, - .irq_unmask = xilinx_intc_level_unmask, - .irq_set_type = xilinx_intc_set_type, -}; - -/* - * IRQ Chip edge operations - */ -static void xilinx_intc_edge_unmask(struct irq_data *d) -{ - int irq = irqd_to_hwirq(d); - void *regs = irq_data_get_irq_chip_data(d); - pr_debug("unmask: %d\n", irq); - out_be32(regs + XINTC_SIE, 1 << irq); -} - -static void xilinx_intc_edge_ack(struct irq_data *d) -{ - int irq = irqd_to_hwirq(d); - void * regs = irq_data_get_irq_chip_data(d); - pr_debug("ack: %d\n", irq); - out_be32(regs + XINTC_IAR, 1 << irq); -} - -static struct irq_chip xilinx_intc_edge_irqchip = { - .name = "Xilinx Edge INTC", - .irq_mask = xilinx_intc_mask, - .irq_unmask = xilinx_intc_edge_unmask, - .irq_ack = xilinx_intc_edge_ack, - .irq_set_type = xilinx_intc_set_type, -}; - -/* - * IRQ Host operations - */ - -/** - * xilinx_intc_xlate - translate virq# from device tree interrupts property - */ -static int xilinx_intc_xlate(struct irq_domain *h, struct device_node *ct, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, - unsigned int *out_flags) -{ - if ((intsize < 2) || (intspec[0] >= XILINX_INTC_MAXIRQS)) - return -EINVAL; - - /* keep a copy of the interrupt type til the interrupt is mapped - */ - xilinx_intc_typetable[intspec[0]] = xilinx_intc_map_senses[intspec[1]]; - - /* Xilinx uses 2 interrupt entries, the 1st being the h/w - * interrupt number, the 2nd being the interrupt type, edge or level - */ - *out_hwirq = intspec[0]; - *out_flags = xilinx_intc_map_senses[intspec[1]]; - - return 0; -} -static int xilinx_intc_map(struct irq_domain *h, unsigned int virq, - irq_hw_number_t irq) -{ - irq_set_chip_data(virq, h->host_data); - - if (xilinx_intc_typetable[irq] == IRQ_TYPE_LEVEL_HIGH || - xilinx_intc_typetable[irq] == IRQ_TYPE_LEVEL_LOW) { - irq_set_chip_and_handler(virq, &xilinx_intc_level_irqchip, - handle_level_irq); - } else { - irq_set_chip_and_handler(virq, &xilinx_intc_edge_irqchip, - handle_edge_irq); - } - return 0; -} - -static const struct irq_domain_ops xilinx_intc_ops = { - .map = xilinx_intc_map, - .xlate = xilinx_intc_xlate, -}; - -struct irq_domain * __init -xilinx_intc_init(struct device_node *np) -{ - struct irq_domain * irq; - void * regs; - - /* Find and map the intc registers */ - regs = of_iomap(np, 0); - if (!regs) { - pr_err("xilinx_intc: could not map registers\n"); - return NULL; - } - - /* Setup interrupt controller */ - out_be32(regs + XINTC_IER, 0); /* disable all irqs */ - out_be32(regs + XINTC_IAR, ~(u32) 0); /* Acknowledge pending irqs */ - out_be32(regs + XINTC_MER, 
0x3UL); /* Turn on the Master Enable. */ - - /* Allocate and initialize an irq_domain structure. */ - irq = irq_domain_add_linear(np, XILINX_INTC_MAXIRQS, &xilinx_intc_ops, - regs); - if (!irq) - panic(__FILE__ ": Cannot allocate IRQ host\n"); - - return irq; -} - -int xilinx_intc_get_irq(void) -{ - void * regs = master_irqhost->host_data; - pr_debug("get_irq:\n"); - return irq_linear_revmap(master_irqhost, in_be32(regs + XINTC_IVR)); -} +#include <linux/irqchip.h> #if defined(CONFIG_PPC_I8259) /* @@ -265,31 +78,11 @@ static void __init xilinx_i8259_setup_cascade(void) static inline void xilinx_i8259_setup_cascade(void) { return; } #endif /* defined(CONFIG_PPC_I8259) */ -static const struct of_device_id xilinx_intc_match[] __initconst = { - { .compatible = "xlnx,opb-intc-1.00.c", }, - { .compatible = "xlnx,xps-intc-1.00.a", }, - {} -}; - /* * Initialize master Xilinx interrupt controller */ void __init xilinx_intc_init_tree(void) { - struct device_node *np; - - /* find top level interrupt controller */ - for_each_matching_node(np, xilinx_intc_match) { - if (!of_get_property(np, "interrupts", NULL)) - break; - } - BUG_ON(!np); - - master_irqhost = xilinx_intc_init(np); - BUG_ON(!master_irqhost); - - irq_set_default_host(master_irqhost); - of_node_put(np); - + irqchip_init(); xilinx_i8259_setup_cascade(); } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 760545519a0b..9c0e17cf6886 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -10,6 +10,8 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ + +#include <linux/kernel.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/smp.h> @@ -225,6 +227,7 @@ Commands:\n\ #endif "\ dr dump stream of raw bytes\n\ + dt dump the tracing buffers (uses printk)\n\ e print exception information\n\ f flush cache\n\ la lookup symbol+offset of specified address\n\ @@ -2364,6 +2367,9 @@ dump(void) dump_log_buf(); } else if (c == 'o') { dump_opal_msglog(); + } else if (c == 't') { + ftrace_dump(DUMP_ALL); + tracing_on(); } else if (c == 'r') { scanhex(&ndump); if (ndump == 0) |
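The new xmon 'dt' command pairs ftrace_dump() with tracing_on() because dumping the ring buffers through printk disables tracing as a side effect; re-arming it keeps a debugging session from silently losing later events. The idiom in isolation, as a hedged sketch:

#include <linux/ftrace.h>
#include <linux/kernel.h>

static void my_dump_trace_buffers(void)
{
	ftrace_dump(DUMP_ALL);	/* spill every CPU's trace buffer via printk */
	tracing_on();		/* ftrace_dump() switched tracing off; restore it */
}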