summaryrefslogtreecommitdiffstats
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/boot/Makefile21
-rw-r--r--arch/powerpc/boot/addnote.c128
-rw-r--r--arch/powerpc/boot/crt0.S180
-rw-r--r--arch/powerpc/boot/elf_util.c4
-rw-r--r--arch/powerpc/boot/main.c8
-rw-r--r--arch/powerpc/boot/of.c4
-rw-r--r--arch/powerpc/boot/of.h19
-rw-r--r--arch/powerpc/boot/ofconsole.c6
-rw-r--r--arch/powerpc/boot/oflib.c92
-rw-r--r--arch/powerpc/boot/ops.h2
-rw-r--r--arch/powerpc/boot/ppc_asm.h12
-rw-r--r--arch/powerpc/boot/ps3.c4
-rw-r--r--arch/powerpc/boot/pseries-head.S8
-rw-r--r--arch/powerpc/boot/stdio.c14
-rw-r--r--arch/powerpc/boot/swab.h29
-rwxr-xr-xarch/powerpc/boot/wrapper17
-rw-r--r--arch/powerpc/boot/zImage.lds.S25
-rw-r--r--arch/powerpc/include/asm/eeh.h47
-rw-r--r--arch/powerpc/include/asm/machdep.h5
-rw-r--r--arch/powerpc/include/asm/opal.h43
-rw-r--r--arch/powerpc/include/asm/ppc-pci.h1
-rw-r--r--arch/powerpc/include/asm/reg.h1
-rw-r--r--arch/powerpc/include/uapi/asm/setup.h7
-rw-r--r--arch/powerpc/kernel/eeh.c211
-rw-r--r--arch/powerpc/kernel/eeh_driver.c118
-rw-r--r--arch/powerpc/kernel/eeh_pe.c47
-rw-r--r--arch/powerpc/kernel/fadump.c5
-rw-r--r--arch/powerpc/kernel/legacy_serial.c21
-rw-r--r--arch/powerpc/kernel/pci-common.c108
-rw-r--r--arch/powerpc/kernel/pci_of_scan.c9
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c1
-rw-r--r--arch/powerpc/kernel/rtas_flash.c2
-rw-r--r--arch/powerpc/kernel/rtas_pci.c66
-rw-r--r--arch/powerpc/kernel/setup-common.c16
-rw-r--r--arch/powerpc/kernel/tm.S40
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S18
-rw-r--r--arch/powerpc/mm/hash_native_64.c38
-rw-r--r--arch/powerpc/perf/hv-24x7.c35
-rw-r--r--arch/powerpc/perf/hv-gpci.c6
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype5
-rw-r--r--arch/powerpc/platforms/embedded6xx/Kconfig1
-rw-r--r--arch/powerpc/platforms/powernv/eeh-ioda.c349
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-dump.c94
-rw-r--r--arch/powerpc/platforms/powernv/opal-elog.c11
-rw-r--r--arch/powerpc/platforms/powernv/opal-flash.c165
-rw-r--r--arch/powerpc/platforms/powernv/opal-sysparam.c32
-rw-r--r--arch/powerpc/platforms/powernv/opal.c69
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c29
-rw-r--r--arch/powerpc/platforms/powernv/pci.c202
-rw-r--r--arch/powerpc/platforms/powernv/pci.h11
-rw-r--r--arch/powerpc/platforms/powernv/setup.c94
-rw-r--r--arch/powerpc/platforms/powernv/smp.c3
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c43
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c5
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c10
-rw-r--r--arch/powerpc/sysdev/ppc4xx_pci.c2
57 files changed, 1709 insertions, 838 deletions
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index a1f8c7f1ec60..a33c23308e97 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -22,8 +22,14 @@ all: $(obj)/zImage
BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-fno-strict-aliasing -Os -msoft-float -pipe \
-fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
- -isystem $(shell $(CROSS32CC) -print-file-name=include) \
- -mbig-endian
+ -isystem $(shell $(CROSS32CC) -print-file-name=include)
+ifdef CONFIG_PPC64_BOOT_WRAPPER
+BOOTCFLAGS += -m64
+endif
+ifdef CONFIG_CPU_BIG_ENDIAN
+BOOTCFLAGS += -mbig-endian
+endif
+
BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
ifdef CONFIG_DEBUG_INFO
@@ -99,6 +105,11 @@ src-plat-$(CONFIG_EMBEDDED6xx) += cuboot-pq2.c cuboot-mpc7448hpc2.c \
src-plat-$(CONFIG_AMIGAONE) += cuboot-amigaone.c
src-plat-$(CONFIG_PPC_PS3) += ps3-head.S ps3-hvcall.S ps3.c
src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c
+src-plat-$(CONFIG_PPC_PSERIES) += pseries-head.S
+src-plat-$(CONFIG_PPC_POWERNV) += pseries-head.S
+src-plat-$(CONFIG_PPC_IBM_CELL_BLADE) += pseries-head.S
+src-plat-$(CONFIG_PPC_CELLEB) += pseries-head.S
+src-plat-$(CONFIG_PPC_CELL_QPACE) += pseries-head.S
src-wlib := $(sort $(src-wlib-y))
src-plat := $(sort $(src-plat-y))
@@ -137,7 +148,11 @@ $(addprefix $(obj)/,$(libfdt) $(libfdtheader)): $(obj)/%: $(srctree)/scripts/dtc
$(obj)/empty.c:
@touch $@
-$(obj)/zImage.lds $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds: $(obj)/%: $(srctree)/$(src)/%.S
+$(obj)/zImage.lds: $(obj)/%: $(srctree)/$(src)/%.S
+ $(CROSS32CC) $(cpp_flags) -E -Wp,-MD,$(depfile) -P -Upowerpc \
+ -D__ASSEMBLY__ -DLINKER_SCRIPT -o $@ $<
+
+$(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S
@cp $< $@
clean-files := $(zlib) $(zlibheader) $(zliblinuxheader) \
diff --git a/arch/powerpc/boot/addnote.c b/arch/powerpc/boot/addnote.c
index 349b5530d2c4..9d9f6f334d3c 100644
--- a/arch/powerpc/boot/addnote.c
+++ b/arch/powerpc/boot/addnote.c
@@ -6,6 +6,8 @@
*
* Copyright 2000 Paul Mackerras.
*
+ * Adapted for 64 bit little endian images by Andrew Tauferner.
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -55,36 +57,61 @@ unsigned int rpanote[N_RPA_DESCR] = {
#define ROUNDUP(len) (((len) + 3) & ~3)
-unsigned char buf[512];
+unsigned char buf[1024];
+#define ELFDATA2LSB 1
+#define ELFDATA2MSB 2
+static int e_data = ELFDATA2MSB;
+#define ELFCLASS32 1
+#define ELFCLASS64 2
+static int e_class = ELFCLASS32;
#define GET_16BE(off) ((buf[off] << 8) + (buf[(off)+1]))
-#define GET_32BE(off) ((GET_16BE(off) << 16) + GET_16BE((off)+2))
-
-#define PUT_16BE(off, v) (buf[off] = ((v) >> 8) & 0xff, \
- buf[(off) + 1] = (v) & 0xff)
-#define PUT_32BE(off, v) (PUT_16BE((off), (v) >> 16), \
- PUT_16BE((off) + 2, (v)))
+#define GET_32BE(off) ((GET_16BE(off) << 16U) + GET_16BE((off)+2U))
+#define GET_64BE(off) ((((unsigned long long)GET_32BE(off)) << 32ULL) + \
+ ((unsigned long long)GET_32BE((off)+4ULL)))
+#define PUT_16BE(off, v)(buf[off] = ((v) >> 8) & 0xff, \
+ buf[(off) + 1] = (v) & 0xff)
+#define PUT_32BE(off, v)(PUT_16BE((off), (v) >> 16L), PUT_16BE((off) + 2, (v)))
+#define PUT_64BE(off, v)((PUT_32BE((off), (v) >> 32L), \
+ PUT_32BE((off) + 4, (v))))
+
+#define GET_16LE(off) ((buf[off]) + (buf[(off)+1] << 8))
+#define GET_32LE(off) (GET_16LE(off) + (GET_16LE((off)+2U) << 16U))
+#define GET_64LE(off) ((unsigned long long)GET_32LE(off) + \
+ (((unsigned long long)GET_32LE((off)+4ULL)) << 32ULL))
+#define PUT_16LE(off, v) (buf[off] = (v) & 0xff, \
+ buf[(off) + 1] = ((v) >> 8) & 0xff)
+#define PUT_32LE(off, v) (PUT_16LE((off), (v)), PUT_16LE((off) + 2, (v) >> 16L))
+#define PUT_64LE(off, v) (PUT_32LE((off), (v)), PUT_32LE((off) + 4, (v) >> 32L))
+
+#define GET_16(off) (e_data == ELFDATA2MSB ? GET_16BE(off) : GET_16LE(off))
+#define GET_32(off) (e_data == ELFDATA2MSB ? GET_32BE(off) : GET_32LE(off))
+#define GET_64(off) (e_data == ELFDATA2MSB ? GET_64BE(off) : GET_64LE(off))
+#define PUT_16(off, v) (e_data == ELFDATA2MSB ? PUT_16BE(off, v) : \
+ PUT_16LE(off, v))
+#define PUT_32(off, v) (e_data == ELFDATA2MSB ? PUT_32BE(off, v) : \
+ PUT_32LE(off, v))
+#define PUT_64(off, v) (e_data == ELFDATA2MSB ? PUT_64BE(off, v) : \
+ PUT_64LE(off, v))
/* Structure of an ELF file */
#define E_IDENT 0 /* ELF header */
-#define E_PHOFF 28
-#define E_PHENTSIZE 42
-#define E_PHNUM 44
-#define E_HSIZE 52 /* size of ELF header */
+#define E_PHOFF (e_class == ELFCLASS32 ? 28 : 32)
+#define E_PHENTSIZE (e_class == ELFCLASS32 ? 42 : 54)
+#define E_PHNUM (e_class == ELFCLASS32 ? 44 : 56)
+#define E_HSIZE (e_class == ELFCLASS32 ? 52 : 64)
#define EI_MAGIC 0 /* offsets in E_IDENT area */
#define EI_CLASS 4
#define EI_DATA 5
#define PH_TYPE 0 /* ELF program header */
-#define PH_OFFSET 4
-#define PH_FILESZ 16
-#define PH_HSIZE 32 /* size of program header */
+#define PH_OFFSET (e_class == ELFCLASS32 ? 4 : 8)
+#define PH_FILESZ (e_class == ELFCLASS32 ? 16 : 32)
+#define PH_HSIZE (e_class == ELFCLASS32 ? 32 : 56)
#define PT_NOTE 4 /* Program header type = note */
-#define ELFCLASS32 1
-#define ELFDATA2MSB 2
unsigned char elf_magic[4] = { 0x7f, 'E', 'L', 'F' };
@@ -92,8 +119,8 @@ int
main(int ac, char **av)
{
int fd, n, i;
- int ph, ps, np;
- int nnote, nnote2, ns;
+ unsigned long ph, ps, np;
+ long nnote, nnote2, ns;
if (ac != 2) {
fprintf(stderr, "Usage: %s elf-file\n", av[0]);
@@ -114,26 +141,27 @@ main(int ac, char **av)
exit(1);
}
- if (n < E_HSIZE || memcmp(&buf[E_IDENT+EI_MAGIC], elf_magic, 4) != 0)
+ if (memcmp(&buf[E_IDENT+EI_MAGIC], elf_magic, 4) != 0)
+ goto notelf;
+ e_class = buf[E_IDENT+EI_CLASS];
+ if (e_class != ELFCLASS32 && e_class != ELFCLASS64)
+ goto notelf;
+ e_data = buf[E_IDENT+EI_DATA];
+ if (e_data != ELFDATA2MSB && e_data != ELFDATA2LSB)
+ goto notelf;
+ if (n < E_HSIZE)
goto notelf;
- if (buf[E_IDENT+EI_CLASS] != ELFCLASS32
- || buf[E_IDENT+EI_DATA] != ELFDATA2MSB) {
- fprintf(stderr, "%s is not a big-endian 32-bit ELF image\n",
- av[1]);
- exit(1);
- }
-
- ph = GET_32BE(E_PHOFF);
- ps = GET_16BE(E_PHENTSIZE);
- np = GET_16BE(E_PHNUM);
+ ph = (e_class == ELFCLASS32 ? GET_32(E_PHOFF) : GET_64(E_PHOFF));
+ ps = GET_16(E_PHENTSIZE);
+ np = GET_16(E_PHNUM);
if (ph < E_HSIZE || ps < PH_HSIZE || np < 1)
goto notelf;
if (ph + (np + 2) * ps + nnote + nnote2 > n)
goto nospace;
for (i = 0; i < np; ++i) {
- if (GET_32BE(ph + PH_TYPE) == PT_NOTE) {
+ if (GET_32(ph + PH_TYPE) == PT_NOTE) {
fprintf(stderr, "%s already has a note entry\n",
av[1]);
exit(0);
@@ -148,15 +176,22 @@ main(int ac, char **av)
/* fill in the program header entry */
ns = ph + 2 * ps;
- PUT_32BE(ph + PH_TYPE, PT_NOTE);
- PUT_32BE(ph + PH_OFFSET, ns);
- PUT_32BE(ph + PH_FILESZ, nnote);
+ PUT_32(ph + PH_TYPE, PT_NOTE);
+ if (e_class == ELFCLASS32)
+ PUT_32(ph + PH_OFFSET, ns);
+ else
+ PUT_64(ph + PH_OFFSET, ns);
+
+ if (e_class == ELFCLASS32)
+ PUT_32(ph + PH_FILESZ, nnote);
+ else
+ PUT_64(ph + PH_FILESZ, nnote);
/* fill in the note area we point to */
/* XXX we should probably make this a proper section */
- PUT_32BE(ns, strlen(arch) + 1);
- PUT_32BE(ns + 4, N_DESCR * 4);
- PUT_32BE(ns + 8, 0x1275);
+ PUT_32(ns, strlen(arch) + 1);
+ PUT_32(ns + 4, N_DESCR * 4);
+ PUT_32(ns + 8, 0x1275);
strcpy((char *) &buf[ns + 12], arch);
ns += 12 + strlen(arch) + 1;
for (i = 0; i < N_DESCR; ++i, ns += 4)
@@ -164,21 +199,28 @@ main(int ac, char **av)
/* fill in the second program header entry and the RPA note area */
ph += ps;
- PUT_32BE(ph + PH_TYPE, PT_NOTE);
- PUT_32BE(ph + PH_OFFSET, ns);
- PUT_32BE(ph + PH_FILESZ, nnote2);
+ PUT_32(ph + PH_TYPE, PT_NOTE);
+ if (e_class == ELFCLASS32)
+ PUT_32(ph + PH_OFFSET, ns);
+ else
+ PUT_64(ph + PH_OFFSET, ns);
+
+ if (e_class == ELFCLASS32)
+ PUT_32(ph + PH_FILESZ, nnote);
+ else
+ PUT_64(ph + PH_FILESZ, nnote2);
/* fill in the note area we point to */
- PUT_32BE(ns, strlen(rpaname) + 1);
- PUT_32BE(ns + 4, sizeof(rpanote));
- PUT_32BE(ns + 8, 0x12759999);
+ PUT_32(ns, strlen(rpaname) + 1);
+ PUT_32(ns + 4, sizeof(rpanote));
+ PUT_32(ns + 8, 0x12759999);
strcpy((char *) &buf[ns + 12], rpaname);
ns += 12 + ROUNDUP(strlen(rpaname) + 1);
for (i = 0; i < N_RPA_DESCR; ++i, ns += 4)
PUT_32BE(ns, rpanote[i]);
/* Update the number of program headers */
- PUT_16BE(E_PHNUM, np + 2);
+ PUT_16(E_PHNUM, np + 2);
/* write back */
lseek(fd, (long) 0, SEEK_SET);
diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
index 0f7428a37efb..14de4f8778a7 100644
--- a/arch/powerpc/boot/crt0.S
+++ b/arch/powerpc/boot/crt0.S
@@ -1,17 +1,20 @@
/*
* Copyright (C) Paul Mackerras 1997.
*
+ * Adapted for 64 bit LE PowerPC by Andrew Tauferner
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * NOTE: this code runs in 32 bit mode, is position-independent,
- * and is packaged as ELF32.
*/
#include "ppc_asm.h"
+RELA = 7
+RELACOUNT = 0x6ffffff9
+
.text
/* A procedure descriptor used when booting this as a COFF file.
* When making COFF, this comes first in the link and we're
@@ -21,6 +24,20 @@
_zimage_start_opd:
.long 0x500000, 0, 0, 0
+#ifdef __powerpc64__
+.balign 8
+p_start: .llong _start
+p_etext: .llong _etext
+p_bss_start: .llong __bss_start
+p_end: .llong _end
+
+p_toc: .llong __toc_start + 0x8000 - p_base
+p_dyn: .llong __dynamic_start - p_base
+p_rela: .llong __rela_dyn_start - p_base
+p_prom: .llong 0
+ .weak _platform_stack_top
+p_pstack: .llong _platform_stack_top
+#else
p_start: .long _start
p_etext: .long _etext
p_bss_start: .long __bss_start
@@ -28,6 +45,7 @@ p_end: .long _end
.weak _platform_stack_top
p_pstack: .long _platform_stack_top
+#endif
.weak _zimage_start
.globl _zimage_start
@@ -38,6 +56,7 @@ _zimage_start_lib:
and the address where we're running. */
bl .+4
p_base: mflr r10 /* r10 now points to runtime addr of p_base */
+#ifndef __powerpc64__
/* grab the link address of the dynamic section in r11 */
addis r11,r10,(_GLOBAL_OFFSET_TABLE_-p_base)@ha
lwz r11,(_GLOBAL_OFFSET_TABLE_-p_base)@l(r11)
@@ -51,8 +70,6 @@ p_base: mflr r10 /* r10 now points to runtime addr of p_base */
/* The dynamic section contains a series of tagged entries.
* We need the RELA and RELACOUNT entries. */
-RELA = 7
-RELACOUNT = 0x6ffffff9
li r9,0
li r0,0
9: lwz r8,0(r12) /* get tag */
@@ -120,9 +137,164 @@ RELACOUNT = 0x6ffffff9
li r0,0
stwu r0,-16(r1) /* establish a stack frame */
6:
+#else /* __powerpc64__ */
+ /* Save the prom pointer at p_prom. */
+ std r5,(p_prom-p_base)(r10)
+
+ /* Set r2 to the TOC. */
+ ld r2,(p_toc-p_base)(r10)
+ add r2,r2,r10
+
+ /* Grab the link address of the dynamic section in r11. */
+ ld r11,-32768(r2)
+ cmpwi r11,0
+ beq 3f /* if not linked -pie then no dynamic section */
+
+ ld r11,(p_dyn-p_base)(r10)
+ add r11,r11,r10
+ ld r9,(p_rela-p_base)(r10)
+ add r9,r9,r10
+ li r7,0
+ li r8,0
+9: ld r6,0(r11) /* get tag */
+ cmpdi r6,0
+ beq 12f /* end of list */
+ cmpdi r6,RELA
+ bne 10f
+ ld r7,8(r11) /* get RELA pointer in r7 */
+ b 11f
+10: addis r6,r6,(-RELACOUNT)@ha
+ cmpdi r6,RELACOUNT@l
+ bne 11f
+ ld r8,8(r11) /* get RELACOUNT value in r8 */
+11: addi r11,r11,16
+ b 9b
+12:
+ cmpdi r7,0 /* check we have both RELA and RELACOUNT */
+ cmpdi cr1,r8,0
+ beq 3f
+ beq cr1,3f
+
+ /* Calcuate the runtime offset. */
+ subf r7,r7,r9
+
+ /* Run through the list of relocations and process the
+ * R_PPC64_RELATIVE ones. */
+ mtctr r8
+13: ld r0,8(r9) /* ELF64_R_TYPE(reloc->r_info) */
+ cmpdi r0,22 /* R_PPC64_RELATIVE */
+ bne 3f
+ ld r6,0(r9) /* reloc->r_offset */
+ ld r0,16(r9) /* reloc->r_addend */
+ add r0,r0,r7
+ stdx r0,r7,r6
+ addi r9,r9,24
+ bdnz 13b
+
+ /* Do a cache flush for our text, in case the loader didn't */
+3: ld r9,p_start-p_base(r10) /* note: these are relocated now */
+ ld r8,p_etext-p_base(r10)
+4: dcbf r0,r9
+ icbi r0,r9
+ addi r9,r9,0x20
+ cmpld cr0,r9,r8
+ blt 4b
+ sync
+ isync
+
+ /* Clear the BSS */
+ ld r9,p_bss_start-p_base(r10)
+ ld r8,p_end-p_base(r10)
+ li r0,0
+5: std r0,0(r9)
+ addi r9,r9,8
+ cmpld cr0,r9,r8
+ blt 5b
+
+ /* Possibly set up a custom stack */
+ ld r8,p_pstack-p_base(r10)
+ cmpdi r8,0
+ beq 6f
+ ld r1,0(r8)
+ li r0,0
+ stdu r0,-16(r1) /* establish a stack frame */
+6:
+#endif /* __powerpc64__ */
/* Call platform_init() */
bl platform_init
/* Call start */
b start
+
+#ifdef __powerpc64__
+
+#define PROM_FRAME_SIZE 512
+#define SAVE_GPR(n, base) std n,8*(n)(base)
+#define REST_GPR(n, base) ld n,8*(n)(base)
+#define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base)
+#define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base)
+#define SAVE_8GPRS(n, base) SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base)
+#define SAVE_10GPRS(n, base) SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base)
+#define REST_2GPRS(n, base) REST_GPR(n, base); REST_GPR(n+1, base)
+#define REST_4GPRS(n, base) REST_2GPRS(n, base); REST_2GPRS(n+2, base)
+#define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base)
+#define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base)
+
+/* prom handles the jump into and return from firmware. The prom args pointer
+ is loaded in r3. */
+.globl prom
+prom:
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */
+
+ SAVE_GPR(2, r1)
+ SAVE_GPR(13, r1)
+ SAVE_8GPRS(14, r1)
+ SAVE_10GPRS(22, r1)
+ mfcr r10
+ std r10,8*32(r1)
+ mfmsr r10
+ std r10,8*33(r1)
+
+ /* remove MSR_LE from msr but keep MSR_SF */
+ mfmsr r10
+ rldicr r10,r10,0,62
+ mtsrr1 r10
+
+ /* Load FW address, set LR to label 1, and jump to FW */
+ bl 0f
+0: mflr r10
+ addi r11,r10,(1f-0b)
+ mtlr r11
+
+ ld r10,(p_prom-0b)(r10)
+ mtsrr0 r10
+
+ rfid
+
+1: /* Return from OF */
+ FIXUP_ENDIAN
+
+ /* Restore registers and return. */
+ rldicl r1,r1,0,32
+
+ /* Restore the MSR (back to 64 bits) */
+ ld r10,8*(33)(r1)
+ mtmsr r10
+ isync
+
+ /* Restore other registers */
+ REST_GPR(2, r1)
+ REST_GPR(13, r1)
+ REST_8GPRS(14, r1)
+ REST_10GPRS(22, r1)
+ ld r10,8*32(r1)
+ mtcr r10
+
+ addi r1,r1,PROM_FRAME_SIZE
+ ld r0,16(r1)
+ mtlr r0
+ blr
+#endif
diff --git a/arch/powerpc/boot/elf_util.c b/arch/powerpc/boot/elf_util.c
index 1567a0c0f05c..316552dea4d8 100644
--- a/arch/powerpc/boot/elf_util.c
+++ b/arch/powerpc/boot/elf_util.c
@@ -26,7 +26,11 @@ int parse_elf64(void *hdr, struct elf_info *info)
elf64->e_ident[EI_MAG2] == ELFMAG2 &&
elf64->e_ident[EI_MAG3] == ELFMAG3 &&
elf64->e_ident[EI_CLASS] == ELFCLASS64 &&
+#ifdef __LITTLE_ENDIAN__
+ elf64->e_ident[EI_DATA] == ELFDATA2LSB &&
+#else
elf64->e_ident[EI_DATA] == ELFDATA2MSB &&
+#endif
(elf64->e_type == ET_EXEC ||
elf64->e_type == ET_DYN) &&
elf64->e_machine == EM_PPC64))
diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c
index a28f02165e97..d367a0aece2a 100644
--- a/arch/powerpc/boot/main.c
+++ b/arch/powerpc/boot/main.c
@@ -139,18 +139,18 @@ static struct addr_range prep_initrd(struct addr_range vmlinux, void *chosen,
* edit the command line passed to vmlinux (by setting /chosen/bootargs).
* The buffer is put in it's own section so that tools may locate it easier.
*/
-static char cmdline[COMMAND_LINE_SIZE]
+static char cmdline[BOOT_COMMAND_LINE_SIZE]
__attribute__((__section__("__builtin_cmdline")));
static void prep_cmdline(void *chosen)
{
if (cmdline[0] == '\0')
- getprop(chosen, "bootargs", cmdline, COMMAND_LINE_SIZE-1);
+ getprop(chosen, "bootargs", cmdline, BOOT_COMMAND_LINE_SIZE-1);
printf("\n\rLinux/PowerPC load: %s", cmdline);
/* If possible, edit the command line */
if (console_ops.edit_cmdline)
- console_ops.edit_cmdline(cmdline, COMMAND_LINE_SIZE);
+ console_ops.edit_cmdline(cmdline, BOOT_COMMAND_LINE_SIZE);
printf("\n\r");
/* Put the command line back into the devtree for the kernel */
@@ -174,7 +174,7 @@ void start(void)
* built-in command line wasn't set by an external tool */
if ((loader_info.cmdline_len > 0) && (cmdline[0] == '\0'))
memmove(cmdline, loader_info.cmdline,
- min(loader_info.cmdline_len, COMMAND_LINE_SIZE-1));
+ min(loader_info.cmdline_len, BOOT_COMMAND_LINE_SIZE-1));
if (console_ops.open && (console_ops.open() < 0))
exit();
diff --git a/arch/powerpc/boot/of.c b/arch/powerpc/boot/of.c
index 62e2f43ec1df..7ca910cb2fc6 100644
--- a/arch/powerpc/boot/of.c
+++ b/arch/powerpc/boot/of.c
@@ -40,8 +40,8 @@ static void *of_try_claim(unsigned long size)
#ifdef DEBUG
printf(" trying: 0x%08lx\n\r", claim_base);
#endif
- addr = (unsigned long)of_claim(claim_base, size, 0);
- if ((void *)addr != (void *)-1)
+ addr = (unsigned long) of_claim(claim_base, size, 0);
+ if (addr != PROM_ERROR)
break;
}
if (addr == 0)
diff --git a/arch/powerpc/boot/of.h b/arch/powerpc/boot/of.h
index e4c68f7391c5..c8c1750aba0c 100644
--- a/arch/powerpc/boot/of.h
+++ b/arch/powerpc/boot/of.h
@@ -1,12 +1,15 @@
#ifndef _PPC_BOOT_OF_H_
#define _PPC_BOOT_OF_H_
+#include "swab.h"
+
typedef void *phandle;
-typedef void *ihandle;
+typedef u32 ihandle;
void of_init(void *promptr);
int of_call_prom(const char *service, int nargs, int nret, ...);
-void *of_claim(unsigned long virt, unsigned long size, unsigned long align);
+unsigned int of_claim(unsigned long virt, unsigned long size,
+ unsigned long align);
void *of_vmlinux_alloc(unsigned long size);
void of_exit(void);
void *of_finddevice(const char *name);
@@ -18,4 +21,16 @@ int of_setprop(const void *phandle, const char *name, const void *buf,
/* Console functions */
void of_console_init(void);
+typedef u32 __be32;
+
+#ifdef __LITTLE_ENDIAN__
+#define cpu_to_be32(x) swab32(x)
+#define be32_to_cpu(x) swab32(x)
+#else
+#define cpu_to_be32(x) (x)
+#define be32_to_cpu(x) (x)
+#endif
+
+#define PROM_ERROR (-1u)
+
#endif /* _PPC_BOOT_OF_H_ */
diff --git a/arch/powerpc/boot/ofconsole.c b/arch/powerpc/boot/ofconsole.c
index ce0e02424453..8b754702460a 100644
--- a/arch/powerpc/boot/ofconsole.c
+++ b/arch/powerpc/boot/ofconsole.c
@@ -18,7 +18,7 @@
#include "of.h"
-static void *of_stdout_handle;
+static unsigned int of_stdout_handle;
static int of_console_open(void)
{
@@ -27,8 +27,10 @@ static int of_console_open(void)
if (((devp = of_finddevice("/chosen")) != NULL)
&& (of_getprop(devp, "stdout", &of_stdout_handle,
sizeof(of_stdout_handle))
- == sizeof(of_stdout_handle)))
+ == sizeof(of_stdout_handle))) {
+ of_stdout_handle = be32_to_cpu(of_stdout_handle);
return 0;
+ }
return -1;
}
diff --git a/arch/powerpc/boot/oflib.c b/arch/powerpc/boot/oflib.c
index b0ec9cf3eaaf..46c98a47d949 100644
--- a/arch/powerpc/boot/oflib.c
+++ b/arch/powerpc/boot/oflib.c
@@ -16,74 +16,83 @@
#include "of.h"
+typedef u32 prom_arg_t;
+
+/* The following structure is used to communicate with open firmware.
+ * All arguments in and out are in big endian format. */
+struct prom_args {
+ __be32 service; /* Address of service name string. */
+ __be32 nargs; /* Number of input arguments. */
+ __be32 nret; /* Number of output arguments. */
+ __be32 args[10]; /* Input/output arguments. */
+};
+
+#ifdef __powerpc64__
+extern int prom(void *);
+#else
static int (*prom) (void *);
+#endif
void of_init(void *promptr)
{
+#ifndef __powerpc64__
prom = (int (*)(void *))promptr;
+#endif
}
+#define ADDR(x) (u32)(unsigned long)(x)
+
int of_call_prom(const char *service, int nargs, int nret, ...)
{
int i;
- struct prom_args {
- const char *service;
- int nargs;
- int nret;
- unsigned int args[12];
- } args;
+ struct prom_args args;
va_list list;
- args.service = service;
- args.nargs = nargs;
- args.nret = nret;
+ args.service = cpu_to_be32(ADDR(service));
+ args.nargs = cpu_to_be32(nargs);
+ args.nret = cpu_to_be32(nret);
va_start(list, nret);
for (i = 0; i < nargs; i++)
- args.args[i] = va_arg(list, unsigned int);
+ args.args[i] = cpu_to_be32(va_arg(list, prom_arg_t));
va_end(list);
for (i = 0; i < nret; i++)
args.args[nargs+i] = 0;
if (prom(&args) < 0)
- return -1;
+ return PROM_ERROR;
- return (nret > 0)? args.args[nargs]: 0;
+ return (nret > 0) ? be32_to_cpu(args.args[nargs]) : 0;
}
static int of_call_prom_ret(const char *service, int nargs, int nret,
- unsigned int *rets, ...)
+ prom_arg_t *rets, ...)
{
int i;
- struct prom_args {
- const char *service;
- int nargs;
- int nret;
- unsigned int args[12];
- } args;
+ struct prom_args args;
va_list list;
- args.service = service;
- args.nargs = nargs;
- args.nret = nret;
+ args.service = cpu_to_be32(ADDR(service));
+ args.nargs = cpu_to_be32(nargs);
+ args.nret = cpu_to_be32(nret);
va_start(list, rets);
for (i = 0; i < nargs; i++)
- args.args[i] = va_arg(list, unsigned int);
+ args.args[i] = cpu_to_be32(va_arg(list, prom_arg_t));
va_end(list);
for (i = 0; i < nret; i++)
args.args[nargs+i] = 0;
if (prom(&args) < 0)
- return -1;
+ return PROM_ERROR;
- if (rets != (void *) 0)
+ if (rets != NULL)
for (i = 1; i < nret; ++i)
- rets[i-1] = args.args[nargs+i];
+ rets[i-1] = be32_to_cpu(args.args[nargs+i]);
- return (nret > 0)? args.args[nargs]: 0;
+ return (nret > 0) ? be32_to_cpu(args.args[nargs]) : 0;
}
/* returns true if s2 is a prefix of s1 */
@@ -103,7 +112,7 @@ static int string_match(const char *s1, const char *s2)
*/
static int need_map = -1;
static ihandle chosen_mmu;
-static phandle memory;
+static ihandle memory;
static int check_of_version(void)
{
@@ -132,10 +141,10 @@ static int check_of_version(void)
printf("no mmu\n");
return 0;
}
- memory = (ihandle) of_call_prom("open", 1, 1, "/memory");
- if (memory == (ihandle) -1) {
- memory = (ihandle) of_call_prom("open", 1, 1, "/memory@0");
- if (memory == (ihandle) -1) {
+ memory = of_call_prom("open", 1, 1, "/memory");
+ if (memory == PROM_ERROR) {
+ memory = of_call_prom("open", 1, 1, "/memory@0");
+ if (memory == PROM_ERROR) {
printf("no memory node\n");
return 0;
}
@@ -144,40 +153,41 @@ static int check_of_version(void)
return 1;
}
-void *of_claim(unsigned long virt, unsigned long size, unsigned long align)
+unsigned int of_claim(unsigned long virt, unsigned long size,
+ unsigned long align)
{
int ret;
- unsigned int result;
+ prom_arg_t result;
if (need_map < 0)
need_map = check_of_version();
if (align || !need_map)
- return (void *) of_call_prom("claim", 3, 1, virt, size, align);
+ return of_call_prom("claim", 3, 1, virt, size, align);
ret = of_call_prom_ret("call-method", 5, 2, &result, "claim", memory,
align, size, virt);
if (ret != 0 || result == -1)
- return (void *) -1;
+ return -1;
ret = of_call_prom_ret("call-method", 5, 2, &result, "claim", chosen_mmu,
align, size, virt);
/* 0x12 == coherent + read/write */
ret = of_call_prom("call-method", 6, 1, "map", chosen_mmu,
0x12, size, virt, virt);
- return (void *) virt;
+ return virt;
}
void *of_vmlinux_alloc(unsigned long size)
{
unsigned long start = (unsigned long)_start, end = (unsigned long)_end;
- void *addr;
+ unsigned long addr;
void *p;
/* With some older POWER4 firmware we need to claim the area the kernel
* will reside in. Newer firmwares don't need this so we just ignore
* the return value.
*/
- addr = of_claim(start, end - start, 0);
- printf("Trying to claim from 0x%lx to 0x%lx (0x%lx) got %p\r\n",
+ addr = (unsigned long) of_claim(start, end - start, 0);
+ printf("Trying to claim from 0x%lx to 0x%lx (0x%lx) got %lx\r\n",
start, end, end - start, addr);
p = malloc(size);
@@ -197,7 +207,7 @@ void of_exit(void)
*/
void *of_finddevice(const char *name)
{
- return (phandle) of_call_prom("finddevice", 1, 1, name);
+ return (void *) (unsigned long) of_call_prom("finddevice", 1, 1, name);
}
int of_getprop(const void *phandle, const char *name, void *buf,
diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h
index b3218ce451bb..8aad3c55aeda 100644
--- a/arch/powerpc/boot/ops.h
+++ b/arch/powerpc/boot/ops.h
@@ -15,7 +15,7 @@
#include "types.h"
#include "string.h"
-#define COMMAND_LINE_SIZE 512
+#define BOOT_COMMAND_LINE_SIZE 2048
#define MAX_PATH_LEN 256
#define MAX_PROP_LEN 256 /* What should this be? */
diff --git a/arch/powerpc/boot/ppc_asm.h b/arch/powerpc/boot/ppc_asm.h
index eb0e98be69e0..35ea60c1f070 100644
--- a/arch/powerpc/boot/ppc_asm.h
+++ b/arch/powerpc/boot/ppc_asm.h
@@ -62,4 +62,16 @@
#define SPRN_TBRL 268
#define SPRN_TBRU 269
+#define FIXUP_ENDIAN \
+ tdi 0, 0, 0x48; /* Reverse endian of b . + 8 */ \
+ b $+36; /* Skip trampoline if endian is good */ \
+ .long 0x05009f42; /* bcl 20,31,$+4 */ \
+ .long 0xa602487d; /* mflr r10 */ \
+ .long 0x1c004a39; /* addi r10,r10,28 */ \
+ .long 0xa600607d; /* mfmsr r11 */ \
+ .long 0x01006b69; /* xori r11,r11,1 */ \
+ .long 0xa6035a7d; /* mtsrr0 r10 */ \
+ .long 0xa6037b7d; /* mtsrr1 r11 */ \
+ .long 0x2400004c /* rfid */
+
#endif /* _PPC64_PPC_ASM_H */
diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c
index 9954d98871d0..4ec2d86d3c50 100644
--- a/arch/powerpc/boot/ps3.c
+++ b/arch/powerpc/boot/ps3.c
@@ -47,13 +47,13 @@ BSS_STACK(4096);
* The buffer is put in it's own section so that tools may locate it easier.
*/
-static char cmdline[COMMAND_LINE_SIZE]
+static char cmdline[BOOT_COMMAND_LINE_SIZE]
__attribute__((__section__("__builtin_cmdline")));
static void prep_cmdline(void *chosen)
{
if (cmdline[0] == '\0')
- getprop(chosen, "bootargs", cmdline, COMMAND_LINE_SIZE-1);
+ getprop(chosen, "bootargs", cmdline, BOOT_COMMAND_LINE_SIZE-1);
else
setprop_str(chosen, "bootargs", cmdline);
diff --git a/arch/powerpc/boot/pseries-head.S b/arch/powerpc/boot/pseries-head.S
new file mode 100644
index 000000000000..6ef6e02e80f9
--- /dev/null
+++ b/arch/powerpc/boot/pseries-head.S
@@ -0,0 +1,8 @@
+#include "ppc_asm.h"
+
+ .text
+
+ .globl _zimage_start
+_zimage_start:
+ FIXUP_ENDIAN
+ b _zimage_start_lib
diff --git a/arch/powerpc/boot/stdio.c b/arch/powerpc/boot/stdio.c
index 5b57800bbc67..a701261b1781 100644
--- a/arch/powerpc/boot/stdio.c
+++ b/arch/powerpc/boot/stdio.c
@@ -21,6 +21,18 @@ size_t strnlen(const char * s, size_t count)
return sc - s;
}
+#ifdef __powerpc64__
+
+# define do_div(n, base) ({ \
+ unsigned int __base = (base); \
+ unsigned int __rem; \
+ __rem = ((unsigned long long)(n)) % __base; \
+ (n) = ((unsigned long long)(n)) / __base; \
+ __rem; \
+})
+
+#else
+
extern unsigned int __div64_32(unsigned long long *dividend,
unsigned int divisor);
@@ -39,6 +51,8 @@ extern unsigned int __div64_32(unsigned long long *dividend,
__rem; \
})
+#endif /* __powerpc64__ */
+
static int skip_atoi(const char **s)
{
int i, c;
diff --git a/arch/powerpc/boot/swab.h b/arch/powerpc/boot/swab.h
new file mode 100644
index 000000000000..d0e1431084ca
--- /dev/null
+++ b/arch/powerpc/boot/swab.h
@@ -0,0 +1,29 @@
+#ifndef _PPC_BOOT_SWAB_H_
+#define _PPC_BOOT_SWAB_H_
+
+static inline u16 swab16(u16 x)
+{
+ return ((x & (u16)0x00ffU) << 8) |
+ ((x & (u16)0xff00U) >> 8);
+}
+
+static inline u32 swab32(u32 x)
+{
+ return ((x & (u32)0x000000ffUL) << 24) |
+ ((x & (u32)0x0000ff00UL) << 8) |
+ ((x & (u32)0x00ff0000UL) >> 8) |
+ ((x & (u32)0xff000000UL) >> 24);
+}
+
+static inline u64 swab64(u64 x)
+{
+ return (u64)((x & (u64)0x00000000000000ffULL) << 56) |
+ (u64)((x & (u64)0x000000000000ff00ULL) << 40) |
+ (u64)((x & (u64)0x0000000000ff0000ULL) << 24) |
+ (u64)((x & (u64)0x00000000ff000000ULL) << 8) |
+ (u64)((x & (u64)0x000000ff00000000ULL) >> 8) |
+ (u64)((x & (u64)0x0000ff0000000000ULL) >> 24) |
+ (u64)((x & (u64)0x00ff000000000000ULL) >> 40) |
+ (u64)((x & (u64)0xff00000000000000ULL) >> 56);
+}
+#endif /* _PPC_BOOT_SWAB_H_ */
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index d27a25518b01..1948cf8b8a40 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -40,6 +40,7 @@ cacheit=
binary=
gzip=.gz
pie=
+format=
# cross-compilation prefix
CROSS=
@@ -136,6 +137,14 @@ if [ -z "$kernel" ]; then
kernel=vmlinux
fi
+elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`"
+case "$elfformat" in
+ elf64-powerpcle) format=elf64lppc ;;
+ elf64-powerpc) format=elf32ppc ;;
+ elf32-powerpc) format=elf32ppc ;;
+esac
+
+
platformo=$object/"$platform".o
lds=$object/zImage.lds
ext=strip
@@ -152,8 +161,12 @@ of)
make_space=n
;;
pseries)
- platformo="$object/of.o $object/epapr.o"
+ platformo="$object/pseries-head.o $object/of.o $object/epapr.o"
link_address='0x4000000'
+ if [ "$format" != "elf32ppc" ]; then
+ link_address=
+ pie=-pie
+ fi
make_space=n
;;
maple)
@@ -379,7 +392,7 @@ if [ "$platform" != "miboot" ]; then
if [ -n "$link_address" ] ; then
text_start="-Ttext $link_address"
fi
- ${CROSS}ld -m elf32ppc -T $lds $text_start $pie -o "$ofile" \
+ ${CROSS}ld -m $format -T $lds $text_start $pie -o "$ofile" \
$platformo $tmp $object/wrapper.a
rm $tmp
fi
diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S
index 2bd8731f1365..861e72109df2 100644
--- a/arch/powerpc/boot/zImage.lds.S
+++ b/arch/powerpc/boot/zImage.lds.S
@@ -1,4 +1,10 @@
+#include <asm-generic/vmlinux.lds.h>
+
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
+OUTPUT_ARCH(powerpc:common64)
+#else
OUTPUT_ARCH(powerpc:common)
+#endif
ENTRY(_zimage_start)
EXTERN(_zimage_start)
SECTIONS
@@ -16,7 +22,9 @@ SECTIONS
*(.rodata*)
*(.data*)
*(.sdata*)
+#ifndef CONFIG_PPC64_BOOT_WRAPPER
*(.got2)
+#endif
}
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
@@ -27,7 +35,13 @@ SECTIONS
}
.hash : { *(.hash) }
.interp : { *(.interp) }
- .rela.dyn : { *(.rela*) }
+ .rela.dyn :
+ {
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
+ __rela_dyn_start = .;
+#endif
+ *(.rela*)
+ }
. = ALIGN(8);
.kernel:dtb :
@@ -53,6 +67,15 @@ SECTIONS
_initrd_end = .;
}
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
+ .got :
+ {
+ __toc_start = .;
+ *(.got)
+ *(.toc)
+ }
+#endif
+
. = ALIGN(4096);
.bss :
{
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index d4dd41fb951b..d12529f34524 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -32,6 +32,22 @@ struct device_node;
#ifdef CONFIG_EEH
+/* EEH subsystem flags */
+#define EEH_ENABLED 0x1 /* EEH enabled */
+#define EEH_FORCE_DISABLED 0x2 /* EEH disabled */
+#define EEH_PROBE_MODE_DEV 0x4 /* From PCI device */
+#define EEH_PROBE_MODE_DEVTREE 0x8 /* From device tree */
+
+/*
+ * Delay for PE reset, all in ms
+ *
+ * PCI specification has reset hold time of 100 milliseconds.
+ * We have 250 milliseconds here. The PCI bus settlement time
+ * is specified as 1.5 seconds and we have 1.8 seconds.
+ */
+#define EEH_PE_RST_HOLD_TIME 250
+#define EEH_PE_RST_SETTLE_TIME 1800
+
/*
* The struct is used to trace PE related EEH functionality.
* In theory, there will have one instance of the struct to
@@ -53,7 +69,7 @@ struct device_node;
#define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */
#define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */
-#define EEH_PE_PHB_DEAD (1 << 2) /* Dead PHB */
+#define EEH_PE_RESET (1 << 2) /* PE reset in progress */
#define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */
@@ -92,6 +108,8 @@ struct eeh_pe {
#define EEH_DEV_NO_HANDLER (1 << 8) /* No error handler */
#define EEH_DEV_SYSFS (1 << 9) /* Sysfs created */
+#define EEH_DEV_REMOVED (1 << 10) /* Removed permanently */
+#define EEH_DEV_FRESET (1 << 11) /* Fundamental reset */
struct eeh_dev {
int mode; /* EEH mode */
@@ -99,7 +117,9 @@ struct eeh_dev {
int config_addr; /* Config address */
int pe_config_addr; /* PE config address */
u32 config_space[16]; /* Saved PCI config space */
- u8 pcie_cap; /* Saved PCIe capability */
+ int pcix_cap; /* Saved PCIx capability */
+ int pcie_cap; /* Saved PCIe capability */
+ int aer_cap; /* Saved AER capability */
struct eeh_pe *pe; /* Associated PE */
struct list_head list; /* Form link list in the PE */
struct pci_controller *phb; /* Associated PHB */
@@ -171,37 +191,40 @@ struct eeh_ops {
int (*restore_config)(struct device_node *dn);
};
+extern int eeh_subsystem_flags;
extern struct eeh_ops *eeh_ops;
-extern bool eeh_subsystem_enabled;
extern raw_spinlock_t confirm_error_lock;
-extern int eeh_probe_mode;
static inline bool eeh_enabled(void)
{
- return eeh_subsystem_enabled;
+ if ((eeh_subsystem_flags & EEH_FORCE_DISABLED) ||
+ !(eeh_subsystem_flags & EEH_ENABLED))
+ return false;
+
+ return true;
}
static inline void eeh_set_enable(bool mode)
{
- eeh_subsystem_enabled = mode;
+ if (mode)
+ eeh_subsystem_flags |= EEH_ENABLED;
+ else
+ eeh_subsystem_flags &= ~EEH_ENABLED;
}
-#define EEH_PROBE_MODE_DEV (1<<0) /* From PCI device */
-#define EEH_PROBE_MODE_DEVTREE (1<<1) /* From device tree */
-
static inline void eeh_probe_mode_set(int flag)
{
- eeh_probe_mode = flag;
+ eeh_subsystem_flags |= flag;
}
static inline int eeh_probe_mode_devtree(void)
{
- return (eeh_probe_mode == EEH_PROBE_MODE_DEVTREE);
+ return (eeh_subsystem_flags & EEH_PROBE_MODE_DEVTREE);
}
static inline int eeh_probe_mode_dev(void)
{
- return (eeh_probe_mode == EEH_PROBE_MODE_DEV);
+ return (eeh_subsystem_flags & EEH_PROBE_MODE_DEV);
}
static inline void eeh_serialize_lock(unsigned long *flags)
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 5b6c03f1058f..374abc2e41d7 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -113,6 +113,8 @@ struct machdep_calls {
/* Optional, may be NULL. */
void (*show_cpuinfo)(struct seq_file *m);
void (*show_percpuinfo)(struct seq_file *m, int i);
+ /* Returns the current operating frequency of "cpu" in Hz */
+ unsigned long (*get_proc_freq)(unsigned int cpu);
void (*init_IRQ)(void);
@@ -241,6 +243,9 @@ struct machdep_calls {
/* Called during PCI resource reassignment */
resource_size_t (*pcibios_window_alignment)(struct pci_bus *, unsigned long type);
+ /* Reset the secondary bus of bridge */
+ void (*pcibios_reset_secondary_bus)(struct pci_dev *dev);
+
/* Called to shutdown machine specific hardware not already controlled
* by other drivers.
*/
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index a2efdaa020b0..81720ff59a10 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -41,14 +41,14 @@ struct opal_takeover_args {
* size except the last one in the list to be as well.
*/
struct opal_sg_entry {
- void *data;
- long length;
+ __be64 data;
+ __be64 length;
};
-/* sg list */
+/* SG list */
struct opal_sg_list {
- unsigned long num_entries;
- struct opal_sg_list *next;
+ __be64 length;
+ __be64 next;
struct opal_sg_entry entry[];
};
@@ -858,8 +858,8 @@ int64_t opal_lpc_write(uint32_t chip_id, enum OpalLPCAddressType addr_type,
int64_t opal_lpc_read(uint32_t chip_id, enum OpalLPCAddressType addr_type,
uint32_t addr, __be32 *data, uint32_t sz);
-int64_t opal_read_elog(uint64_t buffer, size_t size, uint64_t log_id);
-int64_t opal_get_elog_size(uint64_t *log_id, size_t *size, uint64_t *elog_type);
+int64_t opal_read_elog(uint64_t buffer, uint64_t size, uint64_t log_id);
+int64_t opal_get_elog_size(__be64 *log_id, __be64 *size, __be64 *elog_type);
int64_t opal_write_elog(uint64_t buffer, uint64_t size, uint64_t offset);
int64_t opal_send_ack_elog(uint64_t log_id);
void opal_resend_pending_logs(void);
@@ -868,23 +868,24 @@ int64_t opal_validate_flash(uint64_t buffer, uint32_t *size, uint32_t *result);
int64_t opal_manage_flash(uint8_t op);
int64_t opal_update_flash(uint64_t blk_list);
int64_t opal_dump_init(uint8_t dump_type);
-int64_t opal_dump_info(uint32_t *dump_id, uint32_t *dump_size);
-int64_t opal_dump_info2(uint32_t *dump_id, uint32_t *dump_size, uint32_t *dump_type);
+int64_t opal_dump_info(__be32 *dump_id, __be32 *dump_size);
+int64_t opal_dump_info2(__be32 *dump_id, __be32 *dump_size, __be32 *dump_type);
int64_t opal_dump_read(uint32_t dump_id, uint64_t buffer);
int64_t opal_dump_ack(uint32_t dump_id);
int64_t opal_dump_resend_notification(void);
-int64_t opal_get_msg(uint64_t buffer, size_t size);
-int64_t opal_check_completion(uint64_t buffer, size_t size, uint64_t token);
+int64_t opal_get_msg(uint64_t buffer, uint64_t size);
+int64_t opal_check_completion(uint64_t buffer, uint64_t size, uint64_t token);
int64_t opal_sync_host_reboot(void);
int64_t opal_get_param(uint64_t token, uint32_t param_id, uint64_t buffer,
- size_t length);
+ uint64_t length);
int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer,
- size_t length);
+ uint64_t length);
int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
/* Internal functions */
-extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data);
+extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
+ int depth, void *data);
extern int early_init_dt_scan_recoverable_ranges(unsigned long node,
const char *uname, int depth, void *data);
@@ -893,10 +894,6 @@ extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len);
extern void hvc_opal_init_early(void);
-/* Internal functions */
-extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
- int depth, void *data);
-
extern int opal_notifier_register(struct notifier_block *nb);
extern int opal_notifier_unregister(struct notifier_block *nb);
@@ -906,9 +903,6 @@ extern void opal_notifier_enable(void);
extern void opal_notifier_disable(void);
extern void opal_notifier_update_evt(uint64_t evt_mask, uint64_t evt_val);
-extern int opal_get_chars(uint32_t vtermno, char *buf, int count);
-extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len);
-
extern int __opal_async_get_token(void);
extern int opal_async_get_token_interruptible(void);
extern int __opal_async_release_token(int token);
@@ -916,14 +910,13 @@ extern int opal_async_release_token(int token);
extern int opal_async_wait_response(uint64_t token, struct opal_msg *msg);
extern int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data);
-extern void hvc_opal_init_early(void);
-
struct rtc_time;
extern int opal_set_rtc_time(struct rtc_time *tm);
extern void opal_get_rtc_time(struct rtc_time *tm);
extern unsigned long opal_get_boot_time(void);
extern void opal_nvram_init(void);
extern void opal_flash_init(void);
+extern void opal_flash_term_callback(void);
extern int opal_elog_init(void);
extern void opal_platform_dump_init(void);
extern void opal_sys_param_init(void);
@@ -937,6 +930,10 @@ extern int opal_resync_timebase(void);
extern void opal_lpc_init(void);
+struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
+ unsigned long vmalloc_size);
+void opal_free_sg_list(struct opal_sg_list *sg);
+
#endif /* __ASSEMBLY__ */
#endif /* __OPAL_H */
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index ed57fa7920c8..db1e2b8eff3c 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -58,6 +58,7 @@ int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
void eeh_pe_state_mark(struct eeh_pe *pe, int state);
void eeh_pe_state_clear(struct eeh_pe *pe, int state);
+void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode);
void eeh_sysfs_add_device(struct pci_dev *pdev);
void eeh_sysfs_remove_device(struct pci_dev *pdev);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e5d2e0bc7e03..29de0152878f 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -215,6 +215,7 @@
#define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */
#define TEXASR_FS __MASK(63-36) /* Transaction Failure Summary */
#define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */
+#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */
#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
#define SPRN_CTRLF 0x088
#define SPRN_CTRLT 0x098
diff --git a/arch/powerpc/include/uapi/asm/setup.h b/arch/powerpc/include/uapi/asm/setup.h
index 552df83f1a49..ae3fb68cb28e 100644
--- a/arch/powerpc/include/uapi/asm/setup.h
+++ b/arch/powerpc/include/uapi/asm/setup.h
@@ -1 +1,6 @@
-#include <asm-generic/setup.h>
+#ifndef _UAPI_ASM_POWERPC_SETUP_H
+#define _UAPI_ASM_POWERPC_SETUP_H
+
+#define COMMAND_LINE_SIZE 2048
+
+#endif /* _UAPI_ASM_POWERPC_SETUP_H */
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index e7b76a6bf150..3764fb788d6c 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -22,6 +22,7 @@
*/
#include <linux/delay.h>
+#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
@@ -87,22 +88,21 @@
/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC (5*60*1000)
-/* Platform dependent EEH operations */
-struct eeh_ops *eeh_ops = NULL;
-
-bool eeh_subsystem_enabled = false;
-EXPORT_SYMBOL(eeh_subsystem_enabled);
-
/*
- * EEH probe mode support. The intention is to support multiple
- * platforms for EEH. Some platforms like pSeries do PCI emunation
- * based on device tree. However, other platforms like powernv probe
- * PCI devices from hardware. The flag is used to distinguish that.
- * In addition, struct eeh_ops::probe would be invoked for particular
- * OF node or PCI device so that the corresponding PE would be created
- * there.
+ * EEH probe mode support, which is part of the flags,
+ * is to support multiple platforms for EEH. Some platforms
+ * like pSeries do PCI emunation based on device tree.
+ * However, other platforms like powernv probe PCI devices
+ * from hardware. The flag is used to distinguish that.
+ * In addition, struct eeh_ops::probe would be invoked for
+ * particular OF node or PCI device so that the corresponding
+ * PE would be created there.
*/
-int eeh_probe_mode;
+int eeh_subsystem_flags;
+EXPORT_SYMBOL(eeh_subsystem_flags);
+
+/* Platform dependent EEH operations */
+struct eeh_ops *eeh_ops = NULL;
/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);
@@ -133,6 +133,15 @@ static struct eeh_stats eeh_stats;
#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
+static int __init eeh_setup(char *str)
+{
+ if (!strcmp(str, "off"))
+ eeh_subsystem_flags |= EEH_FORCE_DISABLED;
+
+ return 1;
+}
+__setup("eeh=", eeh_setup);
+
/**
* eeh_gather_pci_data - Copy assorted PCI config space registers to buff
* @edev: device to report data for
@@ -145,73 +154,67 @@ static struct eeh_stats eeh_stats;
static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
{
struct device_node *dn = eeh_dev_to_of_node(edev);
- struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
u32 cfg;
int cap, i;
int n = 0;
n += scnprintf(buf+n, len-n, "%s\n", dn->full_name);
- printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name);
+ pr_warn("EEH: of node=%s\n", dn->full_name);
eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg);
n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
- printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
+ pr_warn("EEH: PCI device/vendor: %08x\n", cfg);
eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg);
n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
- printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
-
- if (!dev) {
- printk(KERN_WARNING "EEH: no PCI device for this of node\n");
- return n;
- }
+ pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);
/* Gather bridge-specific registers */
- if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
+ if (edev->mode & EEH_DEV_BRIDGE) {
eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg);
n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
- printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);
+ pr_warn("EEH: Bridge secondary status: %04x\n", cfg);
eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg);
n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
- printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
+ pr_warn("EEH: Bridge control: %04x\n", cfg);
}
/* Dump out the PCI-X command and status regs */
- cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+ cap = edev->pcix_cap;
if (cap) {
eeh_ops->read_config(dn, cap, 4, &cfg);
n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
- printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
+ pr_warn("EEH: PCI-X cmd: %08x\n", cfg);
eeh_ops->read_config(dn, cap+4, 4, &cfg);
n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
- printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
+ pr_warn("EEH: PCI-X status: %08x\n", cfg);
}
- /* If PCI-E capable, dump PCI-E cap 10, and the AER */
- if (pci_is_pcie(dev)) {
+ /* If PCI-E capable, dump PCI-E cap 10 */
+ cap = edev->pcie_cap;
+ if (cap) {
n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
- printk(KERN_WARNING
- "EEH: PCI-E capabilities and status follow:\n");
+ pr_warn("EEH: PCI-E capabilities and status follow:\n");
for (i=0; i<=8; i++) {
- eeh_ops->read_config(dn, dev->pcie_cap+4*i, 4, &cfg);
+ eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
- printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
+ pr_warn("EEH: PCI-E %02x: %08x\n", i, cfg);
}
+ }
- cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
- if (cap) {
- n += scnprintf(buf+n, len-n, "pci-e AER:\n");
- printk(KERN_WARNING
- "EEH: PCI-E AER capability register set follows:\n");
-
- for (i=0; i<14; i++) {
- eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
- n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
- printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
- }
+ /* If AER capable, dump it */
+ cap = edev->aer_cap;
+ if (cap) {
+ n += scnprintf(buf+n, len-n, "pci-e AER:\n");
+ pr_warn("EEH: PCI-E AER capability register set follows:\n");
+
+ for (i=0; i<14; i++) {
+ eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
+ pr_warn("EEH: PCI-E AER %02x: %08x\n", i, cfg);
}
}
@@ -232,21 +235,19 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
{
size_t loglen = 0;
struct eeh_dev *edev, *tmp;
- bool valid_cfg_log = true;
/*
* When the PHB is fenced or dead, it's pointless to collect
* the data from PCI config space because it should return
* 0xFF's. For ER, we still retrieve the data from the PCI
* config space.
+ *
+ * For pHyp, we have to enable IO for log retrieval. Otherwise,
+ * 0xFF's is always returned from PCI config space.
*/
- if (eeh_probe_mode_dev() &&
- (pe->type & EEH_PE_PHB) &&
- (pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)))
- valid_cfg_log = false;
-
- if (valid_cfg_log) {
- eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+ if (!(pe->type & EEH_PE_PHB)) {
+ if (eeh_probe_mode_devtree())
+ eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
eeh_ops->configure_bridge(pe);
eeh_pe_restore_bars(pe);
@@ -309,7 +310,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
/* If the PHB has been in problematic state */
eeh_serialize_lock(&flags);
- if (phb_pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)) {
+ if (phb_pe->state & EEH_PE_ISOLATED) {
ret = 0;
goto out;
}
@@ -515,16 +516,42 @@ EXPORT_SYMBOL(eeh_check_failure);
*/
int eeh_pci_enable(struct eeh_pe *pe, int function)
{
- int rc;
+ int rc, flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+
+ /*
+ * pHyp doesn't allow to enable IO or DMA on unfrozen PE.
+ * Also, it's pointless to enable them on unfrozen PE. So
+ * we have the check here.
+ */
+ if (function == EEH_OPT_THAW_MMIO ||
+ function == EEH_OPT_THAW_DMA) {
+ rc = eeh_ops->get_state(pe, NULL);
+ if (rc < 0)
+ return rc;
+
+ /* Needn't to enable or already enabled */
+ if ((rc == EEH_STATE_NOT_SUPPORT) ||
+ ((rc & flags) == flags))
+ return 0;
+ }
rc = eeh_ops->set_option(pe, function);
if (rc)
- pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n",
- __func__, function, pe->phb->global_number, pe->addr, rc);
+ pr_warn("%s: Unexpected state change %d on "
+ "PHB#%d-PE#%x, err=%d\n",
+ __func__, function, pe->phb->global_number,
+ pe->addr, rc);
rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
- if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) &&
- (function == EEH_OPT_THAW_MMIO))
+ if (rc <= 0)
+ return rc;
+
+ if ((function == EEH_OPT_THAW_MMIO) &&
+ (rc & EEH_STATE_MMIO_ENABLED))
+ return 0;
+
+ if ((function == EEH_OPT_THAW_DMA) &&
+ (rc & EEH_STATE_DMA_ENABLED))
return 0;
return rc;
@@ -612,26 +639,7 @@ static void eeh_reset_pe_once(struct eeh_pe *pe)
else
eeh_ops->reset(pe, EEH_RESET_HOT);
- /* The PCI bus requires that the reset be held high for at least
- * a 100 milliseconds. We wait a bit longer 'just in case'.
- */
-#define PCI_BUS_RST_HOLD_TIME_MSEC 250
- msleep(PCI_BUS_RST_HOLD_TIME_MSEC);
-
- /* We might get hit with another EEH freeze as soon as the
- * pci slot reset line is dropped. Make sure we don't miss
- * these, and clear the flag now.
- */
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
-
eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
-
- /* After a PCI slot has been reset, the PCI Express spec requires
- * a 1.5 second idle time for the bus to stabilize, before starting
- * up traffic.
- */
-#define PCI_BUS_SETTLE_TIME_MSEC 1800
- msleep(PCI_BUS_SETTLE_TIME_MSEC);
}
/**
@@ -651,6 +659,10 @@ int eeh_reset_pe(struct eeh_pe *pe)
for (i=0; i<3; i++) {
eeh_reset_pe_once(pe);
+ /*
+ * EEH_PE_ISOLATED is expected to be removed after
+ * BAR restore.
+ */
rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
if ((rc & flags) == flags)
return 0;
@@ -826,8 +838,8 @@ int eeh_init(void)
&hose_list, list_node)
pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL);
} else {
- pr_warning("%s: Invalid probe mode %d\n",
- __func__, eeh_probe_mode);
+ pr_warn("%s: Invalid probe mode %x",
+ __func__, eeh_subsystem_flags);
return -EINVAL;
}
@@ -1102,10 +1114,45 @@ static const struct file_operations proc_eeh_operations = {
.release = single_release,
};
+#ifdef CONFIG_DEBUG_FS
+static int eeh_enable_dbgfs_set(void *data, u64 val)
+{
+ if (val)
+ eeh_subsystem_flags &= ~EEH_FORCE_DISABLED;
+ else
+ eeh_subsystem_flags |= EEH_FORCE_DISABLED;
+
+ /* Notify the backend */
+ if (eeh_ops->post_init)
+ eeh_ops->post_init();
+
+ return 0;
+}
+
+static int eeh_enable_dbgfs_get(void *data, u64 *val)
+{
+ if (eeh_enabled())
+ *val = 0x1ul;
+ else
+ *val = 0x0ul;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
+ eeh_enable_dbgfs_set, "0x%llx\n");
+#endif
+
static int __init eeh_init_proc(void)
{
- if (machine_is(pseries) || machine_is(powernv))
+ if (machine_is(pseries) || machine_is(powernv)) {
proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
+#ifdef CONFIG_DEBUG_FS
+ debugfs_create_file("eeh_enable", 0600,
+ powerpc_debugfs_root, NULL,
+ &eeh_enable_dbgfs_ops);
+#endif
+ }
+
return 0;
}
__initcall(eeh_init_proc);
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index bb61ca58ca6d..7100a5b96e70 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -171,6 +171,15 @@ static void eeh_enable_irq(struct pci_dev *dev)
}
}
+static bool eeh_dev_removed(struct eeh_dev *edev)
+{
+ /* EEH device removed ? */
+ if (!edev || (edev->mode & EEH_DEV_REMOVED))
+ return true;
+
+ return false;
+}
+
/**
* eeh_report_error - Report pci error to each device driver
* @data: eeh device
@@ -187,10 +196,8 @@ static void *eeh_report_error(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
- /* We might not have the associated PCI device,
- * then we should continue for next one.
- */
- if (!dev) return NULL;
+ if (!dev || eeh_dev_removed(edev))
+ return NULL;
dev->error_state = pci_channel_io_frozen;
driver = eeh_pcid_get(dev);
@@ -230,6 +237,9 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
+ if (!dev || eeh_dev_removed(edev))
+ return NULL;
+
driver = eeh_pcid_get(dev);
if (!driver) return NULL;
@@ -267,7 +277,8 @@ static void *eeh_report_reset(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
- if (!dev) return NULL;
+ if (!dev || eeh_dev_removed(edev))
+ return NULL;
dev->error_state = pci_channel_io_normal;
driver = eeh_pcid_get(dev);
@@ -307,7 +318,8 @@ static void *eeh_report_resume(void *data, void *userdata)
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
struct pci_driver *driver;
- if (!dev) return NULL;
+ if (!dev || eeh_dev_removed(edev))
+ return NULL;
dev->error_state = pci_channel_io_normal;
driver = eeh_pcid_get(dev);
@@ -343,7 +355,8 @@ static void *eeh_report_failure(void *data, void *userdata)
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
struct pci_driver *driver;
- if (!dev) return NULL;
+ if (!dev || eeh_dev_removed(edev))
+ return NULL;
dev->error_state = pci_channel_io_perm_failure;
driver = eeh_pcid_get(dev);
@@ -380,6 +393,16 @@ static void *eeh_rmv_device(void *data, void *userdata)
if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
return NULL;
+ /*
+ * We rely on count-based pcibios_release_device() to
+ * detach permanently offlined PEs. Unfortunately, that's
+ * not reliable enough. We might have the permanently
+ * offlined PEs attached, but we needn't take care of
+ * them and their child devices.
+ */
+ if (eeh_dev_removed(edev))
+ return NULL;
+
driver = eeh_pcid_get(dev);
if (driver) {
eeh_pcid_put(dev);
@@ -417,6 +440,36 @@ static void *eeh_pe_detach_dev(void *data, void *userdata)
return NULL;
}
+/*
+ * Explicitly clear PE's frozen state for PowerNV where
+ * we have frozen PE until BAR restore is completed. It's
+ * harmless to clear it for pSeries. To be consistent with
+ * PE reset (for 3 times), we try to clear the frozen state
+ * for 3 times as well.
+ */
+static int eeh_clear_pe_frozen_state(struct eeh_pe *pe)
+{
+ int i, rc;
+
+ for (i = 0; i < 3; i++) {
+ rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+ if (rc)
+ continue;
+ rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
+ if (!rc)
+ break;
+ }
+
+ /* The PE has been isolated, clear it */
+ if (rc)
+ pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n",
+ __func__, pe->phb->global_number, pe->addr, rc);
+ else
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+
+ return rc;
+}
+
/**
* eeh_reset_device - Perform actual reset of a pci slot
* @pe: EEH PE
@@ -451,19 +504,33 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
}
- /* Reset the pci controller. (Asserts RST#; resets config space).
+ /*
+ * Reset the pci controller. (Asserts RST#; resets config space).
* Reconfigure bridges and devices. Don't try to bring the system
* up if the reset failed for some reason.
+ *
+ * During the reset, it's very dangerous to have uncontrolled PCI
+ * config accesses. So we prefer to block them. However, controlled
+ * PCI config accesses initiated from EEH itself are allowed.
*/
+ eeh_pe_state_mark(pe, EEH_PE_RESET);
rc = eeh_reset_pe(pe);
- if (rc)
+ if (rc) {
+ eeh_pe_state_clear(pe, EEH_PE_RESET);
return rc;
+ }
pci_lock_rescan_remove();
/* Restore PE */
eeh_ops->configure_bridge(pe);
eeh_pe_restore_bars(pe);
+ eeh_pe_state_clear(pe, EEH_PE_RESET);
+
+ /* Clear frozen state */
+ rc = eeh_clear_pe_frozen_state(pe);
+ if (rc)
+ return rc;
/* Give the system 5 seconds to finish running the user-space
* hotplug shutdown scripts, e.g. ifdown for ethernet. Yes,
@@ -573,7 +640,6 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
result = PCI_ERS_RESULT_NEED_RESET;
} else {
pr_info("EEH: Notify device drivers to resume I/O\n");
- result = PCI_ERS_RESULT_NONE;
eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
}
}
@@ -585,10 +651,17 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
if (rc < 0)
goto hard_fail;
- if (rc)
+ if (rc) {
result = PCI_ERS_RESULT_NEED_RESET;
- else
+ } else {
+ /*
+ * We didn't do PE reset for the case. The PE
+ * is still in frozen state. Clear it before
+ * resuming the PE.
+ */
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
result = PCI_ERS_RESULT_RECOVERED;
+ }
}
/* If any device has a hard failure, then shut off everything. */
@@ -650,8 +723,17 @@ perm_error:
/* Notify all devices that they're about to go down. */
eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
- /* Shut down the device drivers for good. */
+ /* Mark the PE to be removed permanently */
+ pe->freeze_count = EEH_MAX_ALLOWED_FREEZES + 1;
+
+ /*
+ * Shut down the device drivers for good. We mark
+ * all removed devices correctly to avoid access
+ * the their PCI config any more.
+ */
if (frozen_bus) {
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+
pci_lock_rescan_remove();
pcibios_remove_pci_devices(frozen_bus);
pci_unlock_rescan_remove();
@@ -682,8 +764,7 @@ static void eeh_handle_special_event(void)
phb_pe = eeh_phb_pe_get(hose);
if (!phb_pe) continue;
- eeh_pe_state_mark(phb_pe,
- EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
+ eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
}
eeh_serialize_unlock(flags);
@@ -699,8 +780,7 @@ static void eeh_handle_special_event(void)
eeh_remove_event(pe);
if (rc == EEH_NEXT_ERR_DEAD_PHB)
- eeh_pe_state_mark(pe,
- EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
+ eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
else
eeh_pe_state_mark(pe,
EEH_PE_ISOLATED | EEH_PE_RECOVERING);
@@ -724,12 +804,14 @@ static void eeh_handle_special_event(void)
if (rc == EEH_NEXT_ERR_FROZEN_PE ||
rc == EEH_NEXT_ERR_FENCED_PHB) {
eeh_handle_normal_event(pe);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
} else {
pci_lock_rescan_remove();
list_for_each_entry(hose, &hose_list, list_node) {
phb_pe = eeh_phb_pe_get(hose);
if (!phb_pe ||
- !(phb_pe->state & EEH_PE_PHB_DEAD))
+ !(phb_pe->state & EEH_PE_ISOLATED) ||
+ (phb_pe->state & EEH_PE_RECOVERING))
continue;
/* Notify all devices to be down */
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index f0c353fa655a..995c2a284630 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -503,13 +503,17 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
struct eeh_dev *edev, *tmp;
struct pci_dev *pdev;
- /*
- * Mark the PE with the indicated state. Also,
- * the associated PCI device will be put into
- * I/O frozen state to avoid I/O accesses from
- * the PCI device driver.
- */
+ /* Keep the state of permanently removed PE intact */
+ if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
+ (state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
+ return NULL;
+
pe->state |= state;
+
+ /* Offline PCI devices if applicable */
+ if (state != EEH_PE_ISOLATED)
+ return NULL;
+
eeh_pe_for_each_dev(pe, edev, tmp) {
pdev = eeh_dev_to_pci_dev(edev);
if (pdev)
@@ -532,6 +536,27 @@ void eeh_pe_state_mark(struct eeh_pe *pe, int state)
eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
}
+static void *__eeh_pe_dev_mode_mark(void *data, void *flag)
+{
+ struct eeh_dev *edev = data;
+ int mode = *((int *)flag);
+
+ edev->mode |= mode;
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_dev_state_mark - Mark state for all device under the PE
+ * @pe: EEH PE
+ *
+ * Mark specific state for all child devices of the PE.
+ */
+void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode)
+{
+ eeh_pe_dev_traverse(pe, __eeh_pe_dev_mode_mark, &mode);
+}
+
/**
* __eeh_pe_state_clear - Clear state for the PE
* @data: EEH PE
@@ -546,8 +571,16 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
struct eeh_pe *pe = (struct eeh_pe *)data;
int state = *((int *)flag);
+ /* Keep the state of permanently removed PE intact */
+ if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
+ (state & EEH_PE_ISOLATED))
+ return NULL;
+
pe->state &= ~state;
- pe->check_count = 0;
+
+ /* Clear check count since last isolation */
+ if (state & EEH_PE_ISOLATED)
+ pe->check_count = 0;
return NULL;
}
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 2230fd0ca3e4..02667744fbb5 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -69,7 +69,7 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
*/
token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
if (!token)
- return 0;
+ return 1;
fw_dump.fadump_supported = 1;
fw_dump.ibm_configure_kernel_dump = *token;
@@ -92,7 +92,7 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
&size);
if (!sections)
- return 0;
+ return 1;
num_sections = size / (3 * sizeof(u32));
@@ -110,6 +110,7 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
break;
}
}
+
return 1;
}
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 40bd7bd4e19a..85fb16e64cef 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -71,8 +71,9 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
phys_addr_t taddr, unsigned long irq,
upf_t flags, int irq_check_parent)
{
- const __be32 *clk, *spd;
+ const __be32 *clk, *spd, *rs;
u32 clock = BASE_BAUD * 16;
+ u32 shift = 0;
int index;
/* get clock freq. if present */
@@ -83,6 +84,11 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
/* get default speed if present */
spd = of_get_property(np, "current-speed", NULL);
+ /* get register shift if present */
+ rs = of_get_property(np, "reg-shift", NULL);
+ if (rs && *rs)
+ shift = be32_to_cpup(rs);
+
/* If we have a location index, then try to use it */
if (want_index >= 0 && want_index < MAX_LEGACY_SERIAL_PORTS)
index = want_index;
@@ -126,6 +132,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
legacy_serial_ports[index].uartclk = clock;
legacy_serial_ports[index].irq = irq;
legacy_serial_ports[index].flags = flags;
+ legacy_serial_ports[index].regshift = shift;
legacy_serial_infos[index].taddr = taddr;
legacy_serial_infos[index].np = of_node_get(np);
legacy_serial_infos[index].clock = clock;
@@ -163,9 +170,8 @@ static int __init add_legacy_soc_port(struct device_node *np,
if (of_get_property(np, "clock-frequency", NULL) == NULL)
return -1;
- /* if reg-shift or offset, don't try to use it */
- if ((of_get_property(np, "reg-shift", NULL) != NULL) ||
- (of_get_property(np, "reg-offset", NULL) != NULL))
+ /* if reg-offset don't try to use it */
+ if ((of_get_property(np, "reg-offset", NULL) != NULL))
return -1;
/* if rtas uses this device, don't try to use it as well */
@@ -315,17 +321,20 @@ static void __init setup_legacy_serial_console(int console)
struct legacy_serial_info *info = &legacy_serial_infos[console];
struct plat_serial8250_port *port = &legacy_serial_ports[console];
void __iomem *addr;
+ unsigned int stride;
+
+ stride = 1 << port->regshift;
/* Check if a translated MMIO address has been found */
if (info->taddr) {
addr = ioremap(info->taddr, 0x1000);
if (addr == NULL)
return;
- udbg_uart_init_mmio(addr, 1);
+ udbg_uart_init_mmio(addr, stride);
} else {
/* Check if it's PIO and we support untranslated PIO */
if (port->iotype == UPIO_PORT && isa_io_special)
- udbg_uart_init_pio(port->iobase, 1);
+ udbg_uart_init_pio(port->iobase, stride);
else
return;
}
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index d9476c1fc959..add166aa806a 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -21,6 +21,7 @@
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
+#include <linux/delay.h>
#include <linux/export.h>
#include <linux/of_address.h>
#include <linux/of_pci.h>
@@ -120,6 +121,25 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus,
return 1;
}
+void pcibios_reset_secondary_bus(struct pci_dev *dev)
+{
+ u16 ctrl;
+
+ if (ppc_md.pcibios_reset_secondary_bus) {
+ ppc_md.pcibios_reset_secondary_bus(dev);
+ return;
+ }
+
+ pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
+ ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
+ pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+ msleep(2);
+
+ ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+ pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+ ssleep(1);
+}
+
static resource_size_t pcibios_io_size(const struct pci_controller *hose)
{
#ifdef CONFIG_PPC64
@@ -666,60 +686,36 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar,
void pci_process_bridge_OF_ranges(struct pci_controller *hose,
struct device_node *dev, int primary)
{
- const __be32 *ranges;
- int rlen;
- int pna = of_n_addr_cells(dev);
- int np = pna + 5;
int memno = 0;
- u32 pci_space;
- unsigned long long pci_addr, cpu_addr, pci_next, cpu_next, size;
struct resource *res;
+ struct of_pci_range range;
+ struct of_pci_range_parser parser;
printk(KERN_INFO "PCI host bridge %s %s ranges:\n",
dev->full_name, primary ? "(primary)" : "");
- /* Get ranges property */
- ranges = of_get_property(dev, "ranges", &rlen);
- if (ranges == NULL)
+ /* Check for ranges property */
+ if (of_pci_range_parser_init(&parser, dev))
return;
/* Parse it */
- while ((rlen -= np * 4) >= 0) {
- /* Read next ranges element */
- pci_space = of_read_number(ranges, 1);
- pci_addr = of_read_number(ranges + 1, 2);
- cpu_addr = of_translate_address(dev, ranges + 3);
- size = of_read_number(ranges + pna + 3, 2);
- ranges += np;
-
+ for_each_of_pci_range(&parser, &range) {
/* If we failed translation or got a zero-sized region
* (some FW try to feed us with non sensical zero sized regions
* such as power3 which look like some kind of attempt at exposing
* the VGA memory hole)
*/
- if (cpu_addr == OF_BAD_ADDR || size == 0)
+ if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
continue;
- /* Now consume following elements while they are contiguous */
- for (; rlen >= np * sizeof(u32);
- ranges += np, rlen -= np * 4) {
- if (of_read_number(ranges, 1) != pci_space)
- break;
- pci_next = of_read_number(ranges + 1, 2);
- cpu_next = of_translate_address(dev, ranges + 3);
- if (pci_next != pci_addr + size ||
- cpu_next != cpu_addr + size)
- break;
- size += of_read_number(ranges + pna + 3, 2);
- }
-
/* Act based on address space type */
res = NULL;
- switch ((pci_space >> 24) & 0x3) {
- case 1: /* PCI IO space */
+ switch (range.flags & IORESOURCE_TYPE_BITS) {
+ case IORESOURCE_IO:
printk(KERN_INFO
" IO 0x%016llx..0x%016llx -> 0x%016llx\n",
- cpu_addr, cpu_addr + size - 1, pci_addr);
+ range.cpu_addr, range.cpu_addr + range.size - 1,
+ range.pci_addr);
/* We support only one IO range */
if (hose->pci_io_size) {
@@ -729,11 +725,12 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
}
#ifdef CONFIG_PPC32
/* On 32 bits, limit I/O space to 16MB */
- if (size > 0x01000000)
- size = 0x01000000;
+ if (range.size > 0x01000000)
+ range.size = 0x01000000;
/* 32 bits needs to map IOs here */
- hose->io_base_virt = ioremap(cpu_addr, size);
+ hose->io_base_virt = ioremap(range.cpu_addr,
+ range.size);
/* Expect trouble if pci_addr is not 0 */
if (primary)
@@ -743,20 +740,20 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
/* pci_io_size and io_base_phys always represent IO
* space starting at 0 so we factor in pci_addr
*/
- hose->pci_io_size = pci_addr + size;
- hose->io_base_phys = cpu_addr - pci_addr;
+ hose->pci_io_size = range.pci_addr + range.size;
+ hose->io_base_phys = range.cpu_addr - range.pci_addr;
/* Build resource */
res = &hose->io_resource;
- res->flags = IORESOURCE_IO;
- res->start = pci_addr;
+ range.cpu_addr = range.pci_addr;
break;
- case 2: /* PCI Memory space */
- case 3: /* PCI 64 bits Memory space */
+ case IORESOURCE_MEM:
printk(KERN_INFO
" MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n",
- cpu_addr, cpu_addr + size - 1, pci_addr,
- (pci_space & 0x40000000) ? "Prefetch" : "");
+ range.cpu_addr, range.cpu_addr + range.size - 1,
+ range.pci_addr,
+ (range.pci_space & 0x40000000) ?
+ "Prefetch" : "");
/* We support only 3 memory ranges */
if (memno >= 3) {
@@ -765,28 +762,21 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
continue;
}
/* Handles ISA memory hole space here */
- if (pci_addr == 0) {
+ if (range.pci_addr == 0) {
if (primary || isa_mem_base == 0)
- isa_mem_base = cpu_addr;
- hose->isa_mem_phys = cpu_addr;
- hose->isa_mem_size = size;
+ isa_mem_base = range.cpu_addr;
+ hose->isa_mem_phys = range.cpu_addr;
+ hose->isa_mem_size = range.size;
}
/* Build resource */
- hose->mem_offset[memno] = cpu_addr - pci_addr;
+ hose->mem_offset[memno] = range.cpu_addr -
+ range.pci_addr;
res = &hose->mem_resources[memno++];
- res->flags = IORESOURCE_MEM;
- if (pci_space & 0x40000000)
- res->flags |= IORESOURCE_PREFETCH;
- res->start = cpu_addr;
break;
}
if (res != NULL) {
- res->name = dev->full_name;
- res->end = res->start + size - 1;
- res->parent = NULL;
- res->sibling = NULL;
- res->child = NULL;
+ of_pci_range_to_resource(&range, dev, res);
}
}
}
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 83c26d829991..ea6470c21f4e 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -304,6 +304,9 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
struct pci_dev *dev = NULL;
const __be32 *reg;
int reglen, devfn;
+#ifdef CONFIG_EEH
+ struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+#endif
pr_debug(" * %s\n", dn->full_name);
if (!of_device_is_available(dn))
@@ -321,6 +324,12 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
return dev;
}
+ /* Device removed permanently ? */
+#ifdef CONFIG_EEH
+ if (edev && (edev->mode & EEH_DEV_REMOVED))
+ return NULL;
+#endif
+
/* create a new pci_dev for this device */
dev = of_create_pci_dev(dn, bus, devfn);
if (!dev)
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 3bd77edd7610..450850a49dce 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -120,6 +120,7 @@ EXPORT_SYMBOL(giveup_spe);
EXPORT_SYMBOL(flush_instruction_cache);
#endif
EXPORT_SYMBOL(flush_dcache_range);
+EXPORT_SYMBOL(flush_icache_range);
#ifdef CONFIG_SMP
#ifdef CONFIG_PPC32
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index 2f3cdb01506d..658e89d2025b 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -705,7 +705,7 @@ static int __init rtas_flash_init(void)
if (rtas_token("ibm,update-flash-64-and-reboot") ==
RTAS_UNKNOWN_SERVICE) {
pr_info("rtas_flash: no firmware flash support\n");
- return 1;
+ return -EINVAL;
}
rtas_validate_flash_data.buf = kzalloc(VALIDATE_BUF_SIZE, GFP_KERNEL);
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 7d4c7172f38e..c168337aef9d 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -80,10 +80,6 @@ int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
if (ret)
return PCIBIOS_DEVICE_NOT_FOUND;
- if (returnval == EEH_IO_ERROR_VALUE(size) &&
- eeh_dev_check_failure(of_node_to_eeh_dev(pdn->node)))
- return PCIBIOS_DEVICE_NOT_FOUND;
-
return PCIBIOS_SUCCESSFUL;
}
@@ -92,18 +88,39 @@ static int rtas_pci_read_config(struct pci_bus *bus,
int where, int size, u32 *val)
{
struct device_node *busdn, *dn;
-
- busdn = pci_bus_to_OF_node(bus);
+ struct pci_dn *pdn;
+ bool found = false;
+#ifdef CONFIG_EEH
+ struct eeh_dev *edev;
+#endif
+ int ret;
/* Search only direct children of the bus */
+ *val = 0xFFFFFFFF;
+ busdn = pci_bus_to_OF_node(bus);
for (dn = busdn->child; dn; dn = dn->sibling) {
- struct pci_dn *pdn = PCI_DN(dn);
+ pdn = PCI_DN(dn);
if (pdn && pdn->devfn == devfn
- && of_device_is_available(dn))
- return rtas_read_config(pdn, where, size, val);
+ && of_device_is_available(dn)) {
+ found = true;
+ break;
+ }
}
- return PCIBIOS_DEVICE_NOT_FOUND;
+ if (!found)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+#ifdef CONFIG_EEH
+ edev = of_node_to_eeh_dev(dn);
+ if (edev && edev->pe && edev->pe->state & EEH_PE_RESET)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+#endif
+
+ ret = rtas_read_config(pdn, where, size, val);
+ if (*val == EEH_IO_ERROR_VALUE(size) &&
+ eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return ret;
}
int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
@@ -136,17 +153,34 @@ static int rtas_pci_write_config(struct pci_bus *bus,
int where, int size, u32 val)
{
struct device_node *busdn, *dn;
-
- busdn = pci_bus_to_OF_node(bus);
+ struct pci_dn *pdn;
+ bool found = false;
+#ifdef CONFIG_EEH
+ struct eeh_dev *edev;
+#endif
+ int ret;
/* Search only direct children of the bus */
+ busdn = pci_bus_to_OF_node(bus);
for (dn = busdn->child; dn; dn = dn->sibling) {
- struct pci_dn *pdn = PCI_DN(dn);
+ pdn = PCI_DN(dn);
if (pdn && pdn->devfn == devfn
- && of_device_is_available(dn))
- return rtas_write_config(pdn, where, size, val);
+ && of_device_is_available(dn)) {
+ found = true;
+ break;
+ }
}
- return PCIBIOS_DEVICE_NOT_FOUND;
+
+ if (!found)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+#ifdef CONFIG_EEH
+ edev = of_node_to_eeh_dev(dn);
+ if (edev && edev->pe && (edev->pe->state & EEH_PE_RESET))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+#endif
+ ret = rtas_write_config(pdn, where, size, val);
+
+ return ret;
}
static struct pci_ops rtas_pci_ops = {
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 79b7612ac6fa..3cf25c89469d 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -212,6 +212,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
{
unsigned long cpu_id = (unsigned long)v - 1;
unsigned int pvr;
+ unsigned long proc_freq;
unsigned short maj;
unsigned short min;
@@ -263,12 +264,19 @@ static int show_cpuinfo(struct seq_file *m, void *v)
#endif /* CONFIG_TAU */
/*
- * Assume here that all clock rates are the same in a
- * smp system. -- Cort
+ * Platforms that have variable clock rates, should implement
+ * the method ppc_md.get_proc_freq() that reports the clock
+ * rate of a given cpu. The rest can use ppc_proc_freq to
+ * report the clock rate that is same across all cpus.
*/
- if (ppc_proc_freq)
+ if (ppc_md.get_proc_freq)
+ proc_freq = ppc_md.get_proc_freq(cpu_id);
+ else
+ proc_freq = ppc_proc_freq;
+
+ if (proc_freq)
seq_printf(m, "clock\t\t: %lu.%06luMHz\n",
- ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
+ proc_freq / 1000000, proc_freq % 1000000);
if (ppc_md.show_percpuinfo != NULL)
ppc_md.show_percpuinfo(m, cpu_id);
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 03567c05950a..508c54b92fa6 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -10,6 +10,7 @@
#include <asm/ppc-opcode.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
+#include <asm/bug.h>
#ifdef CONFIG_VSX
/* See fpu.S, this is borrowed from there */
@@ -175,6 +176,13 @@ dont_backup_vec:
stfd fr0,FPSTATE_FPSCR(r7)
dont_backup_fp:
+ /* Do sanity check on MSR to make sure we are suspended */
+ li r7, (MSR_TS_S)@higher
+ srdi r6, r14, 32
+ and r6, r6, r7
+1: tdeqi r6, 0
+ EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
+
/* The moment we treclaim, ALL of our GPRs will switch
* to user register state. (FPRs, CCR etc. also!)
* Use an sprg and a tm_scratch in the PACA to shuffle.
@@ -320,8 +328,6 @@ _GLOBAL(__tm_recheckpoint)
*/
SAVE_NVGPRS(r1)
- std r1, PACAR1(r13)
-
/* Load complete register state from ts_ckpt* registers */
addi r7, r3, PT_CKPT_REGS /* Thread's ckpt_regs */
@@ -385,12 +391,10 @@ restore_gprs:
/* ******************** CR,LR,CCR,MSR ********** */
ld r4, _CTR(r7)
ld r5, _LINK(r7)
- ld r6, _CCR(r7)
ld r8, _XER(r7)
mtctr r4
mtlr r5
- mtcr r6
mtxer r8
/* ******************** TAR ******************** */
@@ -406,7 +410,8 @@ restore_gprs:
li r4, 0
mtmsrd r4, 1
- REST_4GPRS(0, r7) /* GPR0-3 */
+ REST_GPR(0, r7) /* GPR0 */
+ REST_2GPRS(2, r7) /* GPR2-3 */
REST_GPR(4, r7) /* GPR4 */
REST_4GPRS(8, r7) /* GPR8-11 */
REST_2GPRS(12, r7) /* GPR12-13 */
@@ -418,6 +423,31 @@ restore_gprs:
mtspr SPRN_DSCR, r5
mtspr SPRN_PPR, r6
+ /* Do final sanity check on TEXASR to make sure FS is set. Do this
+ * here before we load up the userspace r1 so any bugs we hit will get
+ * a call chain */
+ mfspr r5, SPRN_TEXASR
+ srdi r5, r5, 16
+ li r6, (TEXASR_FS)@h
+ and r6, r6, r5
+1: tdeqi r6, 0
+ EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
+
+ /* Do final sanity check on MSR to make sure we are not transactional
+ * or suspended
+ */
+ mfmsr r6
+ li r5, (MSR_TS_MASK)@higher
+ srdi r6, r6, 32
+ and r6, r6, r5
+1: tdnei r6, 0
+ EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
+
+ /* Restore CR */
+ ld r6, _CCR(r7)
+ mtcr r6
+
+ REST_GPR(1, r7) /* GPR1 */
REST_GPR(5, r7) /* GPR5-7 */
REST_GPR(6, r7)
ld r7, GPR7(r7)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index ffbb871c2bd8..b031f932c0cc 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -242,6 +242,12 @@ kvm_novcpu_exit:
*/
.globl kvm_start_guest
kvm_start_guest:
+
+ /* Set runlatch bit the minute you wake up from nap */
+ mfspr r1, SPRN_CTRLF
+ ori r1, r1, 1
+ mtspr SPRN_CTRLT, r1
+
ld r2,PACATOC(r13)
li r0,KVM_HWTHREAD_IN_KVM
@@ -309,6 +315,11 @@ kvm_no_guest:
li r0, KVM_HWTHREAD_IN_NAP
stb r0, HSTATE_HWTHREAD_STATE(r13)
kvm_do_nap:
+ /* Clear the runlatch bit before napping */
+ mfspr r2, SPRN_CTRLF
+ clrrdi r2, r2, 1
+ mtspr SPRN_CTRLT, r2
+
li r3, LPCR_PECE0
mfspr r4, SPRN_LPCR
rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
@@ -1999,8 +2010,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
/*
* Take a nap until a decrementer or external or doobell interrupt
- * occurs, with PECE1, PECE0 and PECEDP set in LPCR
+ * occurs, with PECE1, PECE0 and PECEDP set in LPCR. Also clear the
+ * runlatch bit before napping.
*/
+ mfspr r2, SPRN_CTRLF
+ clrrdi r2, r2, 1
+ mtspr SPRN_CTRLT, r2
+
li r0,1
stb r0,HSTATE_HWTHREAD_REQ(r13)
mfspr r5,SPRN_LPCR
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 3ea26c25590b..cf1d325eae8b 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -82,17 +82,14 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
va |= penc << 12;
va |= ssize << 8;
- /* Add AVAL part */
- if (psize != apsize) {
- /*
- * MPSS, 64K base page size and 16MB parge page size
- * We don't need all the bits, but rest of the bits
- * must be ignored by the processor.
- * vpn cover upto 65 bits of va. (0...65) and we need
- * 58..64 bits of va.
- */
- va |= (vpn & 0xfe);
- }
+ /*
+ * AVAL bits:
+ * We don't need all the bits, but rest of the bits
+ * must be ignored by the processor.
+ * vpn cover upto 65 bits of va. (0...65) and we need
+ * 58..64 bits of va.
+ */
+ va |= (vpn & 0xfe); /* AVAL */
va |= 1; /* L */
asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
: : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
@@ -133,17 +130,14 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
va |= penc << 12;
va |= ssize << 8;
- /* Add AVAL part */
- if (psize != apsize) {
- /*
- * MPSS, 64K base page size and 16MB parge page size
- * We don't need all the bits, but rest of the bits
- * must be ignored by the processor.
- * vpn cover upto 65 bits of va. (0...65) and we need
- * 58..64 bits of va.
- */
- va |= (vpn & 0xfe);
- }
+ /*
+ * AVAL bits:
+ * We don't need all the bits, but rest of the bits
+ * must be ignored by the processor.
+ * vpn cover upto 65 bits of va. (0...65) and we need
+ * 58..64 bits of va.
+ */
+ va |= (vpn & 0xfe);
va |= 1; /* L */
asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
: : "r"(va) : "memory");
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 297c91051413..e0766b82e165 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -155,16 +155,28 @@ static ssize_t read_offset_data(void *dest, size_t dest_len,
return copy_len;
}
-static unsigned long h_get_24x7_catalog_page(char page[static 4096],
- u32 version, u32 index)
+static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096,
+ unsigned long version,
+ unsigned long index)
{
- WARN_ON(!IS_ALIGNED((unsigned long)page, 4096));
+ pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)",
+ phys_4096,
+ version,
+ index);
+ WARN_ON(!IS_ALIGNED(phys_4096, 4096));
return plpar_hcall_norets(H_GET_24X7_CATALOG_PAGE,
- virt_to_phys(page),
+ phys_4096,
version,
index);
}
+static unsigned long h_get_24x7_catalog_page(char page[],
+ u64 version, u32 index)
+{
+ return h_get_24x7_catalog_page_(virt_to_phys(page),
+ version, index);
+}
+
static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf,
loff_t offset, size_t count)
@@ -173,7 +185,7 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
ssize_t ret = 0;
size_t catalog_len = 0, catalog_page_len = 0, page_count = 0;
loff_t page_offset = 0;
- uint32_t catalog_version_num = 0;
+ uint64_t catalog_version_num = 0;
void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);
struct hv_24x7_catalog_page_0 *page_0 = page;
if (!page)
@@ -185,7 +197,7 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
goto e_free;
}
- catalog_version_num = be32_to_cpu(page_0->version);
+ catalog_version_num = be64_to_cpu(page_0->version);
catalog_page_len = be32_to_cpu(page_0->length);
catalog_len = catalog_page_len * 4096;
@@ -208,8 +220,9 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
page, 4096, page_offset * 4096);
e_free:
if (hret)
- pr_err("h_get_24x7_catalog_page(ver=%d, page=%lld) failed: rc=%ld\n",
- catalog_version_num, page_offset, hret);
+ pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:"
+ " rc=%ld\n",
+ catalog_version_num, page_offset, hret);
kfree(page);
pr_devel("catalog_read: offset=%lld(%lld) count=%zu(%zu) catalog_len=%zu(%zu) => %zd\n",
@@ -243,7 +256,7 @@ e_free: \
static DEVICE_ATTR_RO(_name)
PAGE_0_ATTR(catalog_version, "%lld\n",
- (unsigned long long)be32_to_cpu(page_0->version));
+ (unsigned long long)be64_to_cpu(page_0->version));
PAGE_0_ATTR(catalog_len, "%lld\n",
(unsigned long long)be32_to_cpu(page_0->length) * 4096);
static BIN_ATTR_RO(catalog, 0/* real length varies */);
@@ -485,13 +498,13 @@ static int hv_24x7_init(void)
struct hv_perf_caps caps;
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
- pr_info("not a virtualized system, not enabling\n");
+ pr_debug("not a virtualized system, not enabling\n");
return -ENODEV;
}
hret = hv_perf_caps_get(&caps);
if (hret) {
- pr_info("could not obtain capabilities, error 0x%80lx, not enabling\n",
+ pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
hret);
return -ENODEV;
}
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 278ba7b9c2b5..c9d399a2df82 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -78,7 +78,7 @@ static ssize_t kernel_version_show(struct device *dev,
return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT);
}
-DEVICE_ATTR_RO(kernel_version);
+static DEVICE_ATTR_RO(kernel_version);
HV_CAPS_ATTR(version, "0x%x\n");
HV_CAPS_ATTR(ga, "%d\n");
HV_CAPS_ATTR(expanded, "%d\n");
@@ -273,13 +273,13 @@ static int hv_gpci_init(void)
struct hv_perf_caps caps;
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
- pr_info("not a virtualized system, not enabling\n");
+ pr_debug("not a virtualized system, not enabling\n");
return -ENODEV;
}
hret = hv_perf_caps_get(&caps);
if (hret) {
- pr_info("could not obtain capabilities, error 0x%80lx, not enabling\n",
+ pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
hret);
return -ENODEV;
}
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index d9e2b19b7c8d..43b65ad1970a 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -422,6 +422,7 @@ config CPU_BIG_ENDIAN
config CPU_LITTLE_ENDIAN
bool "Build little endian kernel"
+ select PPC64_BOOT_WRAPPER
help
Build a little endian kernel.
@@ -430,3 +431,7 @@ config CPU_LITTLE_ENDIAN
little endian powerpc.
endchoice
+
+config PPC64_BOOT_WRAPPER
+ def_bool n
+ depends on CPU_LITTLE_ENDIAN
diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig
index 2a7024d8d8b1..a25f496c2ef9 100644
--- a/arch/powerpc/platforms/embedded6xx/Kconfig
+++ b/arch/powerpc/platforms/embedded6xx/Kconfig
@@ -65,6 +65,7 @@ config MVME5100
select PPC_INDIRECT_PCI
select PPC_I8259
select PPC_NATIVE
+ select PPC_UDBG_16550
help
This option enables support for the Motorola (now Emerson) MVME5100
board.
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 253fefe3d1a0..79d0cdf786d0 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -42,11 +42,19 @@ static int ioda_eeh_event(struct notifier_block *nb,
{
uint64_t changed_evts = (uint64_t)change;
- /* We simply send special EEH event */
- if ((changed_evts & OPAL_EVENT_PCI_ERROR) &&
- (events & OPAL_EVENT_PCI_ERROR) &&
- eeh_enabled())
+ /*
+ * We simply send special EEH event if EEH has
+ * been enabled, or clear pending events in
+ * case that we enable EEH soon
+ */
+ if (!(changed_evts & OPAL_EVENT_PCI_ERROR) ||
+ !(events & OPAL_EVENT_PCI_ERROR))
+ return 0;
+
+ if (eeh_enabled())
eeh_send_failure_event(NULL);
+ else
+ opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
return 0;
}
@@ -141,7 +149,9 @@ static int ioda_eeh_post_init(struct pci_controller *hose)
}
#ifdef CONFIG_DEBUG_FS
- if (phb->dbgfs) {
+ if (!phb->has_dbgfs && phb->dbgfs) {
+ phb->has_dbgfs = 1;
+
debugfs_create_file("err_injct_outbound", 0600,
phb->dbgfs, hose,
&ioda_eeh_outb_dbgfs_ops);
@@ -154,7 +164,14 @@ static int ioda_eeh_post_init(struct pci_controller *hose)
}
#endif
- phb->eeh_state |= PNV_EEH_STATE_ENABLED;
+ /* If EEH is enabled, we're going to rely on that.
+ * Otherwise, we restore to conventional mechanism
+ * to clear frozen PE during PCI config access.
+ */
+ if (eeh_enabled())
+ phb->flags |= PNV_PHB_FLAG_EEH;
+ else
+ phb->flags &= ~PNV_PHB_FLAG_EEH;
return 0;
}
@@ -268,6 +285,21 @@ static int ioda_eeh_get_state(struct eeh_pe *pe)
return EEH_STATE_NOT_SUPPORT;
}
+ /*
+ * If we're in middle of PE reset, return normal
+ * state to keep EEH core going. For PHB reset, we
+ * still expect to have fenced PHB cleared with
+ * PHB reset.
+ */
+ if (!(pe->type & EEH_PE_PHB) &&
+ (pe->state & EEH_PE_RESET)) {
+ result = (EEH_STATE_MMIO_ACTIVE |
+ EEH_STATE_DMA_ACTIVE |
+ EEH_STATE_MMIO_ENABLED |
+ EEH_STATE_DMA_ENABLED);
+ return result;
+ }
+
/* Retrieve PE status through OPAL */
pe_no = pe->addr;
ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
@@ -347,52 +379,6 @@ static int ioda_eeh_get_state(struct eeh_pe *pe)
return result;
}
-static int ioda_eeh_pe_clear(struct eeh_pe *pe)
-{
- struct pci_controller *hose;
- struct pnv_phb *phb;
- u32 pe_no;
- u8 fstate;
- u16 pcierr;
- s64 ret;
-
- pe_no = pe->addr;
- hose = pe->phb;
- phb = pe->phb->private_data;
-
- /* Clear the EEH error on the PE */
- ret = opal_pci_eeh_freeze_clear(phb->opal_id,
- pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
- if (ret) {
- pr_err("%s: Failed to clear EEH error for "
- "PHB#%x-PE#%x, err=%lld\n",
- __func__, hose->global_number, pe_no, ret);
- return -EIO;
- }
-
- /*
- * Read the PE state back and verify that the frozen
- * state has been removed.
- */
- ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
- &fstate, &pcierr, NULL);
- if (ret) {
- pr_err("%s: Failed to get EEH status on "
- "PHB#%x-PE#%x\n, err=%lld\n",
- __func__, hose->global_number, pe_no, ret);
- return -EIO;
- }
-
- if (fstate != OPAL_EEH_STOPPED_NOT_FROZEN) {
- pr_err("%s: Frozen state not cleared on "
- "PHB#%x-PE#%x, sts=%x\n",
- __func__, hose->global_number, pe_no, fstate);
- return -EIO;
- }
-
- return 0;
-}
-
static s64 ioda_eeh_phb_poll(struct pnv_phb *phb)
{
s64 rc = OPAL_HARDWARE;
@@ -402,13 +388,16 @@ static s64 ioda_eeh_phb_poll(struct pnv_phb *phb)
if (rc <= 0)
break;
- msleep(rc);
+ if (system_state < SYSTEM_RUNNING)
+ udelay(1000 * rc);
+ else
+ msleep(rc);
}
return rc;
}
-static int ioda_eeh_phb_reset(struct pci_controller *hose, int option)
+int ioda_eeh_phb_reset(struct pci_controller *hose, int option)
{
struct pnv_phb *phb = hose->private_data;
s64 rc = OPAL_HARDWARE;
@@ -431,9 +420,17 @@ static int ioda_eeh_phb_reset(struct pci_controller *hose, int option)
/*
* Poll state of the PHB until the request is done
- * successfully.
+ * successfully. The PHB reset is usually PHB complete
+ * reset followed by hot reset on root bus. So we also
+ * need the PCI bus settlement delay.
*/
rc = ioda_eeh_phb_poll(phb);
+ if (option == EEH_RESET_DEACTIVATE) {
+ if (system_state < SYSTEM_RUNNING)
+ udelay(1000 * EEH_PE_RST_SETTLE_TIME);
+ else
+ msleep(EEH_PE_RST_SETTLE_TIME);
+ }
out:
if (rc != OPAL_SUCCESS)
return -EIO;
@@ -471,6 +468,8 @@ static int ioda_eeh_root_reset(struct pci_controller *hose, int option)
/* Poll state of the PHB until the request is done */
rc = ioda_eeh_phb_poll(phb);
+ if (option == EEH_RESET_DEACTIVATE)
+ msleep(EEH_PE_RST_SETTLE_TIME);
out:
if (rc != OPAL_SUCCESS)
return -EIO;
@@ -478,32 +477,149 @@ out:
return 0;
}
-static int ioda_eeh_bridge_reset(struct pci_controller *hose,
- struct pci_dev *dev, int option)
+static bool ioda_eeh_is_plx_dnport(struct pci_dev *dev, int *reg,
+ int *mask, int *len)
{
- u16 ctrl;
+ unsigned short *pid;
+ unsigned short ids[] = {
+ 0x10b5, 0x8748, 0x0080, 0x0400, /* PLX#8748 */
+ 0x0000, 0x0000, 0x0000, 0x0000, /* End flag */
+ };
+
+ if (!pci_is_pcie(dev))
+ return false;
+ if (pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM)
+ return false;
+
+ pid = &ids[0];
+ while (!reg) {
+ if (pid[0] == 0x0)
+ break;
- pr_debug("%s: Reset device %04x:%02x:%02x.%01x with option %d\n",
- __func__, hose->global_number, dev->bus->number,
- PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), option);
+ if (dev->vendor == pid[0] &&
+ dev->device == pid[1]) {
+ *reg = pid[2];
+ *mask = pid[3];
+ *len = 2;
+ return true;
+ }
+ }
+
+ *reg = PCI_BRIDGE_CONTROL;
+ *mask = PCI_BRIDGE_CTL_BUS_RESET;
+ *len = 2;
+ return false;
+}
+
+static int ioda_eeh_bridge_reset(struct pci_dev *dev, int option)
+
+{
+ struct device_node *dn = pci_device_to_OF_node(dev);
+ struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+ int aer = edev ? edev->aer_cap : 0;
+ int reg, mask, val, len;
+ bool is_plx_dnport;
+
+ pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n",
+ __func__, pci_domain_nr(dev->bus),
+ dev->bus->number, option);
+
+
+ is_plx_dnport = ioda_eeh_is_plx_dnport(dev, &reg, &mask, &len);
+ if (option == EEH_RESET_FUNDAMENTAL)
+ if (!is_plx_dnport || !edev)
+ option = EEH_RESET_HOT;
+
+ if (option == EEH_RESET_HOT) {
+ reg = PCI_BRIDGE_CONTROL;
+ mask = PCI_BRIDGE_CTL_BUS_RESET;
+ len = 2;
+ }
+
+ if (option == EEH_RESET_DEACTIVATE) {
+ if (!is_plx_dnport || !edev ||
+ !(edev->mode & EEH_DEV_FRESET)) {
+ reg = PCI_BRIDGE_CONTROL;
+ mask = PCI_BRIDGE_CTL_BUS_RESET;
+ len = 2;
+ }
+ }
switch (option) {
case EEH_RESET_FUNDAMENTAL:
+ edev->mode |= EEH_DEV_FRESET;
+ /* Fall through */
case EEH_RESET_HOT:
- pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
- ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
- pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+ if (aer) {
+ /* Mask receiver error */
+ eeh_ops->read_config(dn, aer + PCI_ERR_COR_MASK,
+ 4, &val);
+ val |= PCI_ERR_COR_RCVR;
+ eeh_ops->write_config(dn, aer + PCI_ERR_COR_MASK,
+ 4, val);
+
+ /* Mask linkDown */
+ eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK,
+ 4, &val);
+ val |= PCI_ERR_UNC_SURPDN;
+ eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK,
+ 4, val);
+ }
+
+ eeh_ops->read_config(dn, reg, len, &val);
+ val |= mask;
+ eeh_ops->write_config(dn, reg, len, val);
+ msleep(EEH_PE_RST_HOLD_TIME);
+
break;
case EEH_RESET_DEACTIVATE:
- pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
- ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
- pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+ eeh_ops->read_config(dn, reg, len, &val);
+ val &= ~mask;
+ eeh_ops->write_config(dn, reg, len, val);
+ msleep(EEH_PE_RST_SETTLE_TIME);
+
+ if (edev)
+ edev->mode &= ~EEH_DEV_FRESET;
+ if (aer) {
+ /* Clear receive error and enable it */
+ eeh_ops->write_config(dn, aer + PCI_ERR_COR_STATUS,
+ 4, PCI_ERR_COR_RCVR);
+ eeh_ops->read_config(dn, aer + PCI_ERR_COR_MASK,
+ 4, &val);
+ val &= ~PCI_ERR_COR_RCVR;
+ eeh_ops->write_config(dn, aer + PCI_ERR_COR_MASK,
+ 4, val);
+
+ /* Clear linkDown and enable it */
+ eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_STATUS,
+ 4, PCI_ERR_UNC_SURPDN);
+ eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK,
+ 4, &val);
+ val &= ~PCI_ERR_UNC_SURPDN;
+ eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK,
+ 4, val);
+ }
+
break;
}
return 0;
}
+void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
+{
+ struct pci_controller *hose;
+
+ if (pci_is_root_bus(dev->bus)) {
+ hose = pci_bus_to_host(dev->bus);
+ ioda_eeh_root_reset(hose, EEH_RESET_HOT);
+ ioda_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
+ } else {
+ ioda_eeh_bridge_reset(dev, EEH_RESET_HOT);
+ ioda_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
+ }
+}
+
/**
* ioda_eeh_reset - Reset the indicated PE
* @pe: EEH PE
@@ -523,36 +639,28 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option)
int ret;
/*
- * Anyway, we have to clear the problematic state for the
- * corresponding PE. However, we needn't do it if the PE
- * is PHB associated. That means the PHB is having fatal
- * errors and it needs reset. Further more, the AIB interface
- * isn't reliable any more.
- */
- if (!(pe->type & EEH_PE_PHB) &&
- (option == EEH_RESET_HOT ||
- option == EEH_RESET_FUNDAMENTAL)) {
- ret = ioda_eeh_pe_clear(pe);
- if (ret)
- return -EIO;
- }
-
- /*
- * The rules applied to reset, either fundamental or hot reset:
+ * For PHB reset, we always have complete reset. For those PEs whose
+ * primary bus derived from root complex (root bus) or root port
+ * (usually bus#1), we apply hot or fundamental reset on the root port.
+ * For other PEs, we always have hot reset on the PE primary bus.
*
- * We always reset the direct upstream bridge of the PE. If the
- * direct upstream bridge isn't root bridge, we always take hot
- * reset no matter what option (fundamental or hot) is. Otherwise,
- * we should do the reset according to the required option.
+ * Here, we have different design to pHyp, which always clear the
+ * frozen state during PE reset. However, the good idea here from
+ * benh is to keep frozen state before we get PE reset done completely
+ * (until BAR restore). With the frozen state, HW drops illegal IO
+ * or MMIO access, which can incur recrusive frozen PE during PE
+ * reset. The side effect is that EEH core has to clear the frozen
+ * state explicitly after BAR restore.
*/
if (pe->type & EEH_PE_PHB) {
ret = ioda_eeh_phb_reset(hose, option);
} else {
bus = eeh_pe_bus_get(pe);
- if (pci_is_root_bus(bus))
+ if (pci_is_root_bus(bus) ||
+ pci_is_root_bus(bus->parent))
ret = ioda_eeh_root_reset(hose, option);
else
- ret = ioda_eeh_bridge_reset(hose, bus->self, option);
+ ret = ioda_eeh_bridge_reset(bus->self, option);
}
return ret;
@@ -639,22 +747,6 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose)
}
}
-static int ioda_eeh_get_phb_pe(struct pci_controller *hose,
- struct eeh_pe **pe)
-{
- struct eeh_pe *phb_pe;
-
- phb_pe = eeh_phb_pe_get(hose);
- if (!phb_pe) {
- pr_warning("%s Can't find PE for PHB#%d\n",
- __func__, hose->global_number);
- return -EEXIST;
- }
-
- *pe = phb_pe;
- return 0;
-}
-
static int ioda_eeh_get_pe(struct pci_controller *hose,
u16 pe_no, struct eeh_pe **pe)
{
@@ -662,7 +754,8 @@ static int ioda_eeh_get_pe(struct pci_controller *hose,
struct eeh_dev dev;
/* Find the PHB PE */
- if (ioda_eeh_get_phb_pe(hose, &phb_pe))
+ phb_pe = eeh_phb_pe_get(hose);
+ if (!phb_pe)
return -EEXIST;
/* Find the PE according to PE# */
@@ -690,6 +783,7 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
{
struct pci_controller *hose;
struct pnv_phb *phb;
+ struct eeh_pe *phb_pe;
u64 frozen_pe_no;
u16 err_type, severity;
long rc;
@@ -706,10 +800,12 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
list_for_each_entry(hose, &hose_list, list_node) {
/*
* If the subordinate PCI buses of the PHB has been
- * removed, we needn't take care of it any more.
+ * removed or is exactly under error recovery, we
+ * needn't take care of it any more.
*/
phb = hose->private_data;
- if (phb->eeh_state & PNV_EEH_STATE_REMOVED)
+ phb_pe = eeh_phb_pe_get(hose);
+ if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))
continue;
rc = opal_pci_next_error(phb->opal_id,
@@ -742,12 +838,6 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
switch (err_type) {
case OPAL_EEH_IOC_ERROR:
if (severity == OPAL_EEH_SEV_IOC_DEAD) {
- list_for_each_entry(hose, &hose_list,
- list_node) {
- phb = hose->private_data;
- phb->eeh_state |= PNV_EEH_STATE_REMOVED;
- }
-
pr_err("EEH: dead IOC detected\n");
ret = EEH_NEXT_ERR_DEAD_IOC;
} else if (severity == OPAL_EEH_SEV_INF) {
@@ -760,17 +850,12 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
break;
case OPAL_EEH_PHB_ERROR:
if (severity == OPAL_EEH_SEV_PHB_DEAD) {
- if (ioda_eeh_get_phb_pe(hose, pe))
- break;
-
+ *pe = phb_pe;
pr_err("EEH: dead PHB#%x detected\n",
hose->global_number);
- phb->eeh_state |= PNV_EEH_STATE_REMOVED;
ret = EEH_NEXT_ERR_DEAD_PHB;
} else if (severity == OPAL_EEH_SEV_PHB_FENCED) {
- if (ioda_eeh_get_phb_pe(hose, pe))
- break;
-
+ *pe = phb_pe;
pr_err("EEH: fenced PHB#%x detected\n",
hose->global_number);
ret = EEH_NEXT_ERR_FENCED_PHB;
@@ -788,17 +873,21 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
* If we can't find the corresponding PE, the
* PEEV / PEST would be messy. So we force an
* fenced PHB so that it can be recovered.
+ *
+ * If the PE has been marked as isolated, that
+ * should have been removed permanently or in
+ * progress with recovery. We needn't report
+ * it again.
*/
if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) {
- if (!ioda_eeh_get_phb_pe(hose, pe)) {
- pr_err("EEH: Escalated fenced PHB#%x "
- "detected for PE#%llx\n",
- hose->global_number,
- frozen_pe_no);
- ret = EEH_NEXT_ERR_FENCED_PHB;
- } else {
- ret = EEH_NEXT_ERR_NONE;
- }
+ *pe = phb_pe;
+ pr_err("EEH: Escalated fenced PHB#%x "
+ "detected for PE#%llx\n",
+ hose->global_number,
+ frozen_pe_no);
+ ret = EEH_NEXT_ERR_FENCED_PHB;
+ } else if ((*pe)->state & EEH_PE_ISOLATED) {
+ ret = EEH_NEXT_ERR_NONE;
} else {
pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
(*pe)->addr, (*pe)->phb->global_number);
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index a59788e83b8b..56a206f32f77 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -126,6 +126,7 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
edev->mode &= 0xFFFFFF00;
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
edev->mode |= EEH_DEV_BRIDGE;
+ edev->pcix_cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
if (pci_is_pcie(dev)) {
edev->pcie_cap = pci_pcie_cap(dev);
@@ -133,6 +134,9 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
edev->mode |= EEH_DEV_ROOT_PORT;
else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)
edev->mode |= EEH_DEV_DS_PORT;
+
+ edev->aer_cap = pci_find_ext_capability(dev,
+ PCI_EXT_CAP_ID_ERR);
}
edev->config_addr = ((dev->bus->number << 8) | dev->devfn);
diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c
index b9827b0d87e4..788a1977b9a5 100644
--- a/arch/powerpc/platforms/powernv/opal-dump.c
+++ b/arch/powerpc/platforms/powernv/opal-dump.c
@@ -209,89 +209,20 @@ static struct kobj_type dump_ktype = {
.default_attrs = dump_default_attrs,
};
-static void free_dump_sg_list(struct opal_sg_list *list)
-{
- struct opal_sg_list *sg1;
- while (list) {
- sg1 = list->next;
- kfree(list);
- list = sg1;
- }
- list = NULL;
-}
-
-static struct opal_sg_list *dump_data_to_sglist(struct dump_obj *dump)
-{
- struct opal_sg_list *sg1, *list = NULL;
- void *addr;
- int64_t size;
-
- addr = dump->buffer;
- size = dump->size;
-
- sg1 = kzalloc(PAGE_SIZE, GFP_KERNEL);
- if (!sg1)
- goto nomem;
-
- list = sg1;
- sg1->num_entries = 0;
- while (size > 0) {
- /* Translate virtual address to physical address */
- sg1->entry[sg1->num_entries].data =
- (void *)(vmalloc_to_pfn(addr) << PAGE_SHIFT);
-
- if (size > PAGE_SIZE)
- sg1->entry[sg1->num_entries].length = PAGE_SIZE;
- else
- sg1->entry[sg1->num_entries].length = size;
-
- sg1->num_entries++;
- if (sg1->num_entries >= SG_ENTRIES_PER_NODE) {
- sg1->next = kzalloc(PAGE_SIZE, GFP_KERNEL);
- if (!sg1->next)
- goto nomem;
-
- sg1 = sg1->next;
- sg1->num_entries = 0;
- }
- addr += PAGE_SIZE;
- size -= PAGE_SIZE;
- }
- return list;
-
-nomem:
- pr_err("%s : Failed to allocate memory\n", __func__);
- free_dump_sg_list(list);
- return NULL;
-}
-
-static void sglist_to_phy_addr(struct opal_sg_list *list)
-{
- struct opal_sg_list *sg, *next;
-
- for (sg = list; sg; sg = next) {
- next = sg->next;
- /* Don't translate NULL pointer for last entry */
- if (sg->next)
- sg->next = (struct opal_sg_list *)__pa(sg->next);
- else
- sg->next = NULL;
-
- /* Convert num_entries to length */
- sg->num_entries =
- sg->num_entries * sizeof(struct opal_sg_entry) + 16;
- }
-}
-
-static int64_t dump_read_info(uint32_t *id, uint32_t *size, uint32_t *type)
+static int64_t dump_read_info(uint32_t *dump_id, uint32_t *dump_size, uint32_t *dump_type)
{
+ __be32 id, size, type;
int rc;
- *type = 0xffffffff;
- rc = opal_dump_info2(id, size, type);
+ type = cpu_to_be32(0xffffffff);
+ rc = opal_dump_info2(&id, &size, &type);
if (rc == OPAL_PARAMETER)
- rc = opal_dump_info(id, size);
+ rc = opal_dump_info(&id, &size);
+
+ *dump_id = be32_to_cpu(id);
+ *dump_size = be32_to_cpu(size);
+ *dump_type = be32_to_cpu(type);
if (rc)
pr_warn("%s: Failed to get dump info (%d)\n",
@@ -314,15 +245,12 @@ static int64_t dump_read_data(struct dump_obj *dump)
}
/* Generate SG list */
- list = dump_data_to_sglist(dump);
+ list = opal_vmalloc_to_sg_list(dump->buffer, dump->size);
if (!list) {
rc = -ENOMEM;
goto out;
}
- /* Translate sg list addr to real address */
- sglist_to_phy_addr(list);
-
/* First entry address */
addr = __pa(list);
@@ -341,7 +269,7 @@ static int64_t dump_read_data(struct dump_obj *dump)
__func__, dump->id);
/* Free SG list */
- free_dump_sg_list(list);
+ opal_free_sg_list(list);
out:
return rc;
diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
index ef7bc2a97862..10268c41d830 100644
--- a/arch/powerpc/platforms/powernv/opal-elog.c
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -238,18 +238,25 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type)
static void elog_work_fn(struct work_struct *work)
{
- size_t elog_size;
+ __be64 size;
+ __be64 id;
+ __be64 type;
+ uint64_t elog_size;
uint64_t log_id;
uint64_t elog_type;
int rc;
char name[2+16+1];
- rc = opal_get_elog_size(&log_id, &elog_size, &elog_type);
+ rc = opal_get_elog_size(&id, &size, &type);
if (rc != OPAL_SUCCESS) {
pr_err("ELOG: Opal log read failed\n");
return;
}
+ elog_size = be64_to_cpu(size);
+ log_id = be64_to_cpu(id);
+ elog_type = be64_to_cpu(type);
+
BUG_ON(elog_size > OPAL_MAX_ERRLOG_SIZE);
if (elog_size >= OPAL_MAX_ERRLOG_SIZE)
diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c
index 714ef972406b..145a80bc5354 100644
--- a/arch/powerpc/platforms/powernv/opal-flash.c
+++ b/arch/powerpc/platforms/powernv/opal-flash.c
@@ -20,6 +20,7 @@
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
+#include <linux/delay.h>
#include <asm/opal.h>
@@ -79,9 +80,6 @@
/* XXX: Assume candidate image size is <= 1GB */
#define MAX_IMAGE_SIZE 0x40000000
-/* Flash sg list version */
-#define SG_LIST_VERSION (1UL)
-
/* Image status */
enum {
IMAGE_INVALID,
@@ -131,11 +129,15 @@ static DEFINE_MUTEX(image_data_mutex);
*/
static inline void opal_flash_validate(void)
{
- struct validate_flash_t *args_buf = &validate_flash_data;
+ long ret;
+ void *buf = validate_flash_data.buf;
+ __be32 size, result;
- args_buf->status = opal_validate_flash(__pa(args_buf->buf),
- &(args_buf->buf_size),
- &(args_buf->result));
+ ret = opal_validate_flash(__pa(buf), &size, &result);
+
+ validate_flash_data.status = ret;
+ validate_flash_data.buf_size = be32_to_cpu(size);
+ validate_flash_data.result = be32_to_cpu(result);
}
/*
@@ -268,93 +270,11 @@ static ssize_t manage_store(struct kobject *kobj,
}
/*
- * Free sg list
- */
-static void free_sg_list(struct opal_sg_list *list)
-{
- struct opal_sg_list *sg1;
- while (list) {
- sg1 = list->next;
- kfree(list);
- list = sg1;
- }
- list = NULL;
-}
-
-/*
- * Build candidate image scatter gather list
- *
- * list format:
- * -----------------------------------
- * | VER (8) | Entry length in bytes |
- * -----------------------------------
- * | Pointer to next entry |
- * -----------------------------------
- * | Address of memory area 1 |
- * -----------------------------------
- * | Length of memory area 1 |
- * -----------------------------------
- * | ......... |
- * -----------------------------------
- * | ......... |
- * -----------------------------------
- * | Address of memory area N |
- * -----------------------------------
- * | Length of memory area N |
- * -----------------------------------
- */
-static struct opal_sg_list *image_data_to_sglist(void)
-{
- struct opal_sg_list *sg1, *list = NULL;
- void *addr;
- int size;
-
- addr = image_data.data;
- size = image_data.size;
-
- sg1 = kzalloc(PAGE_SIZE, GFP_KERNEL);
- if (!sg1)
- return NULL;
-
- list = sg1;
- sg1->num_entries = 0;
- while (size > 0) {
- /* Translate virtual address to physical address */
- sg1->entry[sg1->num_entries].data =
- (void *)(vmalloc_to_pfn(addr) << PAGE_SHIFT);
-
- if (size > PAGE_SIZE)
- sg1->entry[sg1->num_entries].length = PAGE_SIZE;
- else
- sg1->entry[sg1->num_entries].length = size;
-
- sg1->num_entries++;
- if (sg1->num_entries >= SG_ENTRIES_PER_NODE) {
- sg1->next = kzalloc(PAGE_SIZE, GFP_KERNEL);
- if (!sg1->next) {
- pr_err("%s : Failed to allocate memory\n",
- __func__);
- goto nomem;
- }
-
- sg1 = sg1->next;
- sg1->num_entries = 0;
- }
- addr += PAGE_SIZE;
- size -= PAGE_SIZE;
- }
- return list;
-nomem:
- free_sg_list(list);
- return NULL;
-}
-
-/*
* OPAL update flash
*/
static int opal_flash_update(int op)
{
- struct opal_sg_list *sg, *list, *next;
+ struct opal_sg_list *list;
unsigned long addr;
int64_t rc = OPAL_PARAMETER;
@@ -364,35 +284,13 @@ static int opal_flash_update(int op)
goto flash;
}
- list = image_data_to_sglist();
+ list = opal_vmalloc_to_sg_list(image_data.data, image_data.size);
if (!list)
goto invalid_img;
/* First entry address */
addr = __pa(list);
- /* Translate sg list address to absolute */
- for (sg = list; sg; sg = next) {
- next = sg->next;
- /* Don't translate NULL pointer for last entry */
- if (sg->next)
- sg->next = (struct opal_sg_list *)__pa(sg->next);
- else
- sg->next = NULL;
-
- /*
- * Convert num_entries to version/length format
- * to satisfy OPAL.
- */
- sg->num_entries = (SG_LIST_VERSION << 56) |
- (sg->num_entries * sizeof(struct opal_sg_entry) + 16);
- }
-
- pr_alert("FLASH: Image is %u bytes\n", image_data.size);
- pr_alert("FLASH: Image update requested\n");
- pr_alert("FLASH: Image will be updated during system reboot\n");
- pr_alert("FLASH: This will take several minutes. Do not power off!\n");
-
flash:
rc = opal_update_flash(addr);
@@ -400,6 +298,47 @@ invalid_img:
return rc;
}
+/* Return CPUs to OPAL before starting FW update */
+static void flash_return_cpu(void *info)
+{
+ int cpu = smp_processor_id();
+
+ if (!cpu_online(cpu))
+ return;
+
+ /* Disable IRQ */
+ hard_irq_disable();
+
+ /* Return the CPU to OPAL */
+ opal_return_cpu();
+}
+
+/* This gets called just before system reboots */
+void opal_flash_term_callback(void)
+{
+ struct cpumask mask;
+
+ if (update_flash_data.status != FLASH_IMG_READY)
+ return;
+
+ pr_alert("FLASH: Flashing new firmware\n");
+ pr_alert("FLASH: Image is %u bytes\n", image_data.size);
+ pr_alert("FLASH: Performing flash and reboot/shutdown\n");
+ pr_alert("FLASH: This will take several minutes. Do not power off!\n");
+
+ /* Small delay to help getting the above message out */
+ msleep(500);
+
+ /* Return secondary CPUs to firmware */
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+ if (!cpumask_empty(&mask))
+ smp_call_function_many(&mask,
+ flash_return_cpu, NULL, false);
+ /* Hard disable interrupts */
+ hard_irq_disable();
+}
+
/*
* Show candidate image status
*/
diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c
index 6b614726baf2..d202f9bc3683 100644
--- a/arch/powerpc/platforms/powernv/opal-sysparam.c
+++ b/arch/powerpc/platforms/powernv/opal-sysparam.c
@@ -39,10 +39,11 @@ struct param_attr {
struct kobj_attribute kobj_attr;
};
-static int opal_get_sys_param(u32 param_id, u32 length, void *buffer)
+static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer)
{
struct opal_msg msg;
- int ret, token;
+ ssize_t ret;
+ int token;
token = opal_async_get_token_interruptible();
if (token < 0) {
@@ -59,7 +60,7 @@ static int opal_get_sys_param(u32 param_id, u32 length, void *buffer)
ret = opal_async_wait_response(token, &msg);
if (ret) {
- pr_err("%s: Failed to wait for the async response, %d\n",
+ pr_err("%s: Failed to wait for the async response, %zd\n",
__func__, ret);
goto out_token;
}
@@ -111,7 +112,7 @@ static ssize_t sys_param_show(struct kobject *kobj,
{
struct param_attr *attr = container_of(kobj_attr, struct param_attr,
kobj_attr);
- int ret;
+ ssize_t ret;
mutex_lock(&opal_sysparam_mutex);
ret = opal_get_sys_param(attr->param_id, attr->param_size,
@@ -121,9 +122,10 @@ static ssize_t sys_param_show(struct kobject *kobj,
memcpy(buf, param_data_buf, attr->param_size);
+ ret = attr->param_size;
out:
mutex_unlock(&opal_sysparam_mutex);
- return ret ? ret : attr->param_size;
+ return ret;
}
static ssize_t sys_param_store(struct kobject *kobj,
@@ -131,14 +133,20 @@ static ssize_t sys_param_store(struct kobject *kobj,
{
struct param_attr *attr = container_of(kobj_attr, struct param_attr,
kobj_attr);
- int ret;
+ ssize_t ret;
+
+ /* MAX_PARAM_DATA_LEN is sizeof(param_data_buf) */
+ if (count > MAX_PARAM_DATA_LEN)
+ count = MAX_PARAM_DATA_LEN;
mutex_lock(&opal_sysparam_mutex);
memcpy(param_data_buf, buf, count);
ret = opal_set_sys_param(attr->param_id, attr->param_size,
param_data_buf);
mutex_unlock(&opal_sysparam_mutex);
- return ret ? ret : count;
+ if (!ret)
+ ret = count;
+ return ret;
}
void __init opal_sys_param_init(void)
@@ -214,13 +222,13 @@ void __init opal_sys_param_init(void)
}
if (of_property_read_u32_array(sysparam, "param-len", size, count)) {
- pr_err("SYSPARAM: Missing propery param-len in the DT\n");
+ pr_err("SYSPARAM: Missing property param-len in the DT\n");
goto out_free_perm;
}
if (of_property_read_u8_array(sysparam, "param-perm", perm, count)) {
- pr_err("SYSPARAM: Missing propery param-perm in the DT\n");
+ pr_err("SYSPARAM: Missing property param-perm in the DT\n");
goto out_free_perm;
}
@@ -233,6 +241,12 @@ void __init opal_sys_param_init(void)
/* For each of the parameters, populate the parameter attributes */
for (i = 0; i < count; i++) {
+ if (size[i] > MAX_PARAM_DATA_LEN) {
+ pr_warn("SYSPARAM: Not creating parameter %d as size "
+ "exceeds buffer length\n", i);
+ continue;
+ }
+
sysfs_attr_init(&attr[i].kobj_attr.attr);
attr[i].param_id = id[i];
attr[i].param_size = size[i];
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 49d2f00019e5..360ad80c754c 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -242,14 +242,14 @@ void opal_notifier_update_evt(uint64_t evt_mask,
void opal_notifier_enable(void)
{
int64_t rc;
- uint64_t evt = 0;
+ __be64 evt = 0;
atomic_set(&opal_notifier_hold, 0);
/* Process pending events */
rc = opal_poll_events(&evt);
if (rc == OPAL_SUCCESS && evt)
- opal_do_notifier(evt);
+ opal_do_notifier(be64_to_cpu(evt));
}
void opal_notifier_disable(void)
@@ -529,7 +529,7 @@ static irqreturn_t opal_interrupt(int irq, void *data)
opal_handle_interrupt(virq_to_hw(irq), &events);
- opal_do_notifier(events);
+ opal_do_notifier(be64_to_cpu(events));
return IRQ_HANDLED;
}
@@ -638,3 +638,66 @@ void opal_shutdown(void)
/* Export this so that test modules can use it */
EXPORT_SYMBOL_GPL(opal_invalid_call);
+
+/* Convert a region of vmalloc memory to an opal sg list */
+struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
+ unsigned long vmalloc_size)
+{
+ struct opal_sg_list *sg, *first = NULL;
+ unsigned long i = 0;
+
+ sg = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!sg)
+ goto nomem;
+
+ first = sg;
+
+ while (vmalloc_size > 0) {
+ uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
+ uint64_t length = min(vmalloc_size, PAGE_SIZE);
+
+ sg->entry[i].data = cpu_to_be64(data);
+ sg->entry[i].length = cpu_to_be64(length);
+ i++;
+
+ if (i >= SG_ENTRIES_PER_NODE) {
+ struct opal_sg_list *next;
+
+ next = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!next)
+ goto nomem;
+
+ sg->length = cpu_to_be64(
+ i * sizeof(struct opal_sg_entry) + 16);
+ i = 0;
+ sg->next = cpu_to_be64(__pa(next));
+ sg = next;
+ }
+
+ vmalloc_addr += length;
+ vmalloc_size -= length;
+ }
+
+ sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);
+
+ return first;
+
+nomem:
+ pr_err("%s : Failed to allocate memory\n", __func__);
+ opal_free_sg_list(first);
+ return NULL;
+}
+
+void opal_free_sg_list(struct opal_sg_list *sg)
+{
+ while (sg) {
+ uint64_t next = be64_to_cpu(sg->next);
+
+ kfree(sg);
+
+ if (next)
+ sg = __va(next);
+ else
+ sg = NULL;
+ }
+}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3b2b4fb3585b..de19edeaa7a7 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/pci.h>
+#include <linux/crash_dump.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/string.h>
@@ -343,7 +344,6 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
pci_name(dev));
continue;
}
- pci_dev_get(dev);
pdn->pcidev = dev;
pdn->pe_number = pe->pe_number;
pe->dma_weight += pnv_ioda_dma_weight(dev);
@@ -462,7 +462,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
pe = &phb->ioda.pe_array[pdn->pe_number];
WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
- set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
+ set_iommu_table_base(&pdev->dev, &pe->tce32_table);
}
static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
@@ -664,15 +664,15 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
* errors, and on the first pass the data will be a relative
* bus number, print that out instead.
*/
- tbl->it_busno = 0;
pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
8);
- tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE |
- TCE_PCI_SWINV_PAIR;
+ tbl->it_type |= (TCE_PCI_SWINV_CREATE |
+ TCE_PCI_SWINV_FREE |
+ TCE_PCI_SWINV_PAIR);
}
iommu_init_table(tbl, phb->hose->node);
- iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);
+ iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
if (pe->pdev)
set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
@@ -794,14 +794,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
* errors, and on the first pass the data will be a relative
* bus number, print that out instead.
*/
- tbl->it_busno = 0;
pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
8);
- tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
+ tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
}
iommu_init_table(tbl, phb->hose->node);
- iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);
+ iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
if (pe->pdev)
set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
@@ -1387,12 +1386,24 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
+ ppc_md.pcibios_reset_secondary_bus = pnv_pci_reset_secondary_bus;
pci_add_flags(PCI_REASSIGN_ALL_RSRC);
/* Reset IODA tables to a clean state */
rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
if (rc)
pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc);
+
+ /* If we're running in kdump kerenl, the previous kerenl never
+ * shutdown PCI devices correctly. We already got IODA table
+ * cleaned out. So we have to issue PHB reset to stop all PCI
+ * transactions from previous kerenl.
+ */
+ if (is_kdump_kernel()) {
+ pr_info(" Issue PHB reset ...\n");
+ ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
+ ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET);
+ }
}
void __init pnv_pci_init_ioda2_phb(struct device_node *np)
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 8518817dcdfd..eefbfcc3fd8c 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -131,65 +131,60 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
int i;
data = (struct OpalIoP7IOCPhbErrorData *)common;
- pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n\n",
+ pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n",
hose->global_number, common->version);
if (data->brdgCtl)
- pr_info(" brdgCtl: %08x\n",
+ pr_info("brdgCtl: %08x\n",
data->brdgCtl);
if (data->portStatusReg || data->rootCmplxStatus ||
data->busAgentStatus)
- pr_info(" UtlSts: %08x %08x %08x\n",
+ pr_info("UtlSts: %08x %08x %08x\n",
data->portStatusReg, data->rootCmplxStatus,
data->busAgentStatus);
if (data->deviceStatus || data->slotStatus ||
data->linkStatus || data->devCmdStatus ||
data->devSecStatus)
- pr_info(" RootSts: %08x %08x %08x %08x %08x\n",
+ pr_info("RootSts: %08x %08x %08x %08x %08x\n",
data->deviceStatus, data->slotStatus,
data->linkStatus, data->devCmdStatus,
data->devSecStatus);
if (data->rootErrorStatus || data->uncorrErrorStatus ||
data->corrErrorStatus)
- pr_info(" RootErrSts: %08x %08x %08x\n",
+ pr_info("RootErrSts: %08x %08x %08x\n",
data->rootErrorStatus, data->uncorrErrorStatus,
data->corrErrorStatus);
if (data->tlpHdr1 || data->tlpHdr2 ||
data->tlpHdr3 || data->tlpHdr4)
- pr_info(" RootErrLog: %08x %08x %08x %08x\n",
+ pr_info("RootErrLog: %08x %08x %08x %08x\n",
data->tlpHdr1, data->tlpHdr2,
data->tlpHdr3, data->tlpHdr4);
if (data->sourceId || data->errorClass ||
data->correlator)
- pr_info(" RootErrLog1: %08x %016llx %016llx\n",
+ pr_info("RootErrLog1: %08x %016llx %016llx\n",
data->sourceId, data->errorClass,
data->correlator);
if (data->p7iocPlssr || data->p7iocCsr)
- pr_info(" PhbSts: %016llx %016llx\n",
+ pr_info("PhbSts: %016llx %016llx\n",
data->p7iocPlssr, data->p7iocCsr);
- if (data->lemFir || data->lemErrorMask ||
- data->lemWOF)
- pr_info(" Lem: %016llx %016llx %016llx\n",
+ if (data->lemFir)
+ pr_info("Lem: %016llx %016llx %016llx\n",
data->lemFir, data->lemErrorMask,
data->lemWOF);
- if (data->phbErrorStatus || data->phbFirstErrorStatus ||
- data->phbErrorLog0 || data->phbErrorLog1)
- pr_info(" PhbErr: %016llx %016llx %016llx %016llx\n",
+ if (data->phbErrorStatus)
+ pr_info("PhbErr: %016llx %016llx %016llx %016llx\n",
data->phbErrorStatus, data->phbFirstErrorStatus,
data->phbErrorLog0, data->phbErrorLog1);
- if (data->mmioErrorStatus || data->mmioFirstErrorStatus ||
- data->mmioErrorLog0 || data->mmioErrorLog1)
- pr_info(" OutErr: %016llx %016llx %016llx %016llx\n",
+ if (data->mmioErrorStatus)
+ pr_info("OutErr: %016llx %016llx %016llx %016llx\n",
data->mmioErrorStatus, data->mmioFirstErrorStatus,
data->mmioErrorLog0, data->mmioErrorLog1);
- if (data->dma0ErrorStatus || data->dma0FirstErrorStatus ||
- data->dma0ErrorLog0 || data->dma0ErrorLog1)
- pr_info(" InAErr: %016llx %016llx %016llx %016llx\n",
+ if (data->dma0ErrorStatus)
+ pr_info("InAErr: %016llx %016llx %016llx %016llx\n",
data->dma0ErrorStatus, data->dma0FirstErrorStatus,
data->dma0ErrorLog0, data->dma0ErrorLog1);
- if (data->dma1ErrorStatus || data->dma1FirstErrorStatus ||
- data->dma1ErrorLog0 || data->dma1ErrorLog1)
- pr_info(" InBErr: %016llx %016llx %016llx %016llx\n",
+ if (data->dma1ErrorStatus)
+ pr_info("InBErr: %016llx %016llx %016llx %016llx\n",
data->dma1ErrorStatus, data->dma1FirstErrorStatus,
data->dma1ErrorLog0, data->dma1ErrorLog1);
@@ -198,7 +193,7 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
(data->pestB[i] >> 63) == 0)
continue;
- pr_info(" PE[%3d] A/B: %016llx %016llx\n",
+ pr_info("PE[%3d] A/B: %016llx %016llx\n",
i, data->pestA[i], data->pestB[i]);
}
}
@@ -210,69 +205,63 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
int i;
data = (struct OpalIoPhb3ErrorData*)common;
- pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n\n",
+ pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n",
hose->global_number, common->version);
if (data->brdgCtl)
- pr_info(" brdgCtl: %08x\n",
+ pr_info("brdgCtl: %08x\n",
data->brdgCtl);
if (data->portStatusReg || data->rootCmplxStatus ||
data->busAgentStatus)
- pr_info(" UtlSts: %08x %08x %08x\n",
+ pr_info("UtlSts: %08x %08x %08x\n",
data->portStatusReg, data->rootCmplxStatus,
data->busAgentStatus);
if (data->deviceStatus || data->slotStatus ||
data->linkStatus || data->devCmdStatus ||
data->devSecStatus)
- pr_info(" RootSts: %08x %08x %08x %08x %08x\n",
+ pr_info("RootSts: %08x %08x %08x %08x %08x\n",
data->deviceStatus, data->slotStatus,
data->linkStatus, data->devCmdStatus,
data->devSecStatus);
if (data->rootErrorStatus || data->uncorrErrorStatus ||
data->corrErrorStatus)
- pr_info(" RootErrSts: %08x %08x %08x\n",
+ pr_info("RootErrSts: %08x %08x %08x\n",
data->rootErrorStatus, data->uncorrErrorStatus,
data->corrErrorStatus);
if (data->tlpHdr1 || data->tlpHdr2 ||
data->tlpHdr3 || data->tlpHdr4)
- pr_info(" RootErrLog: %08x %08x %08x %08x\n",
+ pr_info("RootErrLog: %08x %08x %08x %08x\n",
data->tlpHdr1, data->tlpHdr2,
data->tlpHdr3, data->tlpHdr4);
if (data->sourceId || data->errorClass ||
data->correlator)
- pr_info(" RootErrLog1: %08x %016llx %016llx\n",
+ pr_info("RootErrLog1: %08x %016llx %016llx\n",
data->sourceId, data->errorClass,
data->correlator);
- if (data->nFir || data->nFirMask ||
- data->nFirWOF)
- pr_info(" nFir: %016llx %016llx %016llx\n",
+ if (data->nFir)
+ pr_info("nFir: %016llx %016llx %016llx\n",
data->nFir, data->nFirMask,
data->nFirWOF);
if (data->phbPlssr || data->phbCsr)
- pr_info(" PhbSts: %016llx %016llx\n",
+ pr_info("PhbSts: %016llx %016llx\n",
data->phbPlssr, data->phbCsr);
- if (data->lemFir || data->lemErrorMask ||
- data->lemWOF)
- pr_info(" Lem: %016llx %016llx %016llx\n",
+ if (data->lemFir)
+ pr_info("Lem: %016llx %016llx %016llx\n",
data->lemFir, data->lemErrorMask,
data->lemWOF);
- if (data->phbErrorStatus || data->phbFirstErrorStatus ||
- data->phbErrorLog0 || data->phbErrorLog1)
- pr_info(" PhbErr: %016llx %016llx %016llx %016llx\n",
+ if (data->phbErrorStatus)
+ pr_info("PhbErr: %016llx %016llx %016llx %016llx\n",
data->phbErrorStatus, data->phbFirstErrorStatus,
data->phbErrorLog0, data->phbErrorLog1);
- if (data->mmioErrorStatus || data->mmioFirstErrorStatus ||
- data->mmioErrorLog0 || data->mmioErrorLog1)
- pr_info(" OutErr: %016llx %016llx %016llx %016llx\n",
+ if (data->mmioErrorStatus)
+ pr_info("OutErr: %016llx %016llx %016llx %016llx\n",
data->mmioErrorStatus, data->mmioFirstErrorStatus,
data->mmioErrorLog0, data->mmioErrorLog1);
- if (data->dma0ErrorStatus || data->dma0FirstErrorStatus ||
- data->dma0ErrorLog0 || data->dma0ErrorLog1)
- pr_info(" InAErr: %016llx %016llx %016llx %016llx\n",
+ if (data->dma0ErrorStatus)
+ pr_info("InAErr: %016llx %016llx %016llx %016llx\n",
data->dma0ErrorStatus, data->dma0FirstErrorStatus,
data->dma0ErrorLog0, data->dma0ErrorLog1);
- if (data->dma1ErrorStatus || data->dma1FirstErrorStatus ||
- data->dma1ErrorLog0 || data->dma1ErrorLog1)
- pr_info(" InBErr: %016llx %016llx %016llx %016llx\n",
+ if (data->dma1ErrorStatus)
+ pr_info("InBErr: %016llx %016llx %016llx %016llx\n",
data->dma1ErrorStatus, data->dma1FirstErrorStatus,
data->dma1ErrorLog0, data->dma1ErrorLog1);
@@ -281,7 +270,7 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
(data->pestB[i] >> 63) == 0)
continue;
- pr_info(" PE[%3d] A/B: %016llx %016llx\n",
+ pr_info("PE[%3d] A/B: %016llx %016llx\n",
i, data->pestA[i], data->pestB[i]);
}
}
@@ -384,9 +373,6 @@ int pnv_pci_cfg_read(struct device_node *dn,
struct pci_dn *pdn = PCI_DN(dn);
struct pnv_phb *phb = pdn->phb->private_data;
u32 bdfn = (pdn->busno << 8) | pdn->devfn;
-#ifdef CONFIG_EEH
- struct eeh_pe *phb_pe = NULL;
-#endif
s64 rc;
switch (size) {
@@ -412,31 +398,9 @@ int pnv_pci_cfg_read(struct device_node *dn,
default:
return PCIBIOS_FUNC_NOT_SUPPORTED;
}
+
cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
__func__, pdn->busno, pdn->devfn, where, size, *val);
-
- /*
- * Check if the specified PE has been put into frozen
- * state. On the other hand, we needn't do that while
- * the PHB has been put into frozen state because of
- * PHB-fatal errors.
- */
-#ifdef CONFIG_EEH
- phb_pe = eeh_phb_pe_get(pdn->phb);
- if (phb_pe && (phb_pe->state & EEH_PE_ISOLATED))
- return PCIBIOS_SUCCESSFUL;
-
- if (phb->eeh_state & PNV_EEH_STATE_ENABLED) {
- if (*val == EEH_IO_ERROR_VALUE(size) &&
- eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
- return PCIBIOS_DEVICE_NOT_FOUND;
- } else {
- pnv_pci_config_check_eeh(phb, dn);
- }
-#else
- pnv_pci_config_check_eeh(phb, dn);
-#endif
-
return PCIBIOS_SUCCESSFUL;
}
@@ -463,33 +427,74 @@ int pnv_pci_cfg_write(struct device_node *dn,
return PCIBIOS_FUNC_NOT_SUPPORTED;
}
- /* Check if the PHB got frozen due to an error (no response) */
-#ifdef CONFIG_EEH
- if (!(phb->eeh_state & PNV_EEH_STATE_ENABLED))
- pnv_pci_config_check_eeh(phb, dn);
-#else
- pnv_pci_config_check_eeh(phb, dn);
-#endif
-
return PCIBIOS_SUCCESSFUL;
}
+#if CONFIG_EEH
+static bool pnv_pci_cfg_check(struct pci_controller *hose,
+ struct device_node *dn)
+{
+ struct eeh_dev *edev = NULL;
+ struct pnv_phb *phb = hose->private_data;
+
+ /* EEH not enabled ? */
+ if (!(phb->flags & PNV_PHB_FLAG_EEH))
+ return true;
+
+ /* PE reset or device removed ? */
+ edev = of_node_to_eeh_dev(dn);
+ if (edev) {
+ if (edev->pe &&
+ (edev->pe->state & EEH_PE_RESET))
+ return false;
+
+ if (edev->mode & EEH_DEV_REMOVED)
+ return false;
+ }
+
+ return true;
+}
+#else
+static inline pnv_pci_cfg_check(struct pci_controller *hose,
+ struct device_node *dn)
+{
+ return true;
+}
+#endif /* CONFIG_EEH */
+
static int pnv_pci_read_config(struct pci_bus *bus,
unsigned int devfn,
int where, int size, u32 *val)
{
struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
struct pci_dn *pdn;
+ struct pnv_phb *phb;
+ bool found = false;
+ int ret;
+ *val = 0xFFFFFFFF;
for (dn = busdn->child; dn; dn = dn->sibling) {
pdn = PCI_DN(dn);
- if (pdn && pdn->devfn == devfn)
- return pnv_pci_cfg_read(dn, where, size, val);
+ if (pdn && pdn->devfn == devfn) {
+ phb = pdn->phb->private_data;
+ found = true;
+ break;
+ }
}
- *val = 0xFFFFFFFF;
- return PCIBIOS_DEVICE_NOT_FOUND;
+ if (!found || !pnv_pci_cfg_check(pdn->phb, dn))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ ret = pnv_pci_cfg_read(dn, where, size, val);
+ if (phb->flags & PNV_PHB_FLAG_EEH) {
+ if (*val == EEH_IO_ERROR_VALUE(size) &&
+ eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ } else {
+ pnv_pci_config_check_eeh(phb, dn);
+ }
+
+ return ret;
}
static int pnv_pci_write_config(struct pci_bus *bus,
@@ -498,14 +503,27 @@ static int pnv_pci_write_config(struct pci_bus *bus,
{
struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
struct pci_dn *pdn;
+ struct pnv_phb *phb;
+ bool found = false;
+ int ret;
for (dn = busdn->child; dn; dn = dn->sibling) {
pdn = PCI_DN(dn);
- if (pdn && pdn->devfn == devfn)
- return pnv_pci_cfg_write(dn, where, size, val);
+ if (pdn && pdn->devfn == devfn) {
+ phb = pdn->phb->private_data;
+ found = true;
+ break;
+ }
}
- return PCIBIOS_DEVICE_NOT_FOUND;
+ if (!found || !pnv_pci_cfg_check(pdn->phb, dn))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ ret = pnv_pci_cfg_write(dn, where, size, val);
+ if (!(phb->flags & PNV_PHB_FLAG_EEH))
+ pnv_pci_config_check_eeh(phb, dn);
+
+ return ret;
}
struct pci_ops pnv_pci_ops = {
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index cde169442775..676232c34328 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -81,28 +81,27 @@ struct pnv_eeh_ops {
int (*configure_bridge)(struct eeh_pe *pe);
int (*next_error)(struct eeh_pe **pe);
};
-
-#define PNV_EEH_STATE_ENABLED (1 << 0) /* EEH enabled */
-#define PNV_EEH_STATE_REMOVED (1 << 1) /* PHB removed */
-
#endif /* CONFIG_EEH */
+#define PNV_PHB_FLAG_EEH (1 << 0)
+
struct pnv_phb {
struct pci_controller *hose;
enum pnv_phb_type type;
enum pnv_phb_model model;
u64 hub_id;
u64 opal_id;
+ int flags;
void __iomem *regs;
int initialized;
spinlock_t lock;
#ifdef CONFIG_EEH
struct pnv_eeh_ops *eeh_ops;
- int eeh_state;
#endif
#ifdef CONFIG_DEBUG_FS
+ int has_dbgfs;
struct dentry *dbgfs;
#endif
@@ -205,5 +204,7 @@ extern void pnv_pci_init_ioda_hub(struct device_node *np);
extern void pnv_pci_init_ioda2_phb(struct device_node *np);
extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
__be64 *startp, __be64 *endp, bool rm);
+extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
+extern int ioda_eeh_phb_reset(struct pci_controller *hose, int option);
#endif /* __POWERNV_PCI_H */
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 61cf8fa9c61b..865aab40ded7 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -27,6 +27,7 @@
#include <linux/interrupt.h>
#include <linux/bug.h>
#include <linux/pci.h>
+#include <linux/cpufreq.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
@@ -98,11 +99,32 @@ static void pnv_show_cpuinfo(struct seq_file *m)
of_node_put(root);
}
+static void pnv_prepare_going_down(void)
+{
+ /*
+ * Disable all notifiers from OPAL, we can't
+ * service interrupts anymore anyway
+ */
+ opal_notifier_disable();
+
+ /* Soft disable interrupts */
+ local_irq_disable();
+
+ /*
+ * Return secondary CPUs to firwmare if a flash update
+ * is pending otherwise we will get all sort of error
+ * messages about CPU being stuck etc.. This will also
+ * have the side effect of hard disabling interrupts so
+ * past this point, the kernel is effectively dead.
+ */
+ opal_flash_term_callback();
+}
+
static void __noreturn pnv_restart(char *cmd)
{
long rc = OPAL_BUSY;
- opal_notifier_disable();
+ pnv_prepare_going_down();
while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
rc = opal_cec_reboot();
@@ -119,7 +141,7 @@ static void __noreturn pnv_power_off(void)
{
long rc = OPAL_BUSY;
- opal_notifier_disable();
+ pnv_prepare_going_down();
while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
rc = opal_cec_power_down(0);
@@ -162,18 +184,62 @@ static void pnv_shutdown(void)
}
#ifdef CONFIG_KEXEC
+static void pnv_kexec_wait_secondaries_down(void)
+{
+ int my_cpu, i, notified = -1;
+
+ my_cpu = get_cpu();
+
+ for_each_online_cpu(i) {
+ uint8_t status;
+ int64_t rc;
+
+ if (i == my_cpu)
+ continue;
+
+ for (;;) {
+ rc = opal_query_cpu_status(get_hard_smp_processor_id(i),
+ &status);
+ if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED)
+ break;
+ barrier();
+ if (i != notified) {
+ printk(KERN_INFO "kexec: waiting for cpu %d "
+ "(physical %d) to enter OPAL\n",
+ i, paca[i].hw_cpu_id);
+ notified = i;
+ }
+ }
+ }
+}
+
static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
{
xics_kexec_teardown_cpu(secondary);
- /* Return secondary CPUs to firmware on OPAL v3 */
- if (firmware_has_feature(FW_FEATURE_OPALv3) && secondary) {
+ /* On OPAL v3, we return all CPUs to firmware */
+
+ if (!firmware_has_feature(FW_FEATURE_OPALv3))
+ return;
+
+ if (secondary) {
+ /* Return secondary CPUs to firmware on OPAL v3 */
mb();
get_paca()->kexec_state = KEXEC_STATE_REAL_MODE;
mb();
/* Return the CPU to OPAL */
opal_return_cpu();
+ } else if (crash_shutdown) {
+ /*
+ * On crash, we don't wait for secondaries to go
+ * down as they might be unreachable or hung, so
+ * instead we just wait a bit and move on.
+ */
+ mdelay(1);
+ } else {
+ /* Primary waits for the secondaries to have reached OPAL */
+ pnv_kexec_wait_secondaries_down();
}
}
#endif /* CONFIG_KEXEC */
@@ -225,6 +291,25 @@ static int __init pnv_probe(void)
return 1;
}
+/*
+ * Returns the cpu frequency for 'cpu' in Hz. This is used by
+ * /proc/cpuinfo
+ */
+unsigned long pnv_get_proc_freq(unsigned int cpu)
+{
+ unsigned long ret_freq;
+
+ ret_freq = cpufreq_quick_get(cpu) * 1000ul;
+
+ /*
+ * If the backend cpufreq driver does not exist,
+ * then fallback to old way of reporting the clockrate.
+ */
+ if (!ret_freq)
+ ret_freq = ppc_proc_freq;
+ return ret_freq;
+}
+
define_machine(powernv) {
.name = "PowerNV",
.probe = pnv_probe,
@@ -232,6 +317,7 @@ define_machine(powernv) {
.setup_arch = pnv_setup_arch,
.init_IRQ = pnv_init_IRQ,
.show_cpuinfo = pnv_show_cpuinfo,
+ .get_proc_freq = pnv_get_proc_freq,
.progress = pnv_progress,
.machine_shutdown = pnv_shutdown,
.power_save = power7_idle,
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 908672bdcea6..bf5fcd452168 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -30,6 +30,7 @@
#include <asm/cputhreads.h>
#include <asm/xics.h>
#include <asm/opal.h>
+#include <asm/runlatch.h>
#include "powernv.h"
@@ -156,7 +157,9 @@ static void pnv_smp_cpu_kill_self(void)
*/
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
while (!generic_check_cpu_restart(cpu)) {
+ ppc64_runlatch_off();
power7_nap();
+ ppc64_runlatch_on();
if (!generic_check_cpu_restart(cpu)) {
DBG("CPU%d Unexpected exit while offline !\n", cpu);
/* We may be getting an IPI, so we re-enable
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 8a8f0472d98f..0bec0c02c5e7 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -175,6 +175,36 @@ static int pseries_eeh_find_cap(struct device_node *dn, int cap)
return 0;
}
+static int pseries_eeh_find_ecap(struct device_node *dn, int cap)
+{
+ struct pci_dn *pdn = PCI_DN(dn);
+ struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+ u32 header;
+ int pos = 256;
+ int ttl = (4096 - 256) / 8;
+
+ if (!edev || !edev->pcie_cap)
+ return 0;
+ if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
+ return 0;
+ else if (!header)
+ return 0;
+
+ while (ttl-- > 0) {
+ if (PCI_EXT_CAP_ID(header) == cap && pos)
+ return pos;
+
+ pos = PCI_EXT_CAP_NEXT(header);
+ if (pos < 256)
+ break;
+
+ if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
+ break;
+ }
+
+ return 0;
+}
+
/**
* pseries_eeh_of_probe - EEH probe on the given device
* @dn: OF node
@@ -220,7 +250,9 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
* or PCIe switch downstream port.
*/
edev->class_code = class_code;
+ edev->pcix_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_PCIX);
edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP);
+ edev->aer_cap = pseries_eeh_find_ecap(dn, PCI_EXT_CAP_ID_ERR);
edev->mode &= 0xFFFFFF00;
if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
edev->mode |= EEH_DEV_BRIDGE;
@@ -464,6 +496,7 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *state)
} else {
result = EEH_STATE_NOT_SUPPORT;
}
+ break;
default:
result = EEH_STATE_NOT_SUPPORT;
}
@@ -499,11 +532,19 @@ static int pseries_eeh_reset(struct eeh_pe *pe, int option)
/* If fundamental-reset not supported, try hot-reset */
if (option == EEH_RESET_FUNDAMENTAL &&
ret == -8) {
+ option = EEH_RESET_HOT;
ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
config_addr, BUID_HI(pe->phb->buid),
- BUID_LO(pe->phb->buid), EEH_RESET_HOT);
+ BUID_LO(pe->phb->buid), option);
}
+ /* We need reset hold or settlement delay */
+ if (option == EEH_RESET_FUNDAMENTAL ||
+ option == EEH_RESET_HOT)
+ msleep(EEH_PE_RST_HOLD_TIME);
+ else
+ msleep(EEH_PE_RST_SETTLE_TIME);
+
return ret;
}
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 9b8e05078a63..20d62975856f 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -88,13 +88,14 @@ void set_default_offline_state(int cpu)
static void rtas_stop_self(void)
{
- struct rtas_args args = {
- .token = cpu_to_be32(rtas_stop_self_token),
+ static struct rtas_args args = {
.nargs = 0,
.nret = 1,
.rets = &args.args[0],
};
+ args.token = cpu_to_be32(rtas_stop_self_token);
+
local_irq_disable();
BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 573b488fc48b..7f75c94af822 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -100,10 +100,10 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz
start_pfn = base >> PAGE_SHIFT;
- if (!pfn_valid(start_pfn)) {
- memblock_remove(base, memblock_size);
- return 0;
- }
+ lock_device_hotplug();
+
+ if (!pfn_valid(start_pfn))
+ goto out;
block_sz = memory_block_size_bytes();
sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
@@ -114,8 +114,10 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz
base += MIN_MEMORY_BLOCK_SIZE;
}
+out:
/* Update memory regions for memory remove */
memblock_remove(base, memblock_size);
+ unlock_device_hotplug();
return 0;
}
diff --git a/arch/powerpc/sysdev/ppc4xx_pci.c b/arch/powerpc/sysdev/ppc4xx_pci.c
index 64603a10b863..4914fd3f41ec 100644
--- a/arch/powerpc/sysdev/ppc4xx_pci.c
+++ b/arch/powerpc/sysdev/ppc4xx_pci.c
@@ -1058,7 +1058,7 @@ static int __init apm821xx_pciex_core_init(struct device_node *np)
return 1;
}
-static int apm821xx_pciex_init_port_hw(struct ppc4xx_pciex_port *port)
+static int __init apm821xx_pciex_init_port_hw(struct ppc4xx_pciex_port *port)
{
u32 val;
OpenPOWER on IntegriCloud