diff options
Diffstat (limited to 'arch/powerpc')
57 files changed, 1709 insertions, 838 deletions
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index a1f8c7f1ec60..a33c23308e97 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -22,8 +22,14 @@ all: $(obj)/zImage BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -Os -msoft-float -pipe \ -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ - -isystem $(shell $(CROSS32CC) -print-file-name=include) \ - -mbig-endian + -isystem $(shell $(CROSS32CC) -print-file-name=include) +ifdef CONFIG_PPC64_BOOT_WRAPPER +BOOTCFLAGS += -m64 +endif +ifdef CONFIG_CPU_BIG_ENDIAN +BOOTCFLAGS += -mbig-endian +endif + BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc ifdef CONFIG_DEBUG_INFO @@ -99,6 +105,11 @@ src-plat-$(CONFIG_EMBEDDED6xx) += cuboot-pq2.c cuboot-mpc7448hpc2.c \ src-plat-$(CONFIG_AMIGAONE) += cuboot-amigaone.c src-plat-$(CONFIG_PPC_PS3) += ps3-head.S ps3-hvcall.S ps3.c src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c +src-plat-$(CONFIG_PPC_PSERIES) += pseries-head.S +src-plat-$(CONFIG_PPC_POWERNV) += pseries-head.S +src-plat-$(CONFIG_PPC_IBM_CELL_BLADE) += pseries-head.S +src-plat-$(CONFIG_PPC_CELLEB) += pseries-head.S +src-plat-$(CONFIG_PPC_CELL_QPACE) += pseries-head.S src-wlib := $(sort $(src-wlib-y)) src-plat := $(sort $(src-plat-y)) @@ -137,7 +148,11 @@ $(addprefix $(obj)/,$(libfdt) $(libfdtheader)): $(obj)/%: $(srctree)/scripts/dtc $(obj)/empty.c: @touch $@ -$(obj)/zImage.lds $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds: $(obj)/%: $(srctree)/$(src)/%.S +$(obj)/zImage.lds: $(obj)/%: $(srctree)/$(src)/%.S + $(CROSS32CC) $(cpp_flags) -E -Wp,-MD,$(depfile) -P -Upowerpc \ + -D__ASSEMBLY__ -DLINKER_SCRIPT -o $@ $< + +$(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S @cp $< $@ clean-files := $(zlib) $(zlibheader) $(zliblinuxheader) \ diff --git a/arch/powerpc/boot/addnote.c b/arch/powerpc/boot/addnote.c index 349b5530d2c4..9d9f6f334d3c 100644 --- a/arch/powerpc/boot/addnote.c +++ 
b/arch/powerpc/boot/addnote.c @@ -6,6 +6,8 @@ * * Copyright 2000 Paul Mackerras. * + * Adapted for 64 bit little endian images by Andrew Tauferner. + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -55,36 +57,61 @@ unsigned int rpanote[N_RPA_DESCR] = { #define ROUNDUP(len) (((len) + 3) & ~3) -unsigned char buf[512]; +unsigned char buf[1024]; +#define ELFDATA2LSB 1 +#define ELFDATA2MSB 2 +static int e_data = ELFDATA2MSB; +#define ELFCLASS32 1 +#define ELFCLASS64 2 +static int e_class = ELFCLASS32; #define GET_16BE(off) ((buf[off] << 8) + (buf[(off)+1])) -#define GET_32BE(off) ((GET_16BE(off) << 16) + GET_16BE((off)+2)) - -#define PUT_16BE(off, v) (buf[off] = ((v) >> 8) & 0xff, \ - buf[(off) + 1] = (v) & 0xff) -#define PUT_32BE(off, v) (PUT_16BE((off), (v) >> 16), \ - PUT_16BE((off) + 2, (v))) +#define GET_32BE(off) ((GET_16BE(off) << 16U) + GET_16BE((off)+2U)) +#define GET_64BE(off) ((((unsigned long long)GET_32BE(off)) << 32ULL) + \ + ((unsigned long long)GET_32BE((off)+4ULL))) +#define PUT_16BE(off, v)(buf[off] = ((v) >> 8) & 0xff, \ + buf[(off) + 1] = (v) & 0xff) +#define PUT_32BE(off, v)(PUT_16BE((off), (v) >> 16L), PUT_16BE((off) + 2, (v))) +#define PUT_64BE(off, v)((PUT_32BE((off), (v) >> 32L), \ + PUT_32BE((off) + 4, (v)))) + +#define GET_16LE(off) ((buf[off]) + (buf[(off)+1] << 8)) +#define GET_32LE(off) (GET_16LE(off) + (GET_16LE((off)+2U) << 16U)) +#define GET_64LE(off) ((unsigned long long)GET_32LE(off) + \ + (((unsigned long long)GET_32LE((off)+4ULL)) << 32ULL)) +#define PUT_16LE(off, v) (buf[off] = (v) & 0xff, \ + buf[(off) + 1] = ((v) >> 8) & 0xff) +#define PUT_32LE(off, v) (PUT_16LE((off), (v)), PUT_16LE((off) + 2, (v) >> 16L)) +#define PUT_64LE(off, v) (PUT_32LE((off), (v)), PUT_32LE((off) + 4, (v) >> 32L)) + +#define GET_16(off) (e_data == ELFDATA2MSB ? 
GET_16BE(off) : GET_16LE(off)) +#define GET_32(off) (e_data == ELFDATA2MSB ? GET_32BE(off) : GET_32LE(off)) +#define GET_64(off) (e_data == ELFDATA2MSB ? GET_64BE(off) : GET_64LE(off)) +#define PUT_16(off, v) (e_data == ELFDATA2MSB ? PUT_16BE(off, v) : \ + PUT_16LE(off, v)) +#define PUT_32(off, v) (e_data == ELFDATA2MSB ? PUT_32BE(off, v) : \ + PUT_32LE(off, v)) +#define PUT_64(off, v) (e_data == ELFDATA2MSB ? PUT_64BE(off, v) : \ + PUT_64LE(off, v)) /* Structure of an ELF file */ #define E_IDENT 0 /* ELF header */ -#define E_PHOFF 28 -#define E_PHENTSIZE 42 -#define E_PHNUM 44 -#define E_HSIZE 52 /* size of ELF header */ +#define E_PHOFF (e_class == ELFCLASS32 ? 28 : 32) +#define E_PHENTSIZE (e_class == ELFCLASS32 ? 42 : 54) +#define E_PHNUM (e_class == ELFCLASS32 ? 44 : 56) +#define E_HSIZE (e_class == ELFCLASS32 ? 52 : 64) #define EI_MAGIC 0 /* offsets in E_IDENT area */ #define EI_CLASS 4 #define EI_DATA 5 #define PH_TYPE 0 /* ELF program header */ -#define PH_OFFSET 4 -#define PH_FILESZ 16 -#define PH_HSIZE 32 /* size of program header */ +#define PH_OFFSET (e_class == ELFCLASS32 ? 4 : 8) +#define PH_FILESZ (e_class == ELFCLASS32 ? 16 : 32) +#define PH_HSIZE (e_class == ELFCLASS32 ? 
32 : 56) #define PT_NOTE 4 /* Program header type = note */ -#define ELFCLASS32 1 -#define ELFDATA2MSB 2 unsigned char elf_magic[4] = { 0x7f, 'E', 'L', 'F' }; @@ -92,8 +119,8 @@ int main(int ac, char **av) { int fd, n, i; - int ph, ps, np; - int nnote, nnote2, ns; + unsigned long ph, ps, np; + long nnote, nnote2, ns; if (ac != 2) { fprintf(stderr, "Usage: %s elf-file\n", av[0]); @@ -114,26 +141,27 @@ main(int ac, char **av) exit(1); } - if (n < E_HSIZE || memcmp(&buf[E_IDENT+EI_MAGIC], elf_magic, 4) != 0) + if (memcmp(&buf[E_IDENT+EI_MAGIC], elf_magic, 4) != 0) + goto notelf; + e_class = buf[E_IDENT+EI_CLASS]; + if (e_class != ELFCLASS32 && e_class != ELFCLASS64) + goto notelf; + e_data = buf[E_IDENT+EI_DATA]; + if (e_data != ELFDATA2MSB && e_data != ELFDATA2LSB) + goto notelf; + if (n < E_HSIZE) goto notelf; - if (buf[E_IDENT+EI_CLASS] != ELFCLASS32 - || buf[E_IDENT+EI_DATA] != ELFDATA2MSB) { - fprintf(stderr, "%s is not a big-endian 32-bit ELF image\n", - av[1]); - exit(1); - } - - ph = GET_32BE(E_PHOFF); - ps = GET_16BE(E_PHENTSIZE); - np = GET_16BE(E_PHNUM); + ph = (e_class == ELFCLASS32 ? 
GET_32(E_PHOFF) : GET_64(E_PHOFF)); + ps = GET_16(E_PHENTSIZE); + np = GET_16(E_PHNUM); if (ph < E_HSIZE || ps < PH_HSIZE || np < 1) goto notelf; if (ph + (np + 2) * ps + nnote + nnote2 > n) goto nospace; for (i = 0; i < np; ++i) { - if (GET_32BE(ph + PH_TYPE) == PT_NOTE) { + if (GET_32(ph + PH_TYPE) == PT_NOTE) { fprintf(stderr, "%s already has a note entry\n", av[1]); exit(0); @@ -148,15 +176,22 @@ main(int ac, char **av) /* fill in the program header entry */ ns = ph + 2 * ps; - PUT_32BE(ph + PH_TYPE, PT_NOTE); - PUT_32BE(ph + PH_OFFSET, ns); - PUT_32BE(ph + PH_FILESZ, nnote); + PUT_32(ph + PH_TYPE, PT_NOTE); + if (e_class == ELFCLASS32) + PUT_32(ph + PH_OFFSET, ns); + else + PUT_64(ph + PH_OFFSET, ns); + + if (e_class == ELFCLASS32) + PUT_32(ph + PH_FILESZ, nnote); + else + PUT_64(ph + PH_FILESZ, nnote); /* fill in the note area we point to */ /* XXX we should probably make this a proper section */ - PUT_32BE(ns, strlen(arch) + 1); - PUT_32BE(ns + 4, N_DESCR * 4); - PUT_32BE(ns + 8, 0x1275); + PUT_32(ns, strlen(arch) + 1); + PUT_32(ns + 4, N_DESCR * 4); + PUT_32(ns + 8, 0x1275); strcpy((char *) &buf[ns + 12], arch); ns += 12 + strlen(arch) + 1; for (i = 0; i < N_DESCR; ++i, ns += 4) @@ -164,21 +199,28 @@ main(int ac, char **av) /* fill in the second program header entry and the RPA note area */ ph += ps; - PUT_32BE(ph + PH_TYPE, PT_NOTE); - PUT_32BE(ph + PH_OFFSET, ns); - PUT_32BE(ph + PH_FILESZ, nnote2); + PUT_32(ph + PH_TYPE, PT_NOTE); + if (e_class == ELFCLASS32) + PUT_32(ph + PH_OFFSET, ns); + else + PUT_64(ph + PH_OFFSET, ns); + + if (e_class == ELFCLASS32) + PUT_32(ph + PH_FILESZ, nnote2); /* size of the RPA note, not the first note */ + else + PUT_64(ph + PH_FILESZ, nnote2); /* fill in the note area we point to */ - PUT_32BE(ns, strlen(rpaname) + 1); - PUT_32BE(ns + 4, sizeof(rpanote)); - PUT_32BE(ns + 8, 0x12759999); + PUT_32(ns, strlen(rpaname) + 1); + PUT_32(ns + 4, sizeof(rpanote)); + PUT_32(ns + 8, 0x12759999); strcpy((char *) &buf[ns + 12], rpaname); ns += 12 + ROUNDUP(strlen(rpaname) + 1); for 
(i = 0; i < N_RPA_DESCR; ++i, ns += 4) PUT_32BE(ns, rpanote[i]); /* Update the number of program headers */ - PUT_16BE(E_PHNUM, np + 2); + PUT_16(E_PHNUM, np + 2); /* write back */ lseek(fd, (long) 0, SEEK_SET); diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index 0f7428a37efb..14de4f8778a7 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -1,17 +1,20 @@ /* * Copyright (C) Paul Mackerras 1997. * + * Adapted for 64 bit LE PowerPC by Andrew Tauferner + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * NOTE: this code runs in 32 bit mode, is position-independent, - * and is packaged as ELF32. */ #include "ppc_asm.h" +RELA = 7 +RELACOUNT = 0x6ffffff9 + .text /* A procedure descriptor used when booting this as a COFF file. * When making COFF, this comes first in the link and we're @@ -21,6 +24,20 @@ _zimage_start_opd: .long 0x500000, 0, 0, 0 +#ifdef __powerpc64__ +.balign 8 +p_start: .llong _start +p_etext: .llong _etext +p_bss_start: .llong __bss_start +p_end: .llong _end + +p_toc: .llong __toc_start + 0x8000 - p_base +p_dyn: .llong __dynamic_start - p_base +p_rela: .llong __rela_dyn_start - p_base +p_prom: .llong 0 + .weak _platform_stack_top +p_pstack: .llong _platform_stack_top +#else p_start: .long _start p_etext: .long _etext p_bss_start: .long __bss_start @@ -28,6 +45,7 @@ p_end: .long _end .weak _platform_stack_top p_pstack: .long _platform_stack_top +#endif .weak _zimage_start .globl _zimage_start @@ -38,6 +56,7 @@ _zimage_start_lib: and the address where we're running. 
*/ bl .+4 p_base: mflr r10 /* r10 now points to runtime addr of p_base */ +#ifndef __powerpc64__ /* grab the link address of the dynamic section in r11 */ addis r11,r10,(_GLOBAL_OFFSET_TABLE_-p_base)@ha lwz r11,(_GLOBAL_OFFSET_TABLE_-p_base)@l(r11) @@ -51,8 +70,6 @@ p_base: mflr r10 /* r10 now points to runtime addr of p_base */ /* The dynamic section contains a series of tagged entries. * We need the RELA and RELACOUNT entries. */ -RELA = 7 -RELACOUNT = 0x6ffffff9 li r9,0 li r0,0 9: lwz r8,0(r12) /* get tag */ @@ -120,9 +137,164 @@ RELACOUNT = 0x6ffffff9 li r0,0 stwu r0,-16(r1) /* establish a stack frame */ 6: +#else /* __powerpc64__ */ + /* Save the prom pointer at p_prom. */ + std r5,(p_prom-p_base)(r10) + + /* Set r2 to the TOC. */ + ld r2,(p_toc-p_base)(r10) + add r2,r2,r10 + + /* Grab the link address of the dynamic section in r11. */ + ld r11,-32768(r2) + cmpwi r11,0 + beq 3f /* if not linked -pie then no dynamic section */ + + ld r11,(p_dyn-p_base)(r10) + add r11,r11,r10 + ld r9,(p_rela-p_base)(r10) + add r9,r9,r10 + li r7,0 + li r8,0 +9: ld r6,0(r11) /* get tag */ + cmpdi r6,0 + beq 12f /* end of list */ + cmpdi r6,RELA + bne 10f + ld r7,8(r11) /* get RELA pointer in r7 */ + b 11f +10: addis r6,r6,(-RELACOUNT)@ha + cmpdi r6,RELACOUNT@l + bne 11f + ld r8,8(r11) /* get RELACOUNT value in r8 */ +11: addi r11,r11,16 + b 9b +12: + cmpdi r7,0 /* check we have both RELA and RELACOUNT */ + cmpdi cr1,r8,0 + beq 3f + beq cr1,3f + + /* Calculate the runtime offset. */ + subf r7,r7,r9 + + /* Run through the list of relocations and process the + * R_PPC64_RELATIVE ones. 
*/ + mtctr r8 +13: ld r0,8(r9) /* ELF64_R_TYPE(reloc->r_info) */ + cmpdi r0,22 /* R_PPC64_RELATIVE */ + bne 3f + ld r6,0(r9) /* reloc->r_offset */ + ld r0,16(r9) /* reloc->r_addend */ + add r0,r0,r7 + stdx r0,r7,r6 + addi r9,r9,24 + bdnz 13b + + /* Do a cache flush for our text, in case the loader didn't */ +3: ld r9,p_start-p_base(r10) /* note: these are relocated now */ + ld r8,p_etext-p_base(r10) +4: dcbf r0,r9 + icbi r0,r9 + addi r9,r9,0x20 + cmpld cr0,r9,r8 + blt 4b + sync + isync + + /* Clear the BSS */ + ld r9,p_bss_start-p_base(r10) + ld r8,p_end-p_base(r10) + li r0,0 +5: std r0,0(r9) + addi r9,r9,8 + cmpld cr0,r9,r8 + blt 5b + + /* Possibly set up a custom stack */ + ld r8,p_pstack-p_base(r10) + cmpdi r8,0 + beq 6f + ld r1,0(r8) + li r0,0 + stdu r0,-16(r1) /* establish a stack frame */ +6: +#endif /* __powerpc64__ */ /* Call platform_init() */ bl platform_init /* Call start */ b start + +#ifdef __powerpc64__ + +#define PROM_FRAME_SIZE 512 +#define SAVE_GPR(n, base) std n,8*(n)(base) +#define REST_GPR(n, base) ld n,8*(n)(base) +#define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base) +#define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base) +#define SAVE_8GPRS(n, base) SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base) +#define SAVE_10GPRS(n, base) SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base) +#define REST_2GPRS(n, base) REST_GPR(n, base); REST_GPR(n+1, base) +#define REST_4GPRS(n, base) REST_2GPRS(n, base); REST_2GPRS(n+2, base) +#define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base) +#define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base) + +/* prom handles the jump into and return from firmware. The prom args pointer + is loaded in r3. 
*/ +.globl prom +prom: + mflr r0 + std r0,16(r1) + stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */ + + SAVE_GPR(2, r1) + SAVE_GPR(13, r1) + SAVE_8GPRS(14, r1) + SAVE_10GPRS(22, r1) + mfcr r10 + std r10,8*32(r1) + mfmsr r10 + std r10,8*33(r1) + + /* remove MSR_LE from msr but keep MSR_SF */ + mfmsr r10 + rldicr r10,r10,0,62 + mtsrr1 r10 + + /* Load FW address, set LR to label 1, and jump to FW */ + bl 0f +0: mflr r10 + addi r11,r10,(1f-0b) + mtlr r11 + + ld r10,(p_prom-0b)(r10) + mtsrr0 r10 + + rfid + +1: /* Return from OF */ + FIXUP_ENDIAN + + /* Restore registers and return. */ + rldicl r1,r1,0,32 + + /* Restore the MSR (back to 64 bits) */ + ld r10,8*(33)(r1) + mtmsr r10 + isync + + /* Restore other registers */ + REST_GPR(2, r1) + REST_GPR(13, r1) + REST_8GPRS(14, r1) + REST_10GPRS(22, r1) + ld r10,8*32(r1) + mtcr r10 + + addi r1,r1,PROM_FRAME_SIZE + ld r0,16(r1) + mtlr r0 + blr +#endif diff --git a/arch/powerpc/boot/elf_util.c b/arch/powerpc/boot/elf_util.c index 1567a0c0f05c..316552dea4d8 100644 --- a/arch/powerpc/boot/elf_util.c +++ b/arch/powerpc/boot/elf_util.c @@ -26,7 +26,11 @@ int parse_elf64(void *hdr, struct elf_info *info) elf64->e_ident[EI_MAG2] == ELFMAG2 && elf64->e_ident[EI_MAG3] == ELFMAG3 && elf64->e_ident[EI_CLASS] == ELFCLASS64 && +#ifdef __LITTLE_ENDIAN__ + elf64->e_ident[EI_DATA] == ELFDATA2LSB && +#else elf64->e_ident[EI_DATA] == ELFDATA2MSB && +#endif (elf64->e_type == ET_EXEC || elf64->e_type == ET_DYN) && elf64->e_machine == EM_PPC64)) diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c index a28f02165e97..d367a0aece2a 100644 --- a/arch/powerpc/boot/main.c +++ b/arch/powerpc/boot/main.c @@ -139,18 +139,18 @@ static struct addr_range prep_initrd(struct addr_range vmlinux, void *chosen, * edit the command line passed to vmlinux (by setting /chosen/bootargs). * The buffer is put in it's own section so that tools may locate it easier. 
*/ -static char cmdline[COMMAND_LINE_SIZE] +static char cmdline[BOOT_COMMAND_LINE_SIZE] __attribute__((__section__("__builtin_cmdline"))); static void prep_cmdline(void *chosen) { if (cmdline[0] == '\0') - getprop(chosen, "bootargs", cmdline, COMMAND_LINE_SIZE-1); + getprop(chosen, "bootargs", cmdline, BOOT_COMMAND_LINE_SIZE-1); printf("\n\rLinux/PowerPC load: %s", cmdline); /* If possible, edit the command line */ if (console_ops.edit_cmdline) - console_ops.edit_cmdline(cmdline, COMMAND_LINE_SIZE); + console_ops.edit_cmdline(cmdline, BOOT_COMMAND_LINE_SIZE); printf("\n\r"); /* Put the command line back into the devtree for the kernel */ @@ -174,7 +174,7 @@ void start(void) * built-in command line wasn't set by an external tool */ if ((loader_info.cmdline_len > 0) && (cmdline[0] == '\0')) memmove(cmdline, loader_info.cmdline, - min(loader_info.cmdline_len, COMMAND_LINE_SIZE-1)); + min(loader_info.cmdline_len, BOOT_COMMAND_LINE_SIZE-1)); if (console_ops.open && (console_ops.open() < 0)) exit(); diff --git a/arch/powerpc/boot/of.c b/arch/powerpc/boot/of.c index 62e2f43ec1df..7ca910cb2fc6 100644 --- a/arch/powerpc/boot/of.c +++ b/arch/powerpc/boot/of.c @@ -40,8 +40,8 @@ static void *of_try_claim(unsigned long size) #ifdef DEBUG printf(" trying: 0x%08lx\n\r", claim_base); #endif - addr = (unsigned long)of_claim(claim_base, size, 0); - if ((void *)addr != (void *)-1) + addr = (unsigned long) of_claim(claim_base, size, 0); + if (addr != PROM_ERROR) break; } if (addr == 0) diff --git a/arch/powerpc/boot/of.h b/arch/powerpc/boot/of.h index e4c68f7391c5..c8c1750aba0c 100644 --- a/arch/powerpc/boot/of.h +++ b/arch/powerpc/boot/of.h @@ -1,12 +1,15 @@ #ifndef _PPC_BOOT_OF_H_ #define _PPC_BOOT_OF_H_ +#include "swab.h" + typedef void *phandle; -typedef void *ihandle; +typedef u32 ihandle; void of_init(void *promptr); int of_call_prom(const char *service, int nargs, int nret, ...); -void *of_claim(unsigned long virt, unsigned long size, unsigned long align); +unsigned int 
of_claim(unsigned long virt, unsigned long size, + unsigned long align); void *of_vmlinux_alloc(unsigned long size); void of_exit(void); void *of_finddevice(const char *name); @@ -18,4 +21,16 @@ int of_setprop(const void *phandle, const char *name, const void *buf, /* Console functions */ void of_console_init(void); +typedef u32 __be32; + +#ifdef __LITTLE_ENDIAN__ +#define cpu_to_be32(x) swab32(x) +#define be32_to_cpu(x) swab32(x) +#else +#define cpu_to_be32(x) (x) +#define be32_to_cpu(x) (x) +#endif + +#define PROM_ERROR (-1u) + #endif /* _PPC_BOOT_OF_H_ */ diff --git a/arch/powerpc/boot/ofconsole.c b/arch/powerpc/boot/ofconsole.c index ce0e02424453..8b754702460a 100644 --- a/arch/powerpc/boot/ofconsole.c +++ b/arch/powerpc/boot/ofconsole.c @@ -18,7 +18,7 @@ #include "of.h" -static void *of_stdout_handle; +static unsigned int of_stdout_handle; static int of_console_open(void) { @@ -27,8 +27,10 @@ static int of_console_open(void) if (((devp = of_finddevice("/chosen")) != NULL) && (of_getprop(devp, "stdout", &of_stdout_handle, sizeof(of_stdout_handle)) - == sizeof(of_stdout_handle))) + == sizeof(of_stdout_handle))) { + of_stdout_handle = be32_to_cpu(of_stdout_handle); return 0; + } return -1; } diff --git a/arch/powerpc/boot/oflib.c b/arch/powerpc/boot/oflib.c index b0ec9cf3eaaf..46c98a47d949 100644 --- a/arch/powerpc/boot/oflib.c +++ b/arch/powerpc/boot/oflib.c @@ -16,74 +16,83 @@ #include "of.h" +typedef u32 prom_arg_t; + +/* The following structure is used to communicate with open firmware. + * All arguments in and out are in big endian format. */ +struct prom_args { + __be32 service; /* Address of service name string. */ + __be32 nargs; /* Number of input arguments. */ + __be32 nret; /* Number of output arguments. */ + __be32 args[10]; /* Input/output arguments. 
*/ +}; + +#ifdef __powerpc64__ +extern int prom(void *); +#else static int (*prom) (void *); +#endif void of_init(void *promptr) { +#ifndef __powerpc64__ prom = (int (*)(void *))promptr; +#endif } +#define ADDR(x) (u32)(unsigned long)(x) + int of_call_prom(const char *service, int nargs, int nret, ...) { int i; - struct prom_args { - const char *service; - int nargs; - int nret; - unsigned int args[12]; - } args; + struct prom_args args; va_list list; - args.service = service; - args.nargs = nargs; - args.nret = nret; + args.service = cpu_to_be32(ADDR(service)); + args.nargs = cpu_to_be32(nargs); + args.nret = cpu_to_be32(nret); va_start(list, nret); for (i = 0; i < nargs; i++) - args.args[i] = va_arg(list, unsigned int); + args.args[i] = cpu_to_be32(va_arg(list, prom_arg_t)); va_end(list); for (i = 0; i < nret; i++) args.args[nargs+i] = 0; if (prom(&args) < 0) - return -1; + return PROM_ERROR; - return (nret > 0)? args.args[nargs]: 0; + return (nret > 0) ? be32_to_cpu(args.args[nargs]) : 0; } static int of_call_prom_ret(const char *service, int nargs, int nret, - unsigned int *rets, ...) + prom_arg_t *rets, ...) { int i; - struct prom_args { - const char *service; - int nargs; - int nret; - unsigned int args[12]; - } args; + struct prom_args args; va_list list; - args.service = service; - args.nargs = nargs; - args.nret = nret; + args.service = cpu_to_be32(ADDR(service)); + args.nargs = cpu_to_be32(nargs); + args.nret = cpu_to_be32(nret); va_start(list, rets); for (i = 0; i < nargs; i++) - args.args[i] = va_arg(list, unsigned int); + args.args[i] = cpu_to_be32(va_arg(list, prom_arg_t)); va_end(list); for (i = 0; i < nret; i++) args.args[nargs+i] = 0; if (prom(&args) < 0) - return -1; + return PROM_ERROR; - if (rets != (void *) 0) + if (rets != NULL) for (i = 1; i < nret; ++i) - rets[i-1] = args.args[nargs+i]; + rets[i-1] = be32_to_cpu(args.args[nargs+i]); - return (nret > 0)? args.args[nargs]: 0; + return (nret > 0) ? 
be32_to_cpu(args.args[nargs]) : 0; } /* returns true if s2 is a prefix of s1 */ @@ -103,7 +112,7 @@ static int string_match(const char *s1, const char *s2) */ static int need_map = -1; static ihandle chosen_mmu; -static phandle memory; +static ihandle memory; static int check_of_version(void) { @@ -132,10 +141,10 @@ static int check_of_version(void) printf("no mmu\n"); return 0; } - memory = (ihandle) of_call_prom("open", 1, 1, "/memory"); - if (memory == (ihandle) -1) { - memory = (ihandle) of_call_prom("open", 1, 1, "/memory@0"); - if (memory == (ihandle) -1) { + memory = of_call_prom("open", 1, 1, "/memory"); + if (memory == PROM_ERROR) { + memory = of_call_prom("open", 1, 1, "/memory@0"); + if (memory == PROM_ERROR) { printf("no memory node\n"); return 0; } @@ -144,40 +153,41 @@ static int check_of_version(void) return 1; } -void *of_claim(unsigned long virt, unsigned long size, unsigned long align) +unsigned int of_claim(unsigned long virt, unsigned long size, + unsigned long align) { int ret; - unsigned int result; + prom_arg_t result; if (need_map < 0) need_map = check_of_version(); if (align || !need_map) - return (void *) of_call_prom("claim", 3, 1, virt, size, align); + return of_call_prom("claim", 3, 1, virt, size, align); ret = of_call_prom_ret("call-method", 5, 2, &result, "claim", memory, align, size, virt); if (ret != 0 || result == -1) - return (void *) -1; + return -1; ret = of_call_prom_ret("call-method", 5, 2, &result, "claim", chosen_mmu, align, size, virt); /* 0x12 == coherent + read/write */ ret = of_call_prom("call-method", 6, 1, "map", chosen_mmu, 0x12, size, virt, virt); - return (void *) virt; + return virt; } void *of_vmlinux_alloc(unsigned long size) { unsigned long start = (unsigned long)_start, end = (unsigned long)_end; - void *addr; + unsigned long addr; void *p; /* With some older POWER4 firmware we need to claim the area the kernel * will reside in. Newer firmwares don't need this so we just ignore * the return value. 
*/ - addr = of_claim(start, end - start, 0); - printf("Trying to claim from 0x%lx to 0x%lx (0x%lx) got %p\r\n", + addr = (unsigned long) of_claim(start, end - start, 0); + printf("Trying to claim from 0x%lx to 0x%lx (0x%lx) got %lx\r\n", start, end, end - start, addr); p = malloc(size); @@ -197,7 +207,7 @@ void of_exit(void) */ void *of_finddevice(const char *name) { - return (phandle) of_call_prom("finddevice", 1, 1, name); + return (void *) (unsigned long) of_call_prom("finddevice", 1, 1, name); } int of_getprop(const void *phandle, const char *name, void *buf, diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h index b3218ce451bb..8aad3c55aeda 100644 --- a/arch/powerpc/boot/ops.h +++ b/arch/powerpc/boot/ops.h @@ -15,7 +15,7 @@ #include "types.h" #include "string.h" -#define COMMAND_LINE_SIZE 512 +#define BOOT_COMMAND_LINE_SIZE 2048 #define MAX_PATH_LEN 256 #define MAX_PROP_LEN 256 /* What should this be? */ diff --git a/arch/powerpc/boot/ppc_asm.h b/arch/powerpc/boot/ppc_asm.h index eb0e98be69e0..35ea60c1f070 100644 --- a/arch/powerpc/boot/ppc_asm.h +++ b/arch/powerpc/boot/ppc_asm.h @@ -62,4 +62,16 @@ #define SPRN_TBRL 268 #define SPRN_TBRU 269 +#define FIXUP_ENDIAN \ + tdi 0, 0, 0x48; /* Reverse endian of b . + 8 */ \ + b $+36; /* Skip trampoline if endian is good */ \ + .long 0x05009f42; /* bcl 20,31,$+4 */ \ + .long 0xa602487d; /* mflr r10 */ \ + .long 0x1c004a39; /* addi r10,r10,28 */ \ + .long 0xa600607d; /* mfmsr r11 */ \ + .long 0x01006b69; /* xori r11,r11,1 */ \ + .long 0xa6035a7d; /* mtsrr0 r10 */ \ + .long 0xa6037b7d; /* mtsrr1 r11 */ \ + .long 0x2400004c /* rfid */ + #endif /* _PPC64_PPC_ASM_H */ diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c index 9954d98871d0..4ec2d86d3c50 100644 --- a/arch/powerpc/boot/ps3.c +++ b/arch/powerpc/boot/ps3.c @@ -47,13 +47,13 @@ BSS_STACK(4096); * The buffer is put in it's own section so that tools may locate it easier. 
*/ -static char cmdline[COMMAND_LINE_SIZE] +static char cmdline[BOOT_COMMAND_LINE_SIZE] __attribute__((__section__("__builtin_cmdline"))); static void prep_cmdline(void *chosen) { if (cmdline[0] == '\0') - getprop(chosen, "bootargs", cmdline, COMMAND_LINE_SIZE-1); + getprop(chosen, "bootargs", cmdline, BOOT_COMMAND_LINE_SIZE-1); else setprop_str(chosen, "bootargs", cmdline); diff --git a/arch/powerpc/boot/pseries-head.S b/arch/powerpc/boot/pseries-head.S new file mode 100644 index 000000000000..6ef6e02e80f9 --- /dev/null +++ b/arch/powerpc/boot/pseries-head.S @@ -0,0 +1,8 @@ +#include "ppc_asm.h" + + .text + + .globl _zimage_start +_zimage_start: + FIXUP_ENDIAN + b _zimage_start_lib diff --git a/arch/powerpc/boot/stdio.c b/arch/powerpc/boot/stdio.c index 5b57800bbc67..a701261b1781 100644 --- a/arch/powerpc/boot/stdio.c +++ b/arch/powerpc/boot/stdio.c @@ -21,6 +21,18 @@ size_t strnlen(const char * s, size_t count) return sc - s; } +#ifdef __powerpc64__ + +# define do_div(n, base) ({ \ + unsigned int __base = (base); \ + unsigned int __rem; \ + __rem = ((unsigned long long)(n)) % __base; \ + (n) = ((unsigned long long)(n)) / __base; \ + __rem; \ +}) + +#else + extern unsigned int __div64_32(unsigned long long *dividend, unsigned int divisor); @@ -39,6 +51,8 @@ extern unsigned int __div64_32(unsigned long long *dividend, __rem; \ }) +#endif /* __powerpc64__ */ + static int skip_atoi(const char **s) { int i, c; diff --git a/arch/powerpc/boot/swab.h b/arch/powerpc/boot/swab.h new file mode 100644 index 000000000000..d0e1431084ca --- /dev/null +++ b/arch/powerpc/boot/swab.h @@ -0,0 +1,29 @@ +#ifndef _PPC_BOOT_SWAB_H_ +#define _PPC_BOOT_SWAB_H_ + +static inline u16 swab16(u16 x) +{ + return ((x & (u16)0x00ffU) << 8) | + ((x & (u16)0xff00U) >> 8); +} + +static inline u32 swab32(u32 x) +{ + return ((x & (u32)0x000000ffUL) << 24) | + ((x & (u32)0x0000ff00UL) << 8) | + ((x & (u32)0x00ff0000UL) >> 8) | + ((x & (u32)0xff000000UL) >> 24); +} + +static inline u64 swab64(u64 x) +{ 
+ return (u64)((x & (u64)0x00000000000000ffULL) << 56) | + (u64)((x & (u64)0x000000000000ff00ULL) << 40) | + (u64)((x & (u64)0x0000000000ff0000ULL) << 24) | + (u64)((x & (u64)0x00000000ff000000ULL) << 8) | + (u64)((x & (u64)0x000000ff00000000ULL) >> 8) | + (u64)((x & (u64)0x0000ff0000000000ULL) >> 24) | + (u64)((x & (u64)0x00ff000000000000ULL) >> 40) | + (u64)((x & (u64)0xff00000000000000ULL) >> 56); +} +#endif /* _PPC_BOOT_SWAB_H_ */ diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index d27a25518b01..1948cf8b8a40 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -40,6 +40,7 @@ cacheit= binary= gzip=.gz pie= +format= # cross-compilation prefix CROSS= @@ -136,6 +137,14 @@ if [ -z "$kernel" ]; then kernel=vmlinux fi +elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`" +case "$elfformat" in + elf64-powerpcle) format=elf64lppc ;; + elf64-powerpc) format=elf32ppc ;; + elf32-powerpc) format=elf32ppc ;; +esac + + platformo=$object/"$platform".o lds=$object/zImage.lds ext=strip @@ -152,8 +161,12 @@ of) make_space=n ;; pseries) - platformo="$object/of.o $object/epapr.o" + platformo="$object/pseries-head.o $object/of.o $object/epapr.o" link_address='0x4000000' + if [ "$format" != "elf32ppc" ]; then + link_address= + pie=-pie + fi make_space=n ;; maple) @@ -379,7 +392,7 @@ if [ "$platform" != "miboot" ]; then if [ -n "$link_address" ] ; then text_start="-Ttext $link_address" fi - ${CROSS}ld -m elf32ppc -T $lds $text_start $pie -o "$ofile" \ + ${CROSS}ld -m $format -T $lds $text_start $pie -o "$ofile" \ $platformo $tmp $object/wrapper.a rm $tmp fi diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S index 2bd8731f1365..861e72109df2 100644 --- a/arch/powerpc/boot/zImage.lds.S +++ b/arch/powerpc/boot/zImage.lds.S @@ -1,4 +1,10 @@ +#include <asm-generic/vmlinux.lds.h> + +#ifdef CONFIG_PPC64_BOOT_WRAPPER +OUTPUT_ARCH(powerpc:common64) +#else OUTPUT_ARCH(powerpc:common) +#endif 
ENTRY(_zimage_start) EXTERN(_zimage_start) SECTIONS @@ -16,7 +22,9 @@ SECTIONS *(.rodata*) *(.data*) *(.sdata*) +#ifndef CONFIG_PPC64_BOOT_WRAPPER *(.got2) +#endif } .dynsym : { *(.dynsym) } .dynstr : { *(.dynstr) } @@ -27,7 +35,13 @@ SECTIONS } .hash : { *(.hash) } .interp : { *(.interp) } - .rela.dyn : { *(.rela*) } + .rela.dyn : + { +#ifdef CONFIG_PPC64_BOOT_WRAPPER + __rela_dyn_start = .; +#endif + *(.rela*) + } . = ALIGN(8); .kernel:dtb : @@ -53,6 +67,15 @@ SECTIONS _initrd_end = .; } +#ifdef CONFIG_PPC64_BOOT_WRAPPER + .got : + { + __toc_start = .; + *(.got) + *(.toc) + } +#endif + . = ALIGN(4096); .bss : { diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index d4dd41fb951b..d12529f34524 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -32,6 +32,22 @@ struct device_node; #ifdef CONFIG_EEH +/* EEH subsystem flags */ +#define EEH_ENABLED 0x1 /* EEH enabled */ +#define EEH_FORCE_DISABLED 0x2 /* EEH disabled */ +#define EEH_PROBE_MODE_DEV 0x4 /* From PCI device */ +#define EEH_PROBE_MODE_DEVTREE 0x8 /* From device tree */ + +/* + * Delay for PE reset, all in ms + * + * PCI specification has reset hold time of 100 milliseconds. + * We have 250 milliseconds here. The PCI bus settlement time + * is specified as 1.5 seconds and we have 1.8 seconds. + */ +#define EEH_PE_RST_HOLD_TIME 250 +#define EEH_PE_RST_SETTLE_TIME 1800 + /* * The struct is used to trace PE related EEH functionality. 
* In theory, there will have one instance of the struct to @@ -53,7 +69,7 @@ struct device_node; #define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */ #define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ -#define EEH_PE_PHB_DEAD (1 << 2) /* Dead PHB */ +#define EEH_PE_RESET (1 << 2) /* PE reset in progress */ #define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */ @@ -92,6 +108,8 @@ struct eeh_pe { #define EEH_DEV_NO_HANDLER (1 << 8) /* No error handler */ #define EEH_DEV_SYSFS (1 << 9) /* Sysfs created */ +#define EEH_DEV_REMOVED (1 << 10) /* Removed permanently */ +#define EEH_DEV_FRESET (1 << 11) /* Fundamental reset */ struct eeh_dev { int mode; /* EEH mode */ @@ -99,7 +117,9 @@ struct eeh_dev { int config_addr; /* Config address */ int pe_config_addr; /* PE config address */ u32 config_space[16]; /* Saved PCI config space */ - u8 pcie_cap; /* Saved PCIe capability */ + int pcix_cap; /* Saved PCIx capability */ + int pcie_cap; /* Saved PCIe capability */ + int aer_cap; /* Saved AER capability */ struct eeh_pe *pe; /* Associated PE */ struct list_head list; /* Form link list in the PE */ struct pci_controller *phb; /* Associated PHB */ @@ -171,37 +191,40 @@ struct eeh_ops { int (*restore_config)(struct device_node *dn); }; +extern int eeh_subsystem_flags; extern struct eeh_ops *eeh_ops; -extern bool eeh_subsystem_enabled; extern raw_spinlock_t confirm_error_lock; -extern int eeh_probe_mode; static inline bool eeh_enabled(void) { - return eeh_subsystem_enabled; + if ((eeh_subsystem_flags & EEH_FORCE_DISABLED) || + !(eeh_subsystem_flags & EEH_ENABLED)) + return false; + + return true; } static inline void eeh_set_enable(bool mode) { - eeh_subsystem_enabled = mode; + if (mode) + eeh_subsystem_flags |= EEH_ENABLED; + else + eeh_subsystem_flags &= ~EEH_ENABLED; } -#define EEH_PROBE_MODE_DEV (1<<0) /* From PCI device */ -#define EEH_PROBE_MODE_DEVTREE (1<<1) /* From device tree */ - static inline void eeh_probe_mode_set(int flag) { - eeh_probe_mode = flag; + 
eeh_subsystem_flags |= flag; } static inline int eeh_probe_mode_devtree(void) { - return (eeh_probe_mode == EEH_PROBE_MODE_DEVTREE); + return (eeh_subsystem_flags & EEH_PROBE_MODE_DEVTREE); } static inline int eeh_probe_mode_dev(void) { - return (eeh_probe_mode == EEH_PROBE_MODE_DEV); + return (eeh_subsystem_flags & EEH_PROBE_MODE_DEV); } static inline void eeh_serialize_lock(unsigned long *flags) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 5b6c03f1058f..374abc2e41d7 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -113,6 +113,8 @@ struct machdep_calls { /* Optional, may be NULL. */ void (*show_cpuinfo)(struct seq_file *m); void (*show_percpuinfo)(struct seq_file *m, int i); + /* Returns the current operating frequency of "cpu" in Hz */ + unsigned long (*get_proc_freq)(unsigned int cpu); void (*init_IRQ)(void); @@ -241,6 +243,9 @@ struct machdep_calls { /* Called during PCI resource reassignment */ resource_size_t (*pcibios_window_alignment)(struct pci_bus *, unsigned long type); + /* Reset the secondary bus of bridge */ + void (*pcibios_reset_secondary_bus)(struct pci_dev *dev); + /* Called to shutdown machine specific hardware not already controlled * by other drivers. */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index a2efdaa020b0..81720ff59a10 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -41,14 +41,14 @@ struct opal_takeover_args { * size except the last one in the list to be as well. 
*/ struct opal_sg_entry { - void *data; - long length; + __be64 data; + __be64 length; }; -/* sg list */ +/* SG list */ struct opal_sg_list { - unsigned long num_entries; - struct opal_sg_list *next; + __be64 length; + __be64 next; struct opal_sg_entry entry[]; }; @@ -858,8 +858,8 @@ int64_t opal_lpc_write(uint32_t chip_id, enum OpalLPCAddressType addr_type, int64_t opal_lpc_read(uint32_t chip_id, enum OpalLPCAddressType addr_type, uint32_t addr, __be32 *data, uint32_t sz); -int64_t opal_read_elog(uint64_t buffer, size_t size, uint64_t log_id); -int64_t opal_get_elog_size(uint64_t *log_id, size_t *size, uint64_t *elog_type); +int64_t opal_read_elog(uint64_t buffer, uint64_t size, uint64_t log_id); +int64_t opal_get_elog_size(__be64 *log_id, __be64 *size, __be64 *elog_type); int64_t opal_write_elog(uint64_t buffer, uint64_t size, uint64_t offset); int64_t opal_send_ack_elog(uint64_t log_id); void opal_resend_pending_logs(void); @@ -868,23 +868,24 @@ int64_t opal_validate_flash(uint64_t buffer, uint32_t *size, uint32_t *result); int64_t opal_manage_flash(uint8_t op); int64_t opal_update_flash(uint64_t blk_list); int64_t opal_dump_init(uint8_t dump_type); -int64_t opal_dump_info(uint32_t *dump_id, uint32_t *dump_size); -int64_t opal_dump_info2(uint32_t *dump_id, uint32_t *dump_size, uint32_t *dump_type); +int64_t opal_dump_info(__be32 *dump_id, __be32 *dump_size); +int64_t opal_dump_info2(__be32 *dump_id, __be32 *dump_size, __be32 *dump_type); int64_t opal_dump_read(uint32_t dump_id, uint64_t buffer); int64_t opal_dump_ack(uint32_t dump_id); int64_t opal_dump_resend_notification(void); -int64_t opal_get_msg(uint64_t buffer, size_t size); -int64_t opal_check_completion(uint64_t buffer, size_t size, uint64_t token); +int64_t opal_get_msg(uint64_t buffer, uint64_t size); +int64_t opal_check_completion(uint64_t buffer, uint64_t size, uint64_t token); int64_t opal_sync_host_reboot(void); int64_t opal_get_param(uint64_t token, uint32_t param_id, uint64_t buffer, - size_t 
length); + uint64_t length); int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer, - size_t length); + uint64_t length); int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data); /* Internal functions */ -extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); +extern int early_init_dt_scan_opal(unsigned long node, const char *uname, + int depth, void *data); extern int early_init_dt_scan_recoverable_ranges(unsigned long node, const char *uname, int depth, void *data); @@ -893,10 +894,6 @@ extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len); extern void hvc_opal_init_early(void); -/* Internal functions */ -extern int early_init_dt_scan_opal(unsigned long node, const char *uname, - int depth, void *data); - extern int opal_notifier_register(struct notifier_block *nb); extern int opal_notifier_unregister(struct notifier_block *nb); @@ -906,9 +903,6 @@ extern void opal_notifier_enable(void); extern void opal_notifier_disable(void); extern void opal_notifier_update_evt(uint64_t evt_mask, uint64_t evt_val); -extern int opal_get_chars(uint32_t vtermno, char *buf, int count); -extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len); - extern int __opal_async_get_token(void); extern int opal_async_get_token_interruptible(void); extern int __opal_async_release_token(int token); @@ -916,14 +910,13 @@ extern int opal_async_release_token(int token); extern int opal_async_wait_response(uint64_t token, struct opal_msg *msg); extern int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data); -extern void hvc_opal_init_early(void); - struct rtc_time; extern int opal_set_rtc_time(struct rtc_time *tm); extern void opal_get_rtc_time(struct rtc_time *tm); extern unsigned long opal_get_boot_time(void); extern void opal_nvram_init(void); extern void opal_flash_init(void); +extern void opal_flash_term_callback(void); extern int opal_elog_init(void); extern 
void opal_platform_dump_init(void); extern void opal_sys_param_init(void); @@ -937,6 +930,10 @@ extern int opal_resync_timebase(void); extern void opal_lpc_init(void); +struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, + unsigned long vmalloc_size); +void opal_free_sg_list(struct opal_sg_list *sg); + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_H */ diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index ed57fa7920c8..db1e2b8eff3c 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -58,6 +58,7 @@ int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); void eeh_pe_state_mark(struct eeh_pe *pe, int state); void eeh_pe_state_clear(struct eeh_pe *pe, int state); +void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode); void eeh_sysfs_add_device(struct pci_dev *pdev); void eeh_sysfs_remove_device(struct pci_dev *pdev); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index e5d2e0bc7e03..29de0152878f 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -215,6 +215,7 @@ #define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */ #define TEXASR_FS __MASK(63-36) /* Transaction Failure Summary */ #define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */ +#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */ #define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ #define SPRN_CTRLF 0x088 #define SPRN_CTRLT 0x098 diff --git a/arch/powerpc/include/uapi/asm/setup.h b/arch/powerpc/include/uapi/asm/setup.h index 552df83f1a49..ae3fb68cb28e 100644 --- a/arch/powerpc/include/uapi/asm/setup.h +++ b/arch/powerpc/include/uapi/asm/setup.h @@ -1 +1,6 @@ -#include <asm-generic/setup.h> +#ifndef _UAPI_ASM_POWERPC_SETUP_H +#define _UAPI_ASM_POWERPC_SETUP_H + +#define COMMAND_LINE_SIZE 2048 + +#endif /* _UAPI_ASM_POWERPC_SETUP_H */ diff --git 
a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index e7b76a6bf150..3764fb788d6c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -22,6 +22,7 @@ */ #include <linux/delay.h> +#include <linux/debugfs.h> #include <linux/sched.h> #include <linux/init.h> #include <linux/list.h> @@ -87,22 +88,21 @@ /* Time to wait for a PCI slot to report status, in milliseconds */ #define PCI_BUS_RESET_WAIT_MSEC (5*60*1000) -/* Platform dependent EEH operations */ -struct eeh_ops *eeh_ops = NULL; - -bool eeh_subsystem_enabled = false; -EXPORT_SYMBOL(eeh_subsystem_enabled); - /* - * EEH probe mode support. The intention is to support multiple - * platforms for EEH. Some platforms like pSeries do PCI emunation - * based on device tree. However, other platforms like powernv probe - * PCI devices from hardware. The flag is used to distinguish that. - * In addition, struct eeh_ops::probe would be invoked for particular - * OF node or PCI device so that the corresponding PE would be created - * there. + * EEH probe mode support, which is part of the flags, + * is to support multiple platforms for EEH. Some platforms + * like pSeries do PCI emunation based on device tree. + * However, other platforms like powernv probe PCI devices + * from hardware. The flag is used to distinguish that. + * In addition, struct eeh_ops::probe would be invoked for + * particular OF node or PCI device so that the corresponding + * PE would be created there. 
*/ -int eeh_probe_mode; +int eeh_subsystem_flags; +EXPORT_SYMBOL(eeh_subsystem_flags); + +/* Platform dependent EEH operations */ +struct eeh_ops *eeh_ops = NULL; /* Lock to avoid races due to multiple reports of an error */ DEFINE_RAW_SPINLOCK(confirm_error_lock); @@ -133,6 +133,15 @@ static struct eeh_stats eeh_stats; #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) +static int __init eeh_setup(char *str) +{ + if (!strcmp(str, "off")) + eeh_subsystem_flags |= EEH_FORCE_DISABLED; + + return 1; +} +__setup("eeh=", eeh_setup); + /** * eeh_gather_pci_data - Copy assorted PCI config space registers to buff * @edev: device to report data for @@ -145,73 +154,67 @@ static struct eeh_stats eeh_stats; static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) { struct device_node *dn = eeh_dev_to_of_node(edev); - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); u32 cfg; int cap, i; int n = 0; n += scnprintf(buf+n, len-n, "%s\n", dn->full_name); - printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name); + pr_warn("EEH: of node=%s\n", dn->full_name); eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg); n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); - printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg); + pr_warn("EEH: PCI device/vendor: %08x\n", cfg); eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg); n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); - printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg); - - if (!dev) { - printk(KERN_WARNING "EEH: no PCI device for this of node\n"); - return n; - } + pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); /* Gather bridge-specific registers */ - if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) { + if (edev->mode & EEH_DEV_BRIDGE) { eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg); n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); - printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg); + pr_warn("EEH: Bridge secondary status: %04x\n", cfg); 
eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg); n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); - printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg); + pr_warn("EEH: Bridge control: %04x\n", cfg); } /* Dump out the PCI-X command and status regs */ - cap = pci_find_capability(dev, PCI_CAP_ID_PCIX); + cap = edev->pcix_cap; if (cap) { eeh_ops->read_config(dn, cap, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); - printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg); + pr_warn("EEH: PCI-X cmd: %08x\n", cfg); eeh_ops->read_config(dn, cap+4, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); - printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg); + pr_warn("EEH: PCI-X status: %08x\n", cfg); } - /* If PCI-E capable, dump PCI-E cap 10, and the AER */ - if (pci_is_pcie(dev)) { + /* If PCI-E capable, dump PCI-E cap 10 */ + cap = edev->pcie_cap; + if (cap) { n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); - printk(KERN_WARNING - "EEH: PCI-E capabilities and status follow:\n"); + pr_warn("EEH: PCI-E capabilities and status follow:\n"); for (i=0; i<=8; i++) { - eeh_ops->read_config(dn, dev->pcie_cap+4*i, 4, &cfg); + eeh_ops->read_config(dn, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); - printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg); + pr_warn("EEH: PCI-E %02x: %08x\n", i, cfg); } + } - cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); - if (cap) { - n += scnprintf(buf+n, len-n, "pci-e AER:\n"); - printk(KERN_WARNING - "EEH: PCI-E AER capability register set follows:\n"); - - for (i=0; i<14; i++) { - eeh_ops->read_config(dn, cap+4*i, 4, &cfg); - n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); - printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg); - } + /* If AER capable, dump it */ + cap = edev->aer_cap; + if (cap) { + n += scnprintf(buf+n, len-n, "pci-e AER:\n"); + pr_warn("EEH: PCI-E AER capability register set follows:\n"); + + for (i=0; i<14; i++) { + eeh_ops->read_config(dn, 
cap+4*i, 4, &cfg); + n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); + pr_warn("EEH: PCI-E AER %02x: %08x\n", i, cfg); } } @@ -232,21 +235,19 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) { size_t loglen = 0; struct eeh_dev *edev, *tmp; - bool valid_cfg_log = true; /* * When the PHB is fenced or dead, it's pointless to collect * the data from PCI config space because it should return * 0xFF's. For ER, we still retrieve the data from the PCI * config space. + * + * For pHyp, we have to enable IO for log retrieval. Otherwise, + * 0xFF's is always returned from PCI config space. */ - if (eeh_probe_mode_dev() && - (pe->type & EEH_PE_PHB) && - (pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD))) - valid_cfg_log = false; - - if (valid_cfg_log) { - eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + if (!(pe->type & EEH_PE_PHB)) { + if (eeh_probe_mode_devtree()) + eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); @@ -309,7 +310,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) /* If the PHB has been in problematic state */ eeh_serialize_lock(&flags); - if (phb_pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)) { + if (phb_pe->state & EEH_PE_ISOLATED) { ret = 0; goto out; } @@ -515,16 +516,42 @@ EXPORT_SYMBOL(eeh_check_failure); */ int eeh_pci_enable(struct eeh_pe *pe, int function) { - int rc; + int rc, flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + + /* + * pHyp doesn't allow to enable IO or DMA on unfrozen PE. + * Also, it's pointless to enable them on unfrozen PE. So + * we have the check here. 
+ */ + if (function == EEH_OPT_THAW_MMIO || + function == EEH_OPT_THAW_DMA) { + rc = eeh_ops->get_state(pe, NULL); + if (rc < 0) + return rc; + + /* Needn't to enable or already enabled */ + if ((rc == EEH_STATE_NOT_SUPPORT) || + ((rc & flags) == flags)) + return 0; + } rc = eeh_ops->set_option(pe, function); if (rc) - pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n", - __func__, function, pe->phb->global_number, pe->addr, rc); + pr_warn("%s: Unexpected state change %d on " + "PHB#%d-PE#%x, err=%d\n", + __func__, function, pe->phb->global_number, + pe->addr, rc); rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) && - (function == EEH_OPT_THAW_MMIO)) + if (rc <= 0) + return rc; + + if ((function == EEH_OPT_THAW_MMIO) && + (rc & EEH_STATE_MMIO_ENABLED)) + return 0; + + if ((function == EEH_OPT_THAW_DMA) && + (rc & EEH_STATE_DMA_ENABLED)) return 0; return rc; @@ -612,26 +639,7 @@ static void eeh_reset_pe_once(struct eeh_pe *pe) else eeh_ops->reset(pe, EEH_RESET_HOT); - /* The PCI bus requires that the reset be held high for at least - * a 100 milliseconds. We wait a bit longer 'just in case'. - */ -#define PCI_BUS_RST_HOLD_TIME_MSEC 250 - msleep(PCI_BUS_RST_HOLD_TIME_MSEC); - - /* We might get hit with another EEH freeze as soon as the - * pci slot reset line is dropped. Make sure we don't miss - * these, and clear the flag now. - */ - eeh_pe_state_clear(pe, EEH_PE_ISOLATED); - eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); - - /* After a PCI slot has been reset, the PCI Express spec requires - * a 1.5 second idle time for the bus to stabilize, before starting - * up traffic. - */ -#define PCI_BUS_SETTLE_TIME_MSEC 1800 - msleep(PCI_BUS_SETTLE_TIME_MSEC); } /** @@ -651,6 +659,10 @@ int eeh_reset_pe(struct eeh_pe *pe) for (i=0; i<3; i++) { eeh_reset_pe_once(pe); + /* + * EEH_PE_ISOLATED is expected to be removed after + * BAR restore. 
+ */ rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); if ((rc & flags) == flags) return 0; @@ -826,8 +838,8 @@ int eeh_init(void) &hose_list, list_node) pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL); } else { - pr_warning("%s: Invalid probe mode %d\n", - __func__, eeh_probe_mode); + pr_warn("%s: Invalid probe mode %x", + __func__, eeh_subsystem_flags); return -EINVAL; } @@ -1102,10 +1114,45 @@ static const struct file_operations proc_eeh_operations = { .release = single_release, }; +#ifdef CONFIG_DEBUG_FS +static int eeh_enable_dbgfs_set(void *data, u64 val) +{ + if (val) + eeh_subsystem_flags &= ~EEH_FORCE_DISABLED; + else + eeh_subsystem_flags |= EEH_FORCE_DISABLED; + + /* Notify the backend */ + if (eeh_ops->post_init) + eeh_ops->post_init(); + + return 0; +} + +static int eeh_enable_dbgfs_get(void *data, u64 *val) +{ + if (eeh_enabled()) + *val = 0x1ul; + else + *val = 0x0ul; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get, + eeh_enable_dbgfs_set, "0x%llx\n"); +#endif + static int __init eeh_init_proc(void) { - if (machine_is(pseries) || machine_is(powernv)) + if (machine_is(pseries) || machine_is(powernv)) { proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations); +#ifdef CONFIG_DEBUG_FS + debugfs_create_file("eeh_enable", 0600, + powerpc_debugfs_root, NULL, + &eeh_enable_dbgfs_ops); +#endif + } + return 0; } __initcall(eeh_init_proc); diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index bb61ca58ca6d..7100a5b96e70 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -171,6 +171,15 @@ static void eeh_enable_irq(struct pci_dev *dev) } } +static bool eeh_dev_removed(struct eeh_dev *edev) +{ + /* EEH device removed ? 
*/ + if (!edev || (edev->mode & EEH_DEV_REMOVED)) + return true; + + return false; +} + /** * eeh_report_error - Report pci error to each device driver * @data: eeh device @@ -187,10 +196,8 @@ static void *eeh_report_error(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - /* We might not have the associated PCI device, - * then we should continue for next one. - */ - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_frozen; driver = eeh_pcid_get(dev); @@ -230,6 +237,9 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; + if (!dev || eeh_dev_removed(edev)) + return NULL; + driver = eeh_pcid_get(dev); if (!driver) return NULL; @@ -267,7 +277,8 @@ static void *eeh_report_reset(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_normal; driver = eeh_pcid_get(dev); @@ -307,7 +318,8 @@ static void *eeh_report_resume(void *data, void *userdata) struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_driver *driver; - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_normal; driver = eeh_pcid_get(dev); @@ -343,7 +355,8 @@ static void *eeh_report_failure(void *data, void *userdata) struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_driver *driver; - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_perm_failure; driver = eeh_pcid_get(dev); @@ -380,6 +393,16 @@ static void *eeh_rmv_device(void *data, void *userdata) if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) return NULL; + /* + * We rely on count-based pcibios_release_device() to + * detach permanently offlined PEs. 
Unfortunately, that's + * not reliable enough. We might have the permanently + * offlined PEs attached, but we needn't take care of + * them and their child devices. + */ + if (eeh_dev_removed(edev)) + return NULL; + driver = eeh_pcid_get(dev); if (driver) { eeh_pcid_put(dev); @@ -417,6 +440,36 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) return NULL; } +/* + * Explicitly clear PE's frozen state for PowerNV where + * we have frozen PE until BAR restore is completed. It's + * harmless to clear it for pSeries. To be consistent with + * PE reset (for 3 times), we try to clear the frozen state + * for 3 times as well. + */ +static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) +{ + int i, rc; + + for (i = 0; i < 3; i++) { + rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + if (rc) + continue; + rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); + if (!rc) + break; + } + + /* The PE has been isolated, clear it */ + if (rc) + pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n", + __func__, pe->phb->global_number, pe->addr, rc); + else + eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + + return rc; +} + /** * eeh_reset_device - Perform actual reset of a pci slot * @pe: EEH PE @@ -451,19 +504,33 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed); } - /* Reset the pci controller. (Asserts RST#; resets config space). + /* + * Reset the pci controller. (Asserts RST#; resets config space). * Reconfigure bridges and devices. Don't try to bring the system * up if the reset failed for some reason. + * + * During the reset, it's very dangerous to have uncontrolled PCI + * config accesses. So we prefer to block them. However, controlled + * PCI config accesses initiated from EEH itself are allowed. 
*/ + eeh_pe_state_mark(pe, EEH_PE_RESET); rc = eeh_reset_pe(pe); - if (rc) + if (rc) { + eeh_pe_state_clear(pe, EEH_PE_RESET); return rc; + } pci_lock_rescan_remove(); /* Restore PE */ eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); + eeh_pe_state_clear(pe, EEH_PE_RESET); + + /* Clear frozen state */ + rc = eeh_clear_pe_frozen_state(pe); + if (rc) + return rc; /* Give the system 5 seconds to finish running the user-space * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, @@ -573,7 +640,6 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) result = PCI_ERS_RESULT_NEED_RESET; } else { pr_info("EEH: Notify device drivers to resume I/O\n"); - result = PCI_ERS_RESULT_NONE; eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result); } } @@ -585,10 +651,17 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) if (rc < 0) goto hard_fail; - if (rc) + if (rc) { result = PCI_ERS_RESULT_NEED_RESET; - else + } else { + /* + * We didn't do PE reset for the case. The PE + * is still in frozen state. Clear it before + * resuming the PE. + */ + eeh_pe_state_clear(pe, EEH_PE_ISOLATED); result = PCI_ERS_RESULT_RECOVERED; + } } /* If any device has a hard failure, then shut off everything. */ @@ -650,8 +723,17 @@ perm_error: /* Notify all devices that they're about to go down. */ eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); - /* Shut down the device drivers for good. */ + /* Mark the PE to be removed permanently */ + pe->freeze_count = EEH_MAX_ALLOWED_FREEZES + 1; + + /* + * Shut down the device drivers for good. We mark + * all removed devices correctly to avoid access + * the their PCI config any more. 
+ */ if (frozen_bus) { + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + pci_lock_rescan_remove(); pcibios_remove_pci_devices(frozen_bus); pci_unlock_rescan_remove(); @@ -682,8 +764,7 @@ static void eeh_handle_special_event(void) phb_pe = eeh_phb_pe_get(hose); if (!phb_pe) continue; - eeh_pe_state_mark(phb_pe, - EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); + eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); } eeh_serialize_unlock(flags); @@ -699,8 +780,7 @@ static void eeh_handle_special_event(void) eeh_remove_event(pe); if (rc == EEH_NEXT_ERR_DEAD_PHB) - eeh_pe_state_mark(pe, - EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); else eeh_pe_state_mark(pe, EEH_PE_ISOLATED | EEH_PE_RECOVERING); @@ -724,12 +804,14 @@ static void eeh_handle_special_event(void) if (rc == EEH_NEXT_ERR_FROZEN_PE || rc == EEH_NEXT_ERR_FENCED_PHB) { eeh_handle_normal_event(pe); + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); } else { pci_lock_rescan_remove(); list_for_each_entry(hose, &hose_list, list_node) { phb_pe = eeh_phb_pe_get(hose); if (!phb_pe || - !(phb_pe->state & EEH_PE_PHB_DEAD)) + !(phb_pe->state & EEH_PE_ISOLATED) || + (phb_pe->state & EEH_PE_RECOVERING)) continue; /* Notify all devices to be down */ diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index f0c353fa655a..995c2a284630 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -503,13 +503,17 @@ static void *__eeh_pe_state_mark(void *data, void *flag) struct eeh_dev *edev, *tmp; struct pci_dev *pdev; - /* - * Mark the PE with the indicated state. Also, - * the associated PCI device will be put into - * I/O frozen state to avoid I/O accesses from - * the PCI device driver. 
- */ + /* Keep the state of permanently removed PE intact */ + if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) && + (state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING))) + return NULL; + pe->state |= state; + + /* Offline PCI devices if applicable */ + if (state != EEH_PE_ISOLATED) + return NULL; + eeh_pe_for_each_dev(pe, edev, tmp) { pdev = eeh_dev_to_pci_dev(edev); if (pdev) @@ -532,6 +536,27 @@ void eeh_pe_state_mark(struct eeh_pe *pe, int state) eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); } +static void *__eeh_pe_dev_mode_mark(void *data, void *flag) +{ + struct eeh_dev *edev = data; + int mode = *((int *)flag); + + edev->mode |= mode; + + return NULL; +} + +/** + * eeh_pe_dev_state_mark - Mark state for all device under the PE + * @pe: EEH PE + * + * Mark specific state for all child devices of the PE. + */ +void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode) +{ + eeh_pe_dev_traverse(pe, __eeh_pe_dev_mode_mark, &mode); +} + /** * __eeh_pe_state_clear - Clear state for the PE * @data: EEH PE @@ -546,8 +571,16 @@ static void *__eeh_pe_state_clear(void *data, void *flag) struct eeh_pe *pe = (struct eeh_pe *)data; int state = *((int *)flag); + /* Keep the state of permanently removed PE intact */ + if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) && + (state & EEH_PE_ISOLATED)) + return NULL; + pe->state &= ~state; - pe->check_count = 0; + + /* Clear check count since last isolation */ + if (state & EEH_PE_ISOLATED) + pe->check_count = 0; return NULL; } diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 2230fd0ca3e4..02667744fbb5 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -69,7 +69,7 @@ int __init early_init_dt_scan_fw_dump(unsigned long node, */ token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL); if (!token) - return 0; + return 1; fw_dump.fadump_supported = 1; fw_dump.ibm_configure_kernel_dump = *token; @@ -92,7 +92,7 @@ int __init early_init_dt_scan_fw_dump(unsigned long 
node, &size); if (!sections) - return 0; + return 1; num_sections = size / (3 * sizeof(u32)); @@ -110,6 +110,7 @@ int __init early_init_dt_scan_fw_dump(unsigned long node, break; } } + return 1; } diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 40bd7bd4e19a..85fb16e64cef 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -71,8 +71,9 @@ static int __init add_legacy_port(struct device_node *np, int want_index, phys_addr_t taddr, unsigned long irq, upf_t flags, int irq_check_parent) { - const __be32 *clk, *spd; + const __be32 *clk, *spd, *rs; u32 clock = BASE_BAUD * 16; + u32 shift = 0; int index; /* get clock freq. if present */ @@ -83,6 +84,11 @@ static int __init add_legacy_port(struct device_node *np, int want_index, /* get default speed if present */ spd = of_get_property(np, "current-speed", NULL); + /* get register shift if present */ + rs = of_get_property(np, "reg-shift", NULL); + if (rs && *rs) + shift = be32_to_cpup(rs); + /* If we have a location index, then try to use it */ if (want_index >= 0 && want_index < MAX_LEGACY_SERIAL_PORTS) index = want_index; @@ -126,6 +132,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index, legacy_serial_ports[index].uartclk = clock; legacy_serial_ports[index].irq = irq; legacy_serial_ports[index].flags = flags; + legacy_serial_ports[index].regshift = shift; legacy_serial_infos[index].taddr = taddr; legacy_serial_infos[index].np = of_node_get(np); legacy_serial_infos[index].clock = clock; @@ -163,9 +170,8 @@ static int __init add_legacy_soc_port(struct device_node *np, if (of_get_property(np, "clock-frequency", NULL) == NULL) return -1; - /* if reg-shift or offset, don't try to use it */ - if ((of_get_property(np, "reg-shift", NULL) != NULL) || - (of_get_property(np, "reg-offset", NULL) != NULL)) + /* if reg-offset don't try to use it */ + if ((of_get_property(np, "reg-offset", NULL) != NULL)) return -1; /* if 
rtas uses this device, don't try to use it as well */ @@ -315,17 +321,20 @@ static void __init setup_legacy_serial_console(int console) struct legacy_serial_info *info = &legacy_serial_infos[console]; struct plat_serial8250_port *port = &legacy_serial_ports[console]; void __iomem *addr; + unsigned int stride; + + stride = 1 << port->regshift; /* Check if a translated MMIO address has been found */ if (info->taddr) { addr = ioremap(info->taddr, 0x1000); if (addr == NULL) return; - udbg_uart_init_mmio(addr, 1); + udbg_uart_init_mmio(addr, stride); } else { /* Check if it's PIO and we support untranslated PIO */ if (port->iotype == UPIO_PORT && isa_io_special) - udbg_uart_init_pio(port->iobase, 1); + udbg_uart_init_pio(port->iobase, stride); else return; } diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index d9476c1fc959..add166aa806a 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -21,6 +21,7 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/bootmem.h> +#include <linux/delay.h> #include <linux/export.h> #include <linux/of_address.h> #include <linux/of_pci.h> @@ -120,6 +121,25 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus, return 1; } +void pcibios_reset_secondary_bus(struct pci_dev *dev) +{ + u16 ctrl; + + if (ppc_md.pcibios_reset_secondary_bus) { + ppc_md.pcibios_reset_secondary_bus(dev); + return; + } + + pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); + ctrl |= PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); + msleep(2); + + ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); + ssleep(1); +} + static resource_size_t pcibios_io_size(const struct pci_controller *hose) { #ifdef CONFIG_PPC64 @@ -666,60 +686,36 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar, void pci_process_bridge_OF_ranges(struct pci_controller *hose, struct device_node *dev, int primary) { - 
const __be32 *ranges; - int rlen; - int pna = of_n_addr_cells(dev); - int np = pna + 5; int memno = 0; - u32 pci_space; - unsigned long long pci_addr, cpu_addr, pci_next, cpu_next, size; struct resource *res; + struct of_pci_range range; + struct of_pci_range_parser parser; printk(KERN_INFO "PCI host bridge %s %s ranges:\n", dev->full_name, primary ? "(primary)" : ""); - /* Get ranges property */ - ranges = of_get_property(dev, "ranges", &rlen); - if (ranges == NULL) + /* Check for ranges property */ + if (of_pci_range_parser_init(&parser, dev)) return; /* Parse it */ - while ((rlen -= np * 4) >= 0) { - /* Read next ranges element */ - pci_space = of_read_number(ranges, 1); - pci_addr = of_read_number(ranges + 1, 2); - cpu_addr = of_translate_address(dev, ranges + 3); - size = of_read_number(ranges + pna + 3, 2); - ranges += np; - + for_each_of_pci_range(&parser, &range) { /* If we failed translation or got a zero-sized region * (some FW try to feed us with non sensical zero sized regions * such as power3 which look like some kind of attempt at exposing * the VGA memory hole) */ - if (cpu_addr == OF_BAD_ADDR || size == 0) + if (range.cpu_addr == OF_BAD_ADDR || range.size == 0) continue; - /* Now consume following elements while they are contiguous */ - for (; rlen >= np * sizeof(u32); - ranges += np, rlen -= np * 4) { - if (of_read_number(ranges, 1) != pci_space) - break; - pci_next = of_read_number(ranges + 1, 2); - cpu_next = of_translate_address(dev, ranges + 3); - if (pci_next != pci_addr + size || - cpu_next != cpu_addr + size) - break; - size += of_read_number(ranges + pna + 3, 2); - } - /* Act based on address space type */ res = NULL; - switch ((pci_space >> 24) & 0x3) { - case 1: /* PCI IO space */ + switch (range.flags & IORESOURCE_TYPE_BITS) { + case IORESOURCE_IO: printk(KERN_INFO " IO 0x%016llx..0x%016llx -> 0x%016llx\n", - cpu_addr, cpu_addr + size - 1, pci_addr); + range.cpu_addr, range.cpu_addr + range.size - 1, + range.pci_addr); /* We support only 
one IO range */ if (hose->pci_io_size) { @@ -729,11 +725,12 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose, } #ifdef CONFIG_PPC32 /* On 32 bits, limit I/O space to 16MB */ - if (size > 0x01000000) - size = 0x01000000; + if (range.size > 0x01000000) + range.size = 0x01000000; /* 32 bits needs to map IOs here */ - hose->io_base_virt = ioremap(cpu_addr, size); + hose->io_base_virt = ioremap(range.cpu_addr, + range.size); /* Expect trouble if pci_addr is not 0 */ if (primary) @@ -743,20 +740,20 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose, /* pci_io_size and io_base_phys always represent IO * space starting at 0 so we factor in pci_addr */ - hose->pci_io_size = pci_addr + size; - hose->io_base_phys = cpu_addr - pci_addr; + hose->pci_io_size = range.pci_addr + range.size; + hose->io_base_phys = range.cpu_addr - range.pci_addr; /* Build resource */ res = &hose->io_resource; - res->flags = IORESOURCE_IO; - res->start = pci_addr; + range.cpu_addr = range.pci_addr; break; - case 2: /* PCI Memory space */ - case 3: /* PCI 64 bits Memory space */ + case IORESOURCE_MEM: printk(KERN_INFO " MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n", - cpu_addr, cpu_addr + size - 1, pci_addr, - (pci_space & 0x40000000) ? "Prefetch" : ""); + range.cpu_addr, range.cpu_addr + range.size - 1, + range.pci_addr, + (range.pci_space & 0x40000000) ? 
+ "Prefetch" : ""); /* We support only 3 memory ranges */ if (memno >= 3) { @@ -765,28 +762,21 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose, continue; } /* Handles ISA memory hole space here */ - if (pci_addr == 0) { + if (range.pci_addr == 0) { if (primary || isa_mem_base == 0) - isa_mem_base = cpu_addr; - hose->isa_mem_phys = cpu_addr; - hose->isa_mem_size = size; + isa_mem_base = range.cpu_addr; + hose->isa_mem_phys = range.cpu_addr; + hose->isa_mem_size = range.size; } /* Build resource */ - hose->mem_offset[memno] = cpu_addr - pci_addr; + hose->mem_offset[memno] = range.cpu_addr - + range.pci_addr; res = &hose->mem_resources[memno++]; - res->flags = IORESOURCE_MEM; - if (pci_space & 0x40000000) - res->flags |= IORESOURCE_PREFETCH; - res->start = cpu_addr; break; } if (res != NULL) { - res->name = dev->full_name; - res->end = res->start + size - 1; - res->parent = NULL; - res->sibling = NULL; - res->child = NULL; + of_pci_range_to_resource(&range, dev, res); } } } diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 83c26d829991..ea6470c21f4e 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -304,6 +304,9 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus, struct pci_dev *dev = NULL; const __be32 *reg; int reglen, devfn; +#ifdef CONFIG_EEH + struct eeh_dev *edev = of_node_to_eeh_dev(dn); +#endif pr_debug(" * %s\n", dn->full_name); if (!of_device_is_available(dn)) @@ -321,6 +324,12 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus, return dev; } + /* Device removed permanently ? 
*/ +#ifdef CONFIG_EEH + if (edev && (edev->mode & EEH_DEV_REMOVED)) + return NULL; +#endif + /* create a new pci_dev for this device */ dev = of_create_pci_dev(dn, bus, devfn); if (!dev) diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 3bd77edd7610..450850a49dce 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -120,6 +120,7 @@ EXPORT_SYMBOL(giveup_spe); EXPORT_SYMBOL(flush_instruction_cache); #endif EXPORT_SYMBOL(flush_dcache_range); +EXPORT_SYMBOL(flush_icache_range); #ifdef CONFIG_SMP #ifdef CONFIG_PPC32 diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 2f3cdb01506d..658e89d2025b 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -705,7 +705,7 @@ static int __init rtas_flash_init(void) if (rtas_token("ibm,update-flash-64-and-reboot") == RTAS_UNKNOWN_SERVICE) { pr_info("rtas_flash: no firmware flash support\n"); - return 1; + return -EINVAL; } rtas_validate_flash_data.buf = kzalloc(VALIDATE_BUF_SIZE, GFP_KERNEL); diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 7d4c7172f38e..c168337aef9d 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -80,10 +80,6 @@ int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val) if (ret) return PCIBIOS_DEVICE_NOT_FOUND; - if (returnval == EEH_IO_ERROR_VALUE(size) && - eeh_dev_check_failure(of_node_to_eeh_dev(pdn->node))) - return PCIBIOS_DEVICE_NOT_FOUND; - return PCIBIOS_SUCCESSFUL; } @@ -92,18 +88,39 @@ static int rtas_pci_read_config(struct pci_bus *bus, int where, int size, u32 *val) { struct device_node *busdn, *dn; - - busdn = pci_bus_to_OF_node(bus); + struct pci_dn *pdn; + bool found = false; +#ifdef CONFIG_EEH + struct eeh_dev *edev; +#endif + int ret; /* Search only direct children of the bus */ + *val = 0xFFFFFFFF; + busdn = pci_bus_to_OF_node(bus); for (dn = busdn->child; dn; dn = dn->sibling) { - 
struct pci_dn *pdn = PCI_DN(dn); + pdn = PCI_DN(dn); if (pdn && pdn->devfn == devfn - && of_device_is_available(dn)) - return rtas_read_config(pdn, where, size, val); + && of_device_is_available(dn)) { + found = true; + break; + } } - return PCIBIOS_DEVICE_NOT_FOUND; + if (!found) + return PCIBIOS_DEVICE_NOT_FOUND; +#ifdef CONFIG_EEH + edev = of_node_to_eeh_dev(dn); + if (edev && edev->pe && edev->pe->state & EEH_PE_RESET) + return PCIBIOS_DEVICE_NOT_FOUND; +#endif + + ret = rtas_read_config(pdn, where, size, val); + if (*val == EEH_IO_ERROR_VALUE(size) && + eeh_dev_check_failure(of_node_to_eeh_dev(dn))) + return PCIBIOS_DEVICE_NOT_FOUND; + + return ret; } int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val) @@ -136,17 +153,34 @@ static int rtas_pci_write_config(struct pci_bus *bus, int where, int size, u32 val) { struct device_node *busdn, *dn; - - busdn = pci_bus_to_OF_node(bus); + struct pci_dn *pdn; + bool found = false; +#ifdef CONFIG_EEH + struct eeh_dev *edev; +#endif + int ret; /* Search only direct children of the bus */ + busdn = pci_bus_to_OF_node(bus); for (dn = busdn->child; dn; dn = dn->sibling) { - struct pci_dn *pdn = PCI_DN(dn); + pdn = PCI_DN(dn); if (pdn && pdn->devfn == devfn - && of_device_is_available(dn)) - return rtas_write_config(pdn, where, size, val); + && of_device_is_available(dn)) { + found = true; + break; + } } - return PCIBIOS_DEVICE_NOT_FOUND; + + if (!found) + return PCIBIOS_DEVICE_NOT_FOUND; +#ifdef CONFIG_EEH + edev = of_node_to_eeh_dev(dn); + if (edev && edev->pe && (edev->pe->state & EEH_PE_RESET)) + return PCIBIOS_DEVICE_NOT_FOUND; +#endif + ret = rtas_write_config(pdn, where, size, val); + + return ret; } static struct pci_ops rtas_pci_ops = { diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 79b7612ac6fa..3cf25c89469d 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -212,6 +212,7 @@ static int show_cpuinfo(struct seq_file 
*m, void *v) { unsigned long cpu_id = (unsigned long)v - 1; unsigned int pvr; + unsigned long proc_freq; unsigned short maj; unsigned short min; @@ -263,12 +264,19 @@ static int show_cpuinfo(struct seq_file *m, void *v) #endif /* CONFIG_TAU */ /* - * Assume here that all clock rates are the same in a - * smp system. -- Cort + * Platforms that have variable clock rates, should implement + * the method ppc_md.get_proc_freq() that reports the clock + * rate of a given cpu. The rest can use ppc_proc_freq to + * report the clock rate that is same across all cpus. */ - if (ppc_proc_freq) + if (ppc_md.get_proc_freq) + proc_freq = ppc_md.get_proc_freq(cpu_id); + else + proc_freq = ppc_proc_freq; + + if (proc_freq) seq_printf(m, "clock\t\t: %lu.%06luMHz\n", - ppc_proc_freq / 1000000, ppc_proc_freq % 1000000); + proc_freq / 1000000, proc_freq % 1000000); if (ppc_md.show_percpuinfo != NULL) ppc_md.show_percpuinfo(m, cpu_id); diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 03567c05950a..508c54b92fa6 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -10,6 +10,7 @@ #include <asm/ppc-opcode.h> #include <asm/ptrace.h> #include <asm/reg.h> +#include <asm/bug.h> #ifdef CONFIG_VSX /* See fpu.S, this is borrowed from there */ @@ -175,6 +176,13 @@ dont_backup_vec: stfd fr0,FPSTATE_FPSCR(r7) dont_backup_fp: + /* Do sanity check on MSR to make sure we are suspended */ + li r7, (MSR_TS_S)@higher + srdi r6, r14, 32 + and r6, r6, r7 +1: tdeqi r6, 0 + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 + /* The moment we treclaim, ALL of our GPRs will switch * to user register state. (FPRs, CCR etc. also!) * Use an sprg and a tm_scratch in the PACA to shuffle. 
@@ -320,8 +328,6 @@ _GLOBAL(__tm_recheckpoint) */ SAVE_NVGPRS(r1) - std r1, PACAR1(r13) - /* Load complete register state from ts_ckpt* registers */ addi r7, r3, PT_CKPT_REGS /* Thread's ckpt_regs */ @@ -385,12 +391,10 @@ restore_gprs: /* ******************** CR,LR,CCR,MSR ********** */ ld r4, _CTR(r7) ld r5, _LINK(r7) - ld r6, _CCR(r7) ld r8, _XER(r7) mtctr r4 mtlr r5 - mtcr r6 mtxer r8 /* ******************** TAR ******************** */ @@ -406,7 +410,8 @@ restore_gprs: li r4, 0 mtmsrd r4, 1 - REST_4GPRS(0, r7) /* GPR0-3 */ + REST_GPR(0, r7) /* GPR0 */ + REST_2GPRS(2, r7) /* GPR2-3 */ REST_GPR(4, r7) /* GPR4 */ REST_4GPRS(8, r7) /* GPR8-11 */ REST_2GPRS(12, r7) /* GPR12-13 */ @@ -418,6 +423,31 @@ restore_gprs: mtspr SPRN_DSCR, r5 mtspr SPRN_PPR, r6 + /* Do final sanity check on TEXASR to make sure FS is set. Do this + * here before we load up the userspace r1 so any bugs we hit will get + * a call chain */ + mfspr r5, SPRN_TEXASR + srdi r5, r5, 16 + li r6, (TEXASR_FS)@h + and r6, r6, r5 +1: tdeqi r6, 0 + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 + + /* Do final sanity check on MSR to make sure we are not transactional + * or suspended + */ + mfmsr r6 + li r5, (MSR_TS_MASK)@higher + srdi r6, r6, 32 + and r6, r6, r5 +1: tdnei r6, 0 + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 + + /* Restore CR */ + ld r6, _CCR(r7) + mtcr r6 + + REST_GPR(1, r7) /* GPR1 */ REST_GPR(5, r7) /* GPR5-7 */ REST_GPR(6, r7) ld r7, GPR7(r7) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index ffbb871c2bd8..b031f932c0cc 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -242,6 +242,12 @@ kvm_novcpu_exit: */ .globl kvm_start_guest kvm_start_guest: + + /* Set runlatch bit the minute you wake up from nap */ + mfspr r1, SPRN_CTRLF + ori r1, r1, 1 + mtspr SPRN_CTRLT, r1 + ld r2,PACATOC(r13) li r0,KVM_HWTHREAD_IN_KVM @@ -309,6 +315,11 @@ kvm_no_guest: li r0, KVM_HWTHREAD_IN_NAP stb r0, 
HSTATE_HWTHREAD_STATE(r13) kvm_do_nap: + /* Clear the runlatch bit before napping */ + mfspr r2, SPRN_CTRLF + clrrdi r2, r2, 1 + mtspr SPRN_CTRLT, r2 + li r3, LPCR_PECE0 mfspr r4, SPRN_LPCR rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 @@ -1999,8 +2010,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) /* * Take a nap until a decrementer or external or doobell interrupt - * occurs, with PECE1, PECE0 and PECEDP set in LPCR + * occurs, with PECE1, PECE0 and PECEDP set in LPCR. Also clear the + * runlatch bit before napping. */ + mfspr r2, SPRN_CTRLF + clrrdi r2, r2, 1 + mtspr SPRN_CTRLT, r2 + li r0,1 stb r0,HSTATE_HWTHREAD_REQ(r13) mfspr r5,SPRN_LPCR diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 3ea26c25590b..cf1d325eae8b 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -82,17 +82,14 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1); va |= penc << 12; va |= ssize << 8; - /* Add AVAL part */ - if (psize != apsize) { - /* - * MPSS, 64K base page size and 16MB parge page size - * We don't need all the bits, but rest of the bits - * must be ignored by the processor. - * vpn cover upto 65 bits of va. (0...65) and we need - * 58..64 bits of va. - */ - va |= (vpn & 0xfe); - } + /* + * AVAL bits: + * We don't need all the bits, but rest of the bits + * must be ignored by the processor. + * vpn cover upto 65 bits of va. (0...65) and we need + * 58..64 bits of va. 
+ */ + va |= (vpn & 0xfe); /* AVAL */ va |= 1; /* L */ asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2) : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) @@ -133,17 +130,14 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1); va |= penc << 12; va |= ssize << 8; - /* Add AVAL part */ - if (psize != apsize) { - /* - * MPSS, 64K base page size and 16MB parge page size - * We don't need all the bits, but rest of the bits - * must be ignored by the processor. - * vpn cover upto 65 bits of va. (0...65) and we need - * 58..64 bits of va. - */ - va |= (vpn & 0xfe); - } + /* + * AVAL bits: + * We don't need all the bits, but rest of the bits + * must be ignored by the processor. + * vpn cover upto 65 bits of va. (0...65) and we need + * 58..64 bits of va. + */ + va |= (vpn & 0xfe); va |= 1; /* L */ asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)" : : "r"(va) : "memory"); diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 297c91051413..e0766b82e165 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -155,16 +155,28 @@ static ssize_t read_offset_data(void *dest, size_t dest_len, return copy_len; } -static unsigned long h_get_24x7_catalog_page(char page[static 4096], - u32 version, u32 index) +static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096, + unsigned long version, + unsigned long index) { - WARN_ON(!IS_ALIGNED((unsigned long)page, 4096)); + pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)", + phys_4096, + version, + index); + WARN_ON(!IS_ALIGNED(phys_4096, 4096)); return plpar_hcall_norets(H_GET_24X7_CATALOG_PAGE, - virt_to_phys(page), + phys_4096, version, index); } +static unsigned long h_get_24x7_catalog_page(char page[], + u64 version, u32 index) +{ + return h_get_24x7_catalog_page_(virt_to_phys(page), + version, index); +} + static ssize_t catalog_read(struct file *filp, struct kobject *kobj, 
struct bin_attribute *bin_attr, char *buf, loff_t offset, size_t count) @@ -173,7 +185,7 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj, ssize_t ret = 0; size_t catalog_len = 0, catalog_page_len = 0, page_count = 0; loff_t page_offset = 0; - uint32_t catalog_version_num = 0; + uint64_t catalog_version_num = 0; void *page = kmem_cache_alloc(hv_page_cache, GFP_USER); struct hv_24x7_catalog_page_0 *page_0 = page; if (!page) @@ -185,7 +197,7 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj, goto e_free; } - catalog_version_num = be32_to_cpu(page_0->version); + catalog_version_num = be64_to_cpu(page_0->version); catalog_page_len = be32_to_cpu(page_0->length); catalog_len = catalog_page_len * 4096; @@ -208,8 +220,9 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj, page, 4096, page_offset * 4096); e_free: if (hret) - pr_err("h_get_24x7_catalog_page(ver=%d, page=%lld) failed: rc=%ld\n", - catalog_version_num, page_offset, hret); + pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:" + " rc=%ld\n", + catalog_version_num, page_offset, hret); kfree(page); pr_devel("catalog_read: offset=%lld(%lld) count=%zu(%zu) catalog_len=%zu(%zu) => %zd\n", @@ -243,7 +256,7 @@ e_free: \ static DEVICE_ATTR_RO(_name) PAGE_0_ATTR(catalog_version, "%lld\n", - (unsigned long long)be32_to_cpu(page_0->version)); + (unsigned long long)be64_to_cpu(page_0->version)); PAGE_0_ATTR(catalog_len, "%lld\n", (unsigned long long)be32_to_cpu(page_0->length) * 4096); static BIN_ATTR_RO(catalog, 0/* real length varies */); @@ -485,13 +498,13 @@ static int hv_24x7_init(void) struct hv_perf_caps caps; if (!firmware_has_feature(FW_FEATURE_LPAR)) { - pr_info("not a virtualized system, not enabling\n"); + pr_debug("not a virtualized system, not enabling\n"); return -ENODEV; } hret = hv_perf_caps_get(&caps); if (hret) { - pr_info("could not obtain capabilities, error 0x%80lx, not enabling\n", + pr_debug("could not obtain capabilities, not 
enabling, rc=%ld\n", hret); return -ENODEV; } diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 278ba7b9c2b5..c9d399a2df82 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -78,7 +78,7 @@ static ssize_t kernel_version_show(struct device *dev, return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT); } -DEVICE_ATTR_RO(kernel_version); +static DEVICE_ATTR_RO(kernel_version); HV_CAPS_ATTR(version, "0x%x\n"); HV_CAPS_ATTR(ga, "%d\n"); HV_CAPS_ATTR(expanded, "%d\n"); @@ -273,13 +273,13 @@ static int hv_gpci_init(void) struct hv_perf_caps caps; if (!firmware_has_feature(FW_FEATURE_LPAR)) { - pr_info("not a virtualized system, not enabling\n"); + pr_debug("not a virtualized system, not enabling\n"); return -ENODEV; } hret = hv_perf_caps_get(&caps); if (hret) { - pr_info("could not obtain capabilities, error 0x%80lx, not enabling\n", + pr_debug("could not obtain capabilities, not enabling, rc=%ld\n", hret); return -ENODEV; } diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index d9e2b19b7c8d..43b65ad1970a 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -422,6 +422,7 @@ config CPU_BIG_ENDIAN config CPU_LITTLE_ENDIAN bool "Build little endian kernel" + select PPC64_BOOT_WRAPPER help Build a little endian kernel. @@ -430,3 +431,7 @@ config CPU_LITTLE_ENDIAN little endian powerpc. endchoice + +config PPC64_BOOT_WRAPPER + def_bool n + depends on CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig index 2a7024d8d8b1..a25f496c2ef9 100644 --- a/arch/powerpc/platforms/embedded6xx/Kconfig +++ b/arch/powerpc/platforms/embedded6xx/Kconfig @@ -65,6 +65,7 @@ config MVME5100 select PPC_INDIRECT_PCI select PPC_I8259 select PPC_NATIVE + select PPC_UDBG_16550 help This option enables support for the Motorola (now Emerson) MVME5100 board. 
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 253fefe3d1a0..79d0cdf786d0 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -42,11 +42,19 @@ static int ioda_eeh_event(struct notifier_block *nb, { uint64_t changed_evts = (uint64_t)change; - /* We simply send special EEH event */ - if ((changed_evts & OPAL_EVENT_PCI_ERROR) && - (events & OPAL_EVENT_PCI_ERROR) && - eeh_enabled()) + /* + * We simply send special EEH event if EEH has + * been enabled, or clear pending events in + * case that we enable EEH soon + */ + if (!(changed_evts & OPAL_EVENT_PCI_ERROR) || + !(events & OPAL_EVENT_PCI_ERROR)) + return 0; + + if (eeh_enabled()) eeh_send_failure_event(NULL); + else + opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); return 0; } @@ -141,7 +149,9 @@ static int ioda_eeh_post_init(struct pci_controller *hose) } #ifdef CONFIG_DEBUG_FS - if (phb->dbgfs) { + if (!phb->has_dbgfs && phb->dbgfs) { + phb->has_dbgfs = 1; + debugfs_create_file("err_injct_outbound", 0600, phb->dbgfs, hose, &ioda_eeh_outb_dbgfs_ops); @@ -154,7 +164,14 @@ static int ioda_eeh_post_init(struct pci_controller *hose) } #endif - phb->eeh_state |= PNV_EEH_STATE_ENABLED; + /* If EEH is enabled, we're going to rely on that. + * Otherwise, we restore to conventional mechanism + * to clear frozen PE during PCI config access. + */ + if (eeh_enabled()) + phb->flags |= PNV_PHB_FLAG_EEH; + else + phb->flags &= ~PNV_PHB_FLAG_EEH; return 0; } @@ -268,6 +285,21 @@ static int ioda_eeh_get_state(struct eeh_pe *pe) return EEH_STATE_NOT_SUPPORT; } + /* + * If we're in middle of PE reset, return normal + * state to keep EEH core going. For PHB reset, we + * still expect to have fenced PHB cleared with + * PHB reset. 
+ */ + if (!(pe->type & EEH_PE_PHB) && + (pe->state & EEH_PE_RESET)) { + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); + return result; + } + /* Retrieve PE status through OPAL */ pe_no = pe->addr; ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, @@ -347,52 +379,6 @@ static int ioda_eeh_get_state(struct eeh_pe *pe) return result; } -static int ioda_eeh_pe_clear(struct eeh_pe *pe) -{ - struct pci_controller *hose; - struct pnv_phb *phb; - u32 pe_no; - u8 fstate; - u16 pcierr; - s64 ret; - - pe_no = pe->addr; - hose = pe->phb; - phb = pe->phb->private_data; - - /* Clear the EEH error on the PE */ - ret = opal_pci_eeh_freeze_clear(phb->opal_id, - pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); - if (ret) { - pr_err("%s: Failed to clear EEH error for " - "PHB#%x-PE#%x, err=%lld\n", - __func__, hose->global_number, pe_no, ret); - return -EIO; - } - - /* - * Read the PE state back and verify that the frozen - * state has been removed. 
- */ - ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, - &fstate, &pcierr, NULL); - if (ret) { - pr_err("%s: Failed to get EEH status on " - "PHB#%x-PE#%x\n, err=%lld\n", - __func__, hose->global_number, pe_no, ret); - return -EIO; - } - - if (fstate != OPAL_EEH_STOPPED_NOT_FROZEN) { - pr_err("%s: Frozen state not cleared on " - "PHB#%x-PE#%x, sts=%x\n", - __func__, hose->global_number, pe_no, fstate); - return -EIO; - } - - return 0; -} - static s64 ioda_eeh_phb_poll(struct pnv_phb *phb) { s64 rc = OPAL_HARDWARE; @@ -402,13 +388,16 @@ static s64 ioda_eeh_phb_poll(struct pnv_phb *phb) if (rc <= 0) break; - msleep(rc); + if (system_state < SYSTEM_RUNNING) + udelay(1000 * rc); + else + msleep(rc); } return rc; } -static int ioda_eeh_phb_reset(struct pci_controller *hose, int option) +int ioda_eeh_phb_reset(struct pci_controller *hose, int option) { struct pnv_phb *phb = hose->private_data; s64 rc = OPAL_HARDWARE; @@ -431,9 +420,17 @@ static int ioda_eeh_phb_reset(struct pci_controller *hose, int option) /* * Poll state of the PHB until the request is done - * successfully. + * successfully. The PHB reset is usually PHB complete + * reset followed by hot reset on root bus. So we also + * need the PCI bus settlement delay. 
*/ rc = ioda_eeh_phb_poll(phb); + if (option == EEH_RESET_DEACTIVATE) { + if (system_state < SYSTEM_RUNNING) + udelay(1000 * EEH_PE_RST_SETTLE_TIME); + else + msleep(EEH_PE_RST_SETTLE_TIME); + } out: if (rc != OPAL_SUCCESS) return -EIO; @@ -471,6 +468,8 @@ static int ioda_eeh_root_reset(struct pci_controller *hose, int option) /* Poll state of the PHB until the request is done */ rc = ioda_eeh_phb_poll(phb); + if (option == EEH_RESET_DEACTIVATE) + msleep(EEH_PE_RST_SETTLE_TIME); out: if (rc != OPAL_SUCCESS) return -EIO; @@ -478,32 +477,149 @@ out: return 0; } -static int ioda_eeh_bridge_reset(struct pci_controller *hose, - struct pci_dev *dev, int option) +static bool ioda_eeh_is_plx_dnport(struct pci_dev *dev, int *reg, + int *mask, int *len) { - u16 ctrl; + unsigned short *pid; + unsigned short ids[] = { + 0x10b5, 0x8748, 0x0080, 0x0400, /* PLX#8748 */ + 0x0000, 0x0000, 0x0000, 0x0000, /* End flag */ + }; + + if (!pci_is_pcie(dev)) + return false; + if (pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM) + return false; + + pid = &ids[0]; + while (!reg) { + if (pid[0] == 0x0) + break; - pr_debug("%s: Reset device %04x:%02x:%02x.%01x with option %d\n", - __func__, hose->global_number, dev->bus->number, - PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), option); + if (dev->vendor == pid[0] && + dev->device == pid[1]) { + *reg = pid[2]; + *mask = pid[3]; + *len = 2; + return true; + } + } + + *reg = PCI_BRIDGE_CONTROL; + *mask = PCI_BRIDGE_CTL_BUS_RESET; + *len = 2; + return false; +} + +static int ioda_eeh_bridge_reset(struct pci_dev *dev, int option) + +{ + struct device_node *dn = pci_device_to_OF_node(dev); + struct eeh_dev *edev = of_node_to_eeh_dev(dn); + int aer = edev ? 
edev->aer_cap : 0; + int reg, mask, val, len; + bool is_plx_dnport; + + pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n", + __func__, pci_domain_nr(dev->bus), + dev->bus->number, option); + + + is_plx_dnport = ioda_eeh_is_plx_dnport(dev, &reg, &mask, &len); + if (option == EEH_RESET_FUNDAMENTAL) + if (!is_plx_dnport || !edev) + option = EEH_RESET_HOT; + + if (option == EEH_RESET_HOT) { + reg = PCI_BRIDGE_CONTROL; + mask = PCI_BRIDGE_CTL_BUS_RESET; + len = 2; + } + + if (option == EEH_RESET_DEACTIVATE) { + if (!is_plx_dnport || !edev || + !(edev->mode & EEH_DEV_FRESET)) { + reg = PCI_BRIDGE_CONTROL; + mask = PCI_BRIDGE_CTL_BUS_RESET; + len = 2; + } + } switch (option) { case EEH_RESET_FUNDAMENTAL: + edev->mode |= EEH_DEV_FRESET; + /* Fall through */ case EEH_RESET_HOT: - pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); - ctrl |= PCI_BRIDGE_CTL_BUS_RESET; - pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); + if (aer) { + /* Mask receiver error */ + eeh_ops->read_config(dn, aer + PCI_ERR_COR_MASK, + 4, &val); + val |= PCI_ERR_COR_RCVR; + eeh_ops->write_config(dn, aer + PCI_ERR_COR_MASK, + 4, val); + + /* Mask linkDown */ + eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK, + 4, &val); + val |= PCI_ERR_UNC_SURPDN; + eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK, + 4, val); + } + + eeh_ops->read_config(dn, reg, len, &val); + val |= mask; + eeh_ops->write_config(dn, reg, len, val); + msleep(EEH_PE_RST_HOLD_TIME); + break; case EEH_RESET_DEACTIVATE: - pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); - ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; - pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); + eeh_ops->read_config(dn, reg, len, &val); + val &= ~mask; + eeh_ops->write_config(dn, reg, len, val); + msleep(EEH_PE_RST_SETTLE_TIME); + + if (edev) + edev->mode &= ~EEH_DEV_FRESET; + if (aer) { + /* Clear receive error and enable it */ + eeh_ops->write_config(dn, aer + PCI_ERR_COR_STATUS, + 4, PCI_ERR_COR_RCVR); + eeh_ops->read_config(dn, aer + 
PCI_ERR_COR_MASK, + 4, &val); + val &= ~PCI_ERR_COR_RCVR; + eeh_ops->write_config(dn, aer + PCI_ERR_COR_MASK, + 4, val); + + /* Clear linkDown and enable it */ + eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_STATUS, + 4, PCI_ERR_UNC_SURPDN); + eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK, + 4, &val); + val &= ~PCI_ERR_UNC_SURPDN; + eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK, + 4, val); + } + break; } return 0; } +void pnv_pci_reset_secondary_bus(struct pci_dev *dev) +{ + struct pci_controller *hose; + + if (pci_is_root_bus(dev->bus)) { + hose = pci_bus_to_host(dev->bus); + ioda_eeh_root_reset(hose, EEH_RESET_HOT); + ioda_eeh_root_reset(hose, EEH_RESET_DEACTIVATE); + } else { + ioda_eeh_bridge_reset(dev, EEH_RESET_HOT); + ioda_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE); + } +} + /** * ioda_eeh_reset - Reset the indicated PE * @pe: EEH PE @@ -523,36 +639,28 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) int ret; /* - * Anyway, we have to clear the problematic state for the - * corresponding PE. However, we needn't do it if the PE - * is PHB associated. That means the PHB is having fatal - * errors and it needs reset. Further more, the AIB interface - * isn't reliable any more. - */ - if (!(pe->type & EEH_PE_PHB) && - (option == EEH_RESET_HOT || - option == EEH_RESET_FUNDAMENTAL)) { - ret = ioda_eeh_pe_clear(pe); - if (ret) - return -EIO; - } - - /* - * The rules applied to reset, either fundamental or hot reset: + * For PHB reset, we always have complete reset. For those PEs whose + * primary bus derived from root complex (root bus) or root port + * (usually bus#1), we apply hot or fundamental reset on the root port. + * For other PEs, we always have hot reset on the PE primary bus. * - * We always reset the direct upstream bridge of the PE. If the - * direct upstream bridge isn't root bridge, we always take hot - * reset no matter what option (fundamental or hot) is. Otherwise, - * we should do the reset according to the required option. 
+ * Here, we have different design to pHyp, which always clear the + * frozen state during PE reset. However, the good idea here from + * benh is to keep frozen state before we get PE reset done completely + * (until BAR restore). With the frozen state, HW drops illegal IO + * or MMIO access, which can incur recrusive frozen PE during PE + * reset. The side effect is that EEH core has to clear the frozen + * state explicitly after BAR restore. */ if (pe->type & EEH_PE_PHB) { ret = ioda_eeh_phb_reset(hose, option); } else { bus = eeh_pe_bus_get(pe); - if (pci_is_root_bus(bus)) + if (pci_is_root_bus(bus) || + pci_is_root_bus(bus->parent)) ret = ioda_eeh_root_reset(hose, option); else - ret = ioda_eeh_bridge_reset(hose, bus->self, option); + ret = ioda_eeh_bridge_reset(bus->self, option); } return ret; @@ -639,22 +747,6 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose) } } -static int ioda_eeh_get_phb_pe(struct pci_controller *hose, - struct eeh_pe **pe) -{ - struct eeh_pe *phb_pe; - - phb_pe = eeh_phb_pe_get(hose); - if (!phb_pe) { - pr_warning("%s Can't find PE for PHB#%d\n", - __func__, hose->global_number); - return -EEXIST; - } - - *pe = phb_pe; - return 0; -} - static int ioda_eeh_get_pe(struct pci_controller *hose, u16 pe_no, struct eeh_pe **pe) { @@ -662,7 +754,8 @@ static int ioda_eeh_get_pe(struct pci_controller *hose, struct eeh_dev dev; /* Find the PHB PE */ - if (ioda_eeh_get_phb_pe(hose, &phb_pe)) + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe) return -EEXIST; /* Find the PE according to PE# */ @@ -690,6 +783,7 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) { struct pci_controller *hose; struct pnv_phb *phb; + struct eeh_pe *phb_pe; u64 frozen_pe_no; u16 err_type, severity; long rc; @@ -706,10 +800,12 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) list_for_each_entry(hose, &hose_list, list_node) { /* * If the subordinate PCI buses of the PHB has been - * removed, we needn't take care of it any more. 
+ * removed or is exactly under error recovery, we + * needn't take care of it any more. */ phb = hose->private_data; - if (phb->eeh_state & PNV_EEH_STATE_REMOVED) + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED)) continue; rc = opal_pci_next_error(phb->opal_id, @@ -742,12 +838,6 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) switch (err_type) { case OPAL_EEH_IOC_ERROR: if (severity == OPAL_EEH_SEV_IOC_DEAD) { - list_for_each_entry(hose, &hose_list, - list_node) { - phb = hose->private_data; - phb->eeh_state |= PNV_EEH_STATE_REMOVED; - } - pr_err("EEH: dead IOC detected\n"); ret = EEH_NEXT_ERR_DEAD_IOC; } else if (severity == OPAL_EEH_SEV_INF) { @@ -760,17 +850,12 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) break; case OPAL_EEH_PHB_ERROR: if (severity == OPAL_EEH_SEV_PHB_DEAD) { - if (ioda_eeh_get_phb_pe(hose, pe)) - break; - + *pe = phb_pe; pr_err("EEH: dead PHB#%x detected\n", hose->global_number); - phb->eeh_state |= PNV_EEH_STATE_REMOVED; ret = EEH_NEXT_ERR_DEAD_PHB; } else if (severity == OPAL_EEH_SEV_PHB_FENCED) { - if (ioda_eeh_get_phb_pe(hose, pe)) - break; - + *pe = phb_pe; pr_err("EEH: fenced PHB#%x detected\n", hose->global_number); ret = EEH_NEXT_ERR_FENCED_PHB; @@ -788,17 +873,21 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) * If we can't find the corresponding PE, the * PEEV / PEST would be messy. So we force an * fenced PHB so that it can be recovered. + * + * If the PE has been marked as isolated, that + * should have been removed permanently or in + * progress with recovery. We needn't report + * it again. 
*/ if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) { - if (!ioda_eeh_get_phb_pe(hose, pe)) { - pr_err("EEH: Escalated fenced PHB#%x " - "detected for PE#%llx\n", - hose->global_number, - frozen_pe_no); - ret = EEH_NEXT_ERR_FENCED_PHB; - } else { - ret = EEH_NEXT_ERR_NONE; - } + *pe = phb_pe; + pr_err("EEH: Escalated fenced PHB#%x " + "detected for PE#%llx\n", + hose->global_number, + frozen_pe_no); + ret = EEH_NEXT_ERR_FENCED_PHB; + } else if ((*pe)->state & EEH_PE_ISOLATED) { + ret = EEH_NEXT_ERR_NONE; } else { pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", (*pe)->addr, (*pe)->phb->global_number); diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index a59788e83b8b..56a206f32f77 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -126,6 +126,7 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) edev->mode &= 0xFFFFFF00; if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) edev->mode |= EEH_DEV_BRIDGE; + edev->pcix_cap = pci_find_capability(dev, PCI_CAP_ID_PCIX); if (pci_is_pcie(dev)) { edev->pcie_cap = pci_pcie_cap(dev); @@ -133,6 +134,9 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) edev->mode |= EEH_DEV_ROOT_PORT; else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) edev->mode |= EEH_DEV_DS_PORT; + + edev->aer_cap = pci_find_ext_capability(dev, + PCI_EXT_CAP_ID_ERR); } edev->config_addr = ((dev->bus->number << 8) | dev->devfn); diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index b9827b0d87e4..788a1977b9a5 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -209,89 +209,20 @@ static struct kobj_type dump_ktype = { .default_attrs = dump_default_attrs, }; -static void free_dump_sg_list(struct opal_sg_list *list) -{ - struct opal_sg_list *sg1; - while (list) { - sg1 = list->next; - kfree(list); - list = sg1; - } 
- list = NULL; -} - -static struct opal_sg_list *dump_data_to_sglist(struct dump_obj *dump) -{ - struct opal_sg_list *sg1, *list = NULL; - void *addr; - int64_t size; - - addr = dump->buffer; - size = dump->size; - - sg1 = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (!sg1) - goto nomem; - - list = sg1; - sg1->num_entries = 0; - while (size > 0) { - /* Translate virtual address to physical address */ - sg1->entry[sg1->num_entries].data = - (void *)(vmalloc_to_pfn(addr) << PAGE_SHIFT); - - if (size > PAGE_SIZE) - sg1->entry[sg1->num_entries].length = PAGE_SIZE; - else - sg1->entry[sg1->num_entries].length = size; - - sg1->num_entries++; - if (sg1->num_entries >= SG_ENTRIES_PER_NODE) { - sg1->next = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (!sg1->next) - goto nomem; - - sg1 = sg1->next; - sg1->num_entries = 0; - } - addr += PAGE_SIZE; - size -= PAGE_SIZE; - } - return list; - -nomem: - pr_err("%s : Failed to allocate memory\n", __func__); - free_dump_sg_list(list); - return NULL; -} - -static void sglist_to_phy_addr(struct opal_sg_list *list) -{ - struct opal_sg_list *sg, *next; - - for (sg = list; sg; sg = next) { - next = sg->next; - /* Don't translate NULL pointer for last entry */ - if (sg->next) - sg->next = (struct opal_sg_list *)__pa(sg->next); - else - sg->next = NULL; - - /* Convert num_entries to length */ - sg->num_entries = - sg->num_entries * sizeof(struct opal_sg_entry) + 16; - } -} - -static int64_t dump_read_info(uint32_t *id, uint32_t *size, uint32_t *type) +static int64_t dump_read_info(uint32_t *dump_id, uint32_t *dump_size, uint32_t *dump_type) { + __be32 id, size, type; int rc; - *type = 0xffffffff; - rc = opal_dump_info2(id, size, type); + type = cpu_to_be32(0xffffffff); + rc = opal_dump_info2(&id, &size, &type); if (rc == OPAL_PARAMETER) - rc = opal_dump_info(id, size); + rc = opal_dump_info(&id, &size); + + *dump_id = be32_to_cpu(id); + *dump_size = be32_to_cpu(size); + *dump_type = be32_to_cpu(type); if (rc) pr_warn("%s: Failed to get dump info (%d)\n", 
@@ -314,15 +245,12 @@ static int64_t dump_read_data(struct dump_obj *dump) } /* Generate SG list */ - list = dump_data_to_sglist(dump); + list = opal_vmalloc_to_sg_list(dump->buffer, dump->size); if (!list) { rc = -ENOMEM; goto out; } - /* Translate sg list addr to real address */ - sglist_to_phy_addr(list); - /* First entry address */ addr = __pa(list); @@ -341,7 +269,7 @@ static int64_t dump_read_data(struct dump_obj *dump) __func__, dump->id); /* Free SG list */ - free_dump_sg_list(list); + opal_free_sg_list(list); out: return rc; diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index ef7bc2a97862..10268c41d830 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -238,18 +238,25 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) static void elog_work_fn(struct work_struct *work) { - size_t elog_size; + __be64 size; + __be64 id; + __be64 type; + uint64_t elog_size; uint64_t log_id; uint64_t elog_type; int rc; char name[2+16+1]; - rc = opal_get_elog_size(&log_id, &elog_size, &elog_type); + rc = opal_get_elog_size(&id, &size, &type); if (rc != OPAL_SUCCESS) { pr_err("ELOG: Opal log read failed\n"); return; } + elog_size = be64_to_cpu(size); + log_id = be64_to_cpu(id); + elog_type = be64_to_cpu(type); + BUG_ON(elog_size > OPAL_MAX_ERRLOG_SIZE); if (elog_size >= OPAL_MAX_ERRLOG_SIZE) diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c index 714ef972406b..145a80bc5354 100644 --- a/arch/powerpc/platforms/powernv/opal-flash.c +++ b/arch/powerpc/platforms/powernv/opal-flash.c @@ -20,6 +20,7 @@ #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/pagemap.h> +#include <linux/delay.h> #include <asm/opal.h> @@ -79,9 +80,6 @@ /* XXX: Assume candidate image size is <= 1GB */ #define MAX_IMAGE_SIZE 0x40000000 -/* Flash sg list version */ -#define SG_LIST_VERSION (1UL) - /* Image 
status */ enum { IMAGE_INVALID, @@ -131,11 +129,15 @@ static DEFINE_MUTEX(image_data_mutex); */ static inline void opal_flash_validate(void) { - struct validate_flash_t *args_buf = &validate_flash_data; + long ret; + void *buf = validate_flash_data.buf; + __be32 size, result; - args_buf->status = opal_validate_flash(__pa(args_buf->buf), - &(args_buf->buf_size), - &(args_buf->result)); + ret = opal_validate_flash(__pa(buf), &size, &result); + + validate_flash_data.status = ret; + validate_flash_data.buf_size = be32_to_cpu(size); + validate_flash_data.result = be32_to_cpu(result); } /* @@ -268,93 +270,11 @@ static ssize_t manage_store(struct kobject *kobj, } /* - * Free sg list - */ -static void free_sg_list(struct opal_sg_list *list) -{ - struct opal_sg_list *sg1; - while (list) { - sg1 = list->next; - kfree(list); - list = sg1; - } - list = NULL; -} - -/* - * Build candidate image scatter gather list - * - * list format: - * ----------------------------------- - * | VER (8) | Entry length in bytes | - * ----------------------------------- - * | Pointer to next entry | - * ----------------------------------- - * | Address of memory area 1 | - * ----------------------------------- - * | Length of memory area 1 | - * ----------------------------------- - * | ......... | - * ----------------------------------- - * | ......... 
| - * ----------------------------------- - * | Address of memory area N | - * ----------------------------------- - * | Length of memory area N | - * ----------------------------------- - */ -static struct opal_sg_list *image_data_to_sglist(void) -{ - struct opal_sg_list *sg1, *list = NULL; - void *addr; - int size; - - addr = image_data.data; - size = image_data.size; - - sg1 = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (!sg1) - return NULL; - - list = sg1; - sg1->num_entries = 0; - while (size > 0) { - /* Translate virtual address to physical address */ - sg1->entry[sg1->num_entries].data = - (void *)(vmalloc_to_pfn(addr) << PAGE_SHIFT); - - if (size > PAGE_SIZE) - sg1->entry[sg1->num_entries].length = PAGE_SIZE; - else - sg1->entry[sg1->num_entries].length = size; - - sg1->num_entries++; - if (sg1->num_entries >= SG_ENTRIES_PER_NODE) { - sg1->next = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (!sg1->next) { - pr_err("%s : Failed to allocate memory\n", - __func__); - goto nomem; - } - - sg1 = sg1->next; - sg1->num_entries = 0; - } - addr += PAGE_SIZE; - size -= PAGE_SIZE; - } - return list; -nomem: - free_sg_list(list); - return NULL; -} - -/* * OPAL update flash */ static int opal_flash_update(int op) { - struct opal_sg_list *sg, *list, *next; + struct opal_sg_list *list; unsigned long addr; int64_t rc = OPAL_PARAMETER; @@ -364,35 +284,13 @@ static int opal_flash_update(int op) goto flash; } - list = image_data_to_sglist(); + list = opal_vmalloc_to_sg_list(image_data.data, image_data.size); if (!list) goto invalid_img; /* First entry address */ addr = __pa(list); - /* Translate sg list address to absolute */ - for (sg = list; sg; sg = next) { - next = sg->next; - /* Don't translate NULL pointer for last entry */ - if (sg->next) - sg->next = (struct opal_sg_list *)__pa(sg->next); - else - sg->next = NULL; - - /* - * Convert num_entries to version/length format - * to satisfy OPAL. 
- */ - sg->num_entries = (SG_LIST_VERSION << 56) | - (sg->num_entries * sizeof(struct opal_sg_entry) + 16); - } - - pr_alert("FLASH: Image is %u bytes\n", image_data.size); - pr_alert("FLASH: Image update requested\n"); - pr_alert("FLASH: Image will be updated during system reboot\n"); - pr_alert("FLASH: This will take several minutes. Do not power off!\n"); - flash: rc = opal_update_flash(addr); @@ -400,6 +298,47 @@ invalid_img: return rc; } +/* Return CPUs to OPAL before starting FW update */ +static void flash_return_cpu(void *info) +{ + int cpu = smp_processor_id(); + + if (!cpu_online(cpu)) + return; + + /* Disable IRQ */ + hard_irq_disable(); + + /* Return the CPU to OPAL */ + opal_return_cpu(); +} + +/* This gets called just before system reboots */ +void opal_flash_term_callback(void) +{ + struct cpumask mask; + + if (update_flash_data.status != FLASH_IMG_READY) + return; + + pr_alert("FLASH: Flashing new firmware\n"); + pr_alert("FLASH: Image is %u bytes\n", image_data.size); + pr_alert("FLASH: Performing flash and reboot/shutdown\n"); + pr_alert("FLASH: This will take several minutes. 
Do not power off!\n"); + + /* Small delay to help getting the above message out */ + msleep(500); + + /* Return secondary CPUs to firmware */ + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + if (!cpumask_empty(&mask)) + smp_call_function_many(&mask, + flash_return_cpu, NULL, false); + /* Hard disable interrupts */ + hard_irq_disable(); +} + /* * Show candidate image status */ diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c index 6b614726baf2..d202f9bc3683 100644 --- a/arch/powerpc/platforms/powernv/opal-sysparam.c +++ b/arch/powerpc/platforms/powernv/opal-sysparam.c @@ -39,10 +39,11 @@ struct param_attr { struct kobj_attribute kobj_attr; }; -static int opal_get_sys_param(u32 param_id, u32 length, void *buffer) +static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer) { struct opal_msg msg; - int ret, token; + ssize_t ret; + int token; token = opal_async_get_token_interruptible(); if (token < 0) { @@ -59,7 +60,7 @@ static int opal_get_sys_param(u32 param_id, u32 length, void *buffer) ret = opal_async_wait_response(token, &msg); if (ret) { - pr_err("%s: Failed to wait for the async response, %d\n", + pr_err("%s: Failed to wait for the async response, %zd\n", __func__, ret); goto out_token; } @@ -111,7 +112,7 @@ static ssize_t sys_param_show(struct kobject *kobj, { struct param_attr *attr = container_of(kobj_attr, struct param_attr, kobj_attr); - int ret; + ssize_t ret; mutex_lock(&opal_sysparam_mutex); ret = opal_get_sys_param(attr->param_id, attr->param_size, @@ -121,9 +122,10 @@ static ssize_t sys_param_show(struct kobject *kobj, memcpy(buf, param_data_buf, attr->param_size); + ret = attr->param_size; out: mutex_unlock(&opal_sysparam_mutex); - return ret ? 
ret : attr->param_size; + return ret; } static ssize_t sys_param_store(struct kobject *kobj, @@ -131,14 +133,20 @@ static ssize_t sys_param_store(struct kobject *kobj, { struct param_attr *attr = container_of(kobj_attr, struct param_attr, kobj_attr); - int ret; + ssize_t ret; + + /* MAX_PARAM_DATA_LEN is sizeof(param_data_buf) */ + if (count > MAX_PARAM_DATA_LEN) + count = MAX_PARAM_DATA_LEN; mutex_lock(&opal_sysparam_mutex); memcpy(param_data_buf, buf, count); ret = opal_set_sys_param(attr->param_id, attr->param_size, param_data_buf); mutex_unlock(&opal_sysparam_mutex); - return ret ? ret : count; + if (!ret) + ret = count; + return ret; } void __init opal_sys_param_init(void) @@ -214,13 +222,13 @@ void __init opal_sys_param_init(void) } if (of_property_read_u32_array(sysparam, "param-len", size, count)) { - pr_err("SYSPARAM: Missing propery param-len in the DT\n"); + pr_err("SYSPARAM: Missing property param-len in the DT\n"); goto out_free_perm; } if (of_property_read_u8_array(sysparam, "param-perm", perm, count)) { - pr_err("SYSPARAM: Missing propery param-perm in the DT\n"); + pr_err("SYSPARAM: Missing property param-perm in the DT\n"); goto out_free_perm; } @@ -233,6 +241,12 @@ void __init opal_sys_param_init(void) /* For each of the parameters, populate the parameter attributes */ for (i = 0; i < count; i++) { + if (size[i] > MAX_PARAM_DATA_LEN) { + pr_warn("SYSPARAM: Not creating parameter %d as size " + "exceeds buffer length\n", i); + continue; + } + sysfs_attr_init(&attr[i].kobj_attr.attr); attr[i].param_id = id[i]; attr[i].param_size = size[i]; diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 49d2f00019e5..360ad80c754c 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -242,14 +242,14 @@ void opal_notifier_update_evt(uint64_t evt_mask, void opal_notifier_enable(void) { int64_t rc; - uint64_t evt = 0; + __be64 evt = 0; atomic_set(&opal_notifier_hold, 0); /* 
Process pending events */ rc = opal_poll_events(&evt); if (rc == OPAL_SUCCESS && evt) - opal_do_notifier(evt); + opal_do_notifier(be64_to_cpu(evt)); } void opal_notifier_disable(void) @@ -529,7 +529,7 @@ static irqreturn_t opal_interrupt(int irq, void *data) opal_handle_interrupt(virq_to_hw(irq), &events); - opal_do_notifier(events); + opal_do_notifier(be64_to_cpu(events)); return IRQ_HANDLED; } @@ -638,3 +638,66 @@ void opal_shutdown(void) /* Export this so that test modules can use it */ EXPORT_SYMBOL_GPL(opal_invalid_call); + +/* Convert a region of vmalloc memory to an opal sg list */ +struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, + unsigned long vmalloc_size) +{ + struct opal_sg_list *sg, *first = NULL; + unsigned long i = 0; + + sg = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!sg) + goto nomem; + + first = sg; + + while (vmalloc_size > 0) { + uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT; + uint64_t length = min(vmalloc_size, PAGE_SIZE); + + sg->entry[i].data = cpu_to_be64(data); + sg->entry[i].length = cpu_to_be64(length); + i++; + + if (i >= SG_ENTRIES_PER_NODE) { + struct opal_sg_list *next; + + next = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!next) + goto nomem; + + sg->length = cpu_to_be64( + i * sizeof(struct opal_sg_entry) + 16); + i = 0; + sg->next = cpu_to_be64(__pa(next)); + sg = next; + } + + vmalloc_addr += length; + vmalloc_size -= length; + } + + sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16); + + return first; + +nomem: + pr_err("%s : Failed to allocate memory\n", __func__); + opal_free_sg_list(first); + return NULL; +} + +void opal_free_sg_list(struct opal_sg_list *sg) +{ + while (sg) { + uint64_t next = be64_to_cpu(sg->next); + + kfree(sg); + + if (next) + sg = __va(next); + else + sg = NULL; + } +} diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 3b2b4fb3585b..de19edeaa7a7 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ 
b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/pci.h> +#include <linux/crash_dump.h> #include <linux/debugfs.h> #include <linux/delay.h> #include <linux/string.h> @@ -343,7 +344,6 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) pci_name(dev)); continue; } - pci_dev_get(dev); pdn->pcidev = dev; pdn->pe_number = pe->pe_number; pe->dma_weight += pnv_ioda_dma_weight(dev); @@ -462,7 +462,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev pe = &phb->ioda.pe_array[pdn->pe_number]; WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); - set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table); + set_iommu_table_base(&pdev->dev, &pe->tce32_table); } static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, @@ -664,15 +664,15 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, * errors, and on the first pass the data will be a relative * bus number, print that out instead. */ - tbl->it_busno = 0; pe->tce_inval_reg_phys = be64_to_cpup(swinvp); tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, 8); - tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE | - TCE_PCI_SWINV_PAIR; + tbl->it_type |= (TCE_PCI_SWINV_CREATE | + TCE_PCI_SWINV_FREE | + TCE_PCI_SWINV_PAIR); } iommu_init_table(tbl, phb->hose->node); - iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number); + iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); if (pe->pdev) set_iommu_table_base_and_group(&pe->pdev->dev, tbl); @@ -794,14 +794,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, * errors, and on the first pass the data will be a relative * bus number, print that out instead. 
*/ - tbl->it_busno = 0; pe->tce_inval_reg_phys = be64_to_cpup(swinvp); tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, 8); - tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE; + tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); } iommu_init_table(tbl, phb->hose->node); - iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number); + iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); if (pe->pdev) set_iommu_table_base_and_group(&pe->pdev->dev, tbl); @@ -1387,12 +1386,24 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook; ppc_md.pcibios_window_alignment = pnv_pci_window_alignment; + ppc_md.pcibios_reset_secondary_bus = pnv_pci_reset_secondary_bus; pci_add_flags(PCI_REASSIGN_ALL_RSRC); /* Reset IODA tables to a clean state */ rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET); if (rc) pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc); + + /* If we're running in kdump kernel, the previous kernel never + * shutdown PCI devices correctly. We already got IODA table + * cleaned out. So we have to issue PHB reset to stop all PCI + * transactions from previous kernel. 
+ */ + if (is_kdump_kernel()) { + pr_info(" Issue PHB reset ...\n"); + ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); + ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET); + } } void __init pnv_pci_init_ioda2_phb(struct device_node *np) diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 8518817dcdfd..eefbfcc3fd8c 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -131,65 +131,60 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose, int i; data = (struct OpalIoP7IOCPhbErrorData *)common; - pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n\n", + pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n", hose->global_number, common->version); if (data->brdgCtl) - pr_info(" brdgCtl: %08x\n", + pr_info("brdgCtl: %08x\n", data->brdgCtl); if (data->portStatusReg || data->rootCmplxStatus || data->busAgentStatus) - pr_info(" UtlSts: %08x %08x %08x\n", + pr_info("UtlSts: %08x %08x %08x\n", data->portStatusReg, data->rootCmplxStatus, data->busAgentStatus); if (data->deviceStatus || data->slotStatus || data->linkStatus || data->devCmdStatus || data->devSecStatus) - pr_info(" RootSts: %08x %08x %08x %08x %08x\n", + pr_info("RootSts: %08x %08x %08x %08x %08x\n", data->deviceStatus, data->slotStatus, data->linkStatus, data->devCmdStatus, data->devSecStatus); if (data->rootErrorStatus || data->uncorrErrorStatus || data->corrErrorStatus) - pr_info(" RootErrSts: %08x %08x %08x\n", + pr_info("RootErrSts: %08x %08x %08x\n", data->rootErrorStatus, data->uncorrErrorStatus, data->corrErrorStatus); if (data->tlpHdr1 || data->tlpHdr2 || data->tlpHdr3 || data->tlpHdr4) - pr_info(" RootErrLog: %08x %08x %08x %08x\n", + pr_info("RootErrLog: %08x %08x %08x %08x\n", data->tlpHdr1, data->tlpHdr2, data->tlpHdr3, data->tlpHdr4); if (data->sourceId || data->errorClass || data->correlator) - pr_info(" RootErrLog1: %08x %016llx %016llx\n", + pr_info("RootErrLog1: %08x %016llx %016llx\n", data->sourceId, 
data->errorClass, data->correlator); if (data->p7iocPlssr || data->p7iocCsr) - pr_info(" PhbSts: %016llx %016llx\n", + pr_info("PhbSts: %016llx %016llx\n", data->p7iocPlssr, data->p7iocCsr); - if (data->lemFir || data->lemErrorMask || - data->lemWOF) - pr_info(" Lem: %016llx %016llx %016llx\n", + if (data->lemFir) + pr_info("Lem: %016llx %016llx %016llx\n", data->lemFir, data->lemErrorMask, data->lemWOF); - if (data->phbErrorStatus || data->phbFirstErrorStatus || - data->phbErrorLog0 || data->phbErrorLog1) - pr_info(" PhbErr: %016llx %016llx %016llx %016llx\n", + if (data->phbErrorStatus) + pr_info("PhbErr: %016llx %016llx %016llx %016llx\n", data->phbErrorStatus, data->phbFirstErrorStatus, data->phbErrorLog0, data->phbErrorLog1); - if (data->mmioErrorStatus || data->mmioFirstErrorStatus || - data->mmioErrorLog0 || data->mmioErrorLog1) - pr_info(" OutErr: %016llx %016llx %016llx %016llx\n", + if (data->mmioErrorStatus) + pr_info("OutErr: %016llx %016llx %016llx %016llx\n", data->mmioErrorStatus, data->mmioFirstErrorStatus, data->mmioErrorLog0, data->mmioErrorLog1); - if (data->dma0ErrorStatus || data->dma0FirstErrorStatus || - data->dma0ErrorLog0 || data->dma0ErrorLog1) - pr_info(" InAErr: %016llx %016llx %016llx %016llx\n", + if (data->dma0ErrorStatus) + pr_info("InAErr: %016llx %016llx %016llx %016llx\n", data->dma0ErrorStatus, data->dma0FirstErrorStatus, data->dma0ErrorLog0, data->dma0ErrorLog1); - if (data->dma1ErrorStatus || data->dma1FirstErrorStatus || - data->dma1ErrorLog0 || data->dma1ErrorLog1) - pr_info(" InBErr: %016llx %016llx %016llx %016llx\n", + if (data->dma1ErrorStatus) + pr_info("InBErr: %016llx %016llx %016llx %016llx\n", data->dma1ErrorStatus, data->dma1FirstErrorStatus, data->dma1ErrorLog0, data->dma1ErrorLog1); @@ -198,7 +193,7 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose, (data->pestB[i] >> 63) == 0) continue; - pr_info(" PE[%3d] A/B: %016llx %016llx\n", + pr_info("PE[%3d] A/B: %016llx %016llx\n", i, 
data->pestA[i], data->pestB[i]); } } @@ -210,69 +205,63 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose, int i; data = (struct OpalIoPhb3ErrorData*)common; - pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n\n", + pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n", hose->global_number, common->version); if (data->brdgCtl) - pr_info(" brdgCtl: %08x\n", + pr_info("brdgCtl: %08x\n", data->brdgCtl); if (data->portStatusReg || data->rootCmplxStatus || data->busAgentStatus) - pr_info(" UtlSts: %08x %08x %08x\n", + pr_info("UtlSts: %08x %08x %08x\n", data->portStatusReg, data->rootCmplxStatus, data->busAgentStatus); if (data->deviceStatus || data->slotStatus || data->linkStatus || data->devCmdStatus || data->devSecStatus) - pr_info(" RootSts: %08x %08x %08x %08x %08x\n", + pr_info("RootSts: %08x %08x %08x %08x %08x\n", data->deviceStatus, data->slotStatus, data->linkStatus, data->devCmdStatus, data->devSecStatus); if (data->rootErrorStatus || data->uncorrErrorStatus || data->corrErrorStatus) - pr_info(" RootErrSts: %08x %08x %08x\n", + pr_info("RootErrSts: %08x %08x %08x\n", data->rootErrorStatus, data->uncorrErrorStatus, data->corrErrorStatus); if (data->tlpHdr1 || data->tlpHdr2 || data->tlpHdr3 || data->tlpHdr4) - pr_info(" RootErrLog: %08x %08x %08x %08x\n", + pr_info("RootErrLog: %08x %08x %08x %08x\n", data->tlpHdr1, data->tlpHdr2, data->tlpHdr3, data->tlpHdr4); if (data->sourceId || data->errorClass || data->correlator) - pr_info(" RootErrLog1: %08x %016llx %016llx\n", + pr_info("RootErrLog1: %08x %016llx %016llx\n", data->sourceId, data->errorClass, data->correlator); - if (data->nFir || data->nFirMask || - data->nFirWOF) - pr_info(" nFir: %016llx %016llx %016llx\n", + if (data->nFir) + pr_info("nFir: %016llx %016llx %016llx\n", data->nFir, data->nFirMask, data->nFirWOF); if (data->phbPlssr || data->phbCsr) - pr_info(" PhbSts: %016llx %016llx\n", + pr_info("PhbSts: %016llx %016llx\n", data->phbPlssr, data->phbCsr); - if (data->lemFir || 
data->lemErrorMask || - data->lemWOF) - pr_info(" Lem: %016llx %016llx %016llx\n", + if (data->lemFir) + pr_info("Lem: %016llx %016llx %016llx\n", data->lemFir, data->lemErrorMask, data->lemWOF); - if (data->phbErrorStatus || data->phbFirstErrorStatus || - data->phbErrorLog0 || data->phbErrorLog1) - pr_info(" PhbErr: %016llx %016llx %016llx %016llx\n", + if (data->phbErrorStatus) + pr_info("PhbErr: %016llx %016llx %016llx %016llx\n", data->phbErrorStatus, data->phbFirstErrorStatus, data->phbErrorLog0, data->phbErrorLog1); - if (data->mmioErrorStatus || data->mmioFirstErrorStatus || - data->mmioErrorLog0 || data->mmioErrorLog1) - pr_info(" OutErr: %016llx %016llx %016llx %016llx\n", + if (data->mmioErrorStatus) + pr_info("OutErr: %016llx %016llx %016llx %016llx\n", data->mmioErrorStatus, data->mmioFirstErrorStatus, data->mmioErrorLog0, data->mmioErrorLog1); - if (data->dma0ErrorStatus || data->dma0FirstErrorStatus || - data->dma0ErrorLog0 || data->dma0ErrorLog1) - pr_info(" InAErr: %016llx %016llx %016llx %016llx\n", + if (data->dma0ErrorStatus) + pr_info("InAErr: %016llx %016llx %016llx %016llx\n", data->dma0ErrorStatus, data->dma0FirstErrorStatus, data->dma0ErrorLog0, data->dma0ErrorLog1); - if (data->dma1ErrorStatus || data->dma1FirstErrorStatus || - data->dma1ErrorLog0 || data->dma1ErrorLog1) - pr_info(" InBErr: %016llx %016llx %016llx %016llx\n", + if (data->dma1ErrorStatus) + pr_info("InBErr: %016llx %016llx %016llx %016llx\n", data->dma1ErrorStatus, data->dma1FirstErrorStatus, data->dma1ErrorLog0, data->dma1ErrorLog1); @@ -281,7 +270,7 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose, (data->pestB[i] >> 63) == 0) continue; - pr_info(" PE[%3d] A/B: %016llx %016llx\n", + pr_info("PE[%3d] A/B: %016llx %016llx\n", i, data->pestA[i], data->pestB[i]); } } @@ -384,9 +373,6 @@ int pnv_pci_cfg_read(struct device_node *dn, struct pci_dn *pdn = PCI_DN(dn); struct pnv_phb *phb = pdn->phb->private_data; u32 bdfn = (pdn->busno << 8) | pdn->devfn; 
-#ifdef CONFIG_EEH - struct eeh_pe *phb_pe = NULL; -#endif s64 rc; switch (size) { @@ -412,31 +398,9 @@ int pnv_pci_cfg_read(struct device_node *dn, default: return PCIBIOS_FUNC_NOT_SUPPORTED; } + cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n", __func__, pdn->busno, pdn->devfn, where, size, *val); - - /* - * Check if the specified PE has been put into frozen - * state. On the other hand, we needn't do that while - * the PHB has been put into frozen state because of - * PHB-fatal errors. - */ -#ifdef CONFIG_EEH - phb_pe = eeh_phb_pe_get(pdn->phb); - if (phb_pe && (phb_pe->state & EEH_PE_ISOLATED)) - return PCIBIOS_SUCCESSFUL; - - if (phb->eeh_state & PNV_EEH_STATE_ENABLED) { - if (*val == EEH_IO_ERROR_VALUE(size) && - eeh_dev_check_failure(of_node_to_eeh_dev(dn))) - return PCIBIOS_DEVICE_NOT_FOUND; - } else { - pnv_pci_config_check_eeh(phb, dn); - } -#else - pnv_pci_config_check_eeh(phb, dn); -#endif - return PCIBIOS_SUCCESSFUL; } @@ -463,33 +427,74 @@ int pnv_pci_cfg_write(struct device_node *dn, return PCIBIOS_FUNC_NOT_SUPPORTED; } - /* Check if the PHB got frozen due to an error (no response) */ -#ifdef CONFIG_EEH - if (!(phb->eeh_state & PNV_EEH_STATE_ENABLED)) - pnv_pci_config_check_eeh(phb, dn); -#else - pnv_pci_config_check_eeh(phb, dn); -#endif - return PCIBIOS_SUCCESSFUL; } +#if CONFIG_EEH +static bool pnv_pci_cfg_check(struct pci_controller *hose, + struct device_node *dn) +{ + struct eeh_dev *edev = NULL; + struct pnv_phb *phb = hose->private_data; + + /* EEH not enabled ? */ + if (!(phb->flags & PNV_PHB_FLAG_EEH)) + return true; + + /* PE reset or device removed ? 
*/ + edev = of_node_to_eeh_dev(dn); + if (edev) { + if (edev->pe && + (edev->pe->state & EEH_PE_RESET)) + return false; + + if (edev->mode & EEH_DEV_REMOVED) + return false; + } + + return true; +} +#else +static inline pnv_pci_cfg_check(struct pci_controller *hose, + struct device_node *dn) +{ + return true; +} +#endif /* CONFIG_EEH */ + static int pnv_pci_read_config(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { struct device_node *dn, *busdn = pci_bus_to_OF_node(bus); struct pci_dn *pdn; + struct pnv_phb *phb; + bool found = false; + int ret; + *val = 0xFFFFFFFF; for (dn = busdn->child; dn; dn = dn->sibling) { pdn = PCI_DN(dn); - if (pdn && pdn->devfn == devfn) - return pnv_pci_cfg_read(dn, where, size, val); + if (pdn && pdn->devfn == devfn) { + phb = pdn->phb->private_data; + found = true; + break; + } } - *val = 0xFFFFFFFF; - return PCIBIOS_DEVICE_NOT_FOUND; + if (!found || !pnv_pci_cfg_check(pdn->phb, dn)) + return PCIBIOS_DEVICE_NOT_FOUND; + ret = pnv_pci_cfg_read(dn, where, size, val); + if (phb->flags & PNV_PHB_FLAG_EEH) { + if (*val == EEH_IO_ERROR_VALUE(size) && + eeh_dev_check_failure(of_node_to_eeh_dev(dn))) + return PCIBIOS_DEVICE_NOT_FOUND; + } else { + pnv_pci_config_check_eeh(phb, dn); + } + + return ret; } static int pnv_pci_write_config(struct pci_bus *bus, @@ -498,14 +503,27 @@ static int pnv_pci_write_config(struct pci_bus *bus, { struct device_node *dn, *busdn = pci_bus_to_OF_node(bus); struct pci_dn *pdn; + struct pnv_phb *phb; + bool found = false; + int ret; for (dn = busdn->child; dn; dn = dn->sibling) { pdn = PCI_DN(dn); - if (pdn && pdn->devfn == devfn) - return pnv_pci_cfg_write(dn, where, size, val); + if (pdn && pdn->devfn == devfn) { + phb = pdn->phb->private_data; + found = true; + break; + } } - return PCIBIOS_DEVICE_NOT_FOUND; + if (!found || !pnv_pci_cfg_check(pdn->phb, dn)) + return PCIBIOS_DEVICE_NOT_FOUND; + + ret = pnv_pci_cfg_write(dn, where, size, val); + if (!(phb->flags & PNV_PHB_FLAG_EEH)) + 
pnv_pci_config_check_eeh(phb, dn); + + return ret; } struct pci_ops pnv_pci_ops = { diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index cde169442775..676232c34328 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -81,28 +81,27 @@ struct pnv_eeh_ops { int (*configure_bridge)(struct eeh_pe *pe); int (*next_error)(struct eeh_pe **pe); }; - -#define PNV_EEH_STATE_ENABLED (1 << 0) /* EEH enabled */ -#define PNV_EEH_STATE_REMOVED (1 << 1) /* PHB removed */ - #endif /* CONFIG_EEH */ +#define PNV_PHB_FLAG_EEH (1 << 0) + struct pnv_phb { struct pci_controller *hose; enum pnv_phb_type type; enum pnv_phb_model model; u64 hub_id; u64 opal_id; + int flags; void __iomem *regs; int initialized; spinlock_t lock; #ifdef CONFIG_EEH struct pnv_eeh_ops *eeh_ops; - int eeh_state; #endif #ifdef CONFIG_DEBUG_FS + int has_dbgfs; struct dentry *dbgfs; #endif @@ -205,5 +204,7 @@ extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, __be64 *startp, __be64 *endp, bool rm); +extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); +extern int ioda_eeh_phb_reset(struct pci_controller *hose, int option); #endif /* __POWERNV_PCI_H */ diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 61cf8fa9c61b..865aab40ded7 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -27,6 +27,7 @@ #include <linux/interrupt.h> #include <linux/bug.h> #include <linux/pci.h> +#include <linux/cpufreq.h> #include <asm/machdep.h> #include <asm/firmware.h> @@ -98,11 +99,32 @@ static void pnv_show_cpuinfo(struct seq_file *m) of_node_put(root); } +static void pnv_prepare_going_down(void) +{ + /* + * Disable all notifiers from OPAL, we can't + * service interrupts anymore anyway + */ + opal_notifier_disable(); + + 
/* Soft disable interrupts */ + local_irq_disable(); + + /* + * Return secondary CPUs to firmware if a flash update + * is pending otherwise we will get all sort of error + * messages about CPU being stuck etc.. This will also + * have the side effect of hard disabling interrupts so + * past this point, the kernel is effectively dead. + */ + opal_flash_term_callback(); +} + static void __noreturn pnv_restart(char *cmd) { long rc = OPAL_BUSY; - opal_notifier_disable(); + pnv_prepare_going_down(); while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_cec_reboot(); @@ -119,7 +141,7 @@ static void __noreturn pnv_power_off(void) { long rc = OPAL_BUSY; - opal_notifier_disable(); + pnv_prepare_going_down(); while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_cec_power_down(0); @@ -162,18 +184,62 @@ static void pnv_shutdown(void) } #ifdef CONFIG_KEXEC +static void pnv_kexec_wait_secondaries_down(void) +{ + int my_cpu, i, notified = -1; + + my_cpu = get_cpu(); + + for_each_online_cpu(i) { + uint8_t status; + int64_t rc; + + if (i == my_cpu) + continue; + + for (;;) { + rc = opal_query_cpu_status(get_hard_smp_processor_id(i), + &status); + if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED) + break; + barrier(); + if (i != notified) { + printk(KERN_INFO "kexec: waiting for cpu %d " + "(physical %d) to enter OPAL\n", + i, paca[i].hw_cpu_id); + notified = i; + } + } + } +} + static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) { xics_kexec_teardown_cpu(secondary); - /* Return secondary CPUs to firmware on OPAL v3 */ - if (firmware_has_feature(FW_FEATURE_OPALv3) && secondary) { + /* On OPAL v3, we return all CPUs to firmware */ + + if (!firmware_has_feature(FW_FEATURE_OPALv3)) + return; + + if (secondary) { + /* Return secondary CPUs to firmware on OPAL v3 */ mb(); get_paca()->kexec_state = KEXEC_STATE_REAL_MODE; mb(); /* Return the CPU to OPAL */ opal_return_cpu(); + } else if (crash_shutdown) { + /* + * On crash, we don't wait for secondaries
to go + * down as they might be unreachable or hung, so + * instead we just wait a bit and move on. + */ + mdelay(1); + } else { + /* Primary waits for the secondaries to have reached OPAL */ + pnv_kexec_wait_secondaries_down(); } } #endif /* CONFIG_KEXEC */ @@ -225,6 +291,25 @@ static int __init pnv_probe(void) return 1; } +/* + * Returns the cpu frequency for 'cpu' in Hz. This is used by + * /proc/cpuinfo + */ +unsigned long pnv_get_proc_freq(unsigned int cpu) +{ + unsigned long ret_freq; + + ret_freq = cpufreq_quick_get(cpu) * 1000ul; + + /* + * If the backend cpufreq driver does not exist, + * then fallback to old way of reporting the clockrate. + */ + if (!ret_freq) + ret_freq = ppc_proc_freq; + return ret_freq; +} + define_machine(powernv) { .name = "PowerNV", .probe = pnv_probe, @@ -232,6 +317,7 @@ define_machine(powernv) { .setup_arch = pnv_setup_arch, .init_IRQ = pnv_init_IRQ, .show_cpuinfo = pnv_show_cpuinfo, + .get_proc_freq = pnv_get_proc_freq, .progress = pnv_progress, .machine_shutdown = pnv_shutdown, .power_save = power7_idle, diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 908672bdcea6..bf5fcd452168 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -30,6 +30,7 @@ #include <asm/cputhreads.h> #include <asm/xics.h> #include <asm/opal.h> +#include <asm/runlatch.h> #include "powernv.h" @@ -156,7 +157,9 @@ static void pnv_smp_cpu_kill_self(void) */ mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1); while (!generic_check_cpu_restart(cpu)) { + ppc64_runlatch_off(); power7_nap(); + ppc64_runlatch_on(); if (!generic_check_cpu_restart(cpu)) { DBG("CPU%d Unexpected exit while offline !\n", cpu); /* We may be getting an IPI, so we re-enable diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 8a8f0472d98f..0bec0c02c5e7 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ 
b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -175,6 +175,36 @@ static int pseries_eeh_find_cap(struct device_node *dn, int cap) return 0; } +static int pseries_eeh_find_ecap(struct device_node *dn, int cap) +{ + struct pci_dn *pdn = PCI_DN(dn); + struct eeh_dev *edev = of_node_to_eeh_dev(dn); + u32 header; + int pos = 256; + int ttl = (4096 - 256) / 8; + + if (!edev || !edev->pcie_cap) + return 0; + if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) + return 0; + else if (!header) + return 0; + + while (ttl-- > 0) { + if (PCI_EXT_CAP_ID(header) == cap && pos) + return pos; + + pos = PCI_EXT_CAP_NEXT(header); + if (pos < 256) + break; + + if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) + break; + } + + return 0; +} + /** * pseries_eeh_of_probe - EEH probe on the given device * @dn: OF node @@ -220,7 +250,9 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) * or PCIe switch downstream port. */ edev->class_code = class_code; + edev->pcix_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_PCIX); edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP); + edev->aer_cap = pseries_eeh_find_ecap(dn, PCI_EXT_CAP_ID_ERR); edev->mode &= 0xFFFFFF00; if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; @@ -464,6 +496,7 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *state) } else { result = EEH_STATE_NOT_SUPPORT; } + break; default: result = EEH_STATE_NOT_SUPPORT; } @@ -499,11 +532,19 @@ static int pseries_eeh_reset(struct eeh_pe *pe, int option) /* If fundamental-reset not supported, try hot-reset */ if (option == EEH_RESET_FUNDAMENTAL && ret == -8) { + option = EEH_RESET_HOT; ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid), EEH_RESET_HOT); + BUID_LO(pe->phb->buid), option); } + /* We need reset hold or settlement delay */ + if (option == EEH_RESET_FUNDAMENTAL || + option == EEH_RESET_HOT) + msleep(EEH_PE_RST_HOLD_TIME); 
+ else + msleep(EEH_PE_RST_SETTLE_TIME); + return ret; } diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 9b8e05078a63..20d62975856f 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -88,13 +88,14 @@ void set_default_offline_state(int cpu) static void rtas_stop_self(void) { - struct rtas_args args = { - .token = cpu_to_be32(rtas_stop_self_token), + static struct rtas_args args = { .nargs = 0, .nret = 1, .rets = &args.args[0], }; + args.token = cpu_to_be32(rtas_stop_self_token); + local_irq_disable(); BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE); diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 573b488fc48b..7f75c94af822 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -100,10 +100,10 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz start_pfn = base >> PAGE_SHIFT; - if (!pfn_valid(start_pfn)) { - memblock_remove(base, memblock_size); - return 0; - } + lock_device_hotplug(); + + if (!pfn_valid(start_pfn)) + goto out; block_sz = memory_block_size_bytes(); sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; @@ -114,8 +114,10 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz base += MIN_MEMORY_BLOCK_SIZE; } +out: /* Update memory regions for memory remove */ memblock_remove(base, memblock_size); + unlock_device_hotplug(); return 0; } diff --git a/arch/powerpc/sysdev/ppc4xx_pci.c b/arch/powerpc/sysdev/ppc4xx_pci.c index 64603a10b863..4914fd3f41ec 100644 --- a/arch/powerpc/sysdev/ppc4xx_pci.c +++ b/arch/powerpc/sysdev/ppc4xx_pci.c @@ -1058,7 +1058,7 @@ static int __init apm821xx_pciex_core_init(struct device_node *np) return 1; } -static int apm821xx_pciex_init_port_hw(struct ppc4xx_pciex_port *port) +static int __init 
apm821xx_pciex_init_port_hw(struct ppc4xx_pciex_port *port) { u32 val; |