diff options
144 files changed, 5097 insertions, 704 deletions
diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt new file mode 100644 index 000000000000..dfab71848dc8 --- /dev/null +++ b/Documentation/acpi/apei/einj.txt @@ -0,0 +1,59 @@ + APEI Error INJection + ~~~~~~~~~~~~~~~~~~~~ + +EINJ provides a hardware error injection mechanism +It is very useful for debugging and testing of other APEI and RAS features. + +To use EINJ, make sure the following are enabled in your kernel +configuration: + +CONFIG_DEBUG_FS +CONFIG_ACPI_APEI +CONFIG_ACPI_APEI_EINJ + +The user interface of EINJ is debug file system, under the +directory apei/einj. The following files are provided. + +- available_error_type + Reading this file returns the error injection capability of the + platform, that is, which error types are supported. The error type + definition is as follow, the left field is the error type value, the + right field is error description. + + 0x00000001 Processor Correctable + 0x00000002 Processor Uncorrectable non-fatal + 0x00000004 Processor Uncorrectable fatal + 0x00000008 Memory Correctable + 0x00000010 Memory Uncorrectable non-fatal + 0x00000020 Memory Uncorrectable fatal + 0x00000040 PCI Express Correctable + 0x00000080 PCI Express Uncorrectable fatal + 0x00000100 PCI Express Uncorrectable non-fatal + 0x00000200 Platform Correctable + 0x00000400 Platform Uncorrectable non-fatal + 0x00000800 Platform Uncorrectable fatal + + The format of file contents are as above, except there are only the + available error type lines. + +- error_type + This file is used to set the error type value. The error type value + is defined in "available_error_type" description. + +- error_inject + Write any integer to this file to trigger the error + injection. Before this, please specify all necessary error + parameters. + +- param1 + This file is used to set the first error parameter value. Effect of + parameter depends on error_type specified. For memory error, this is + physical memory address. + +- param2 + This file is used to set the second error parameter value. Effect of + parameter depends on error_type specified. For memory error, this is + physical memory address mask. + +For more information about EINJ, please refer to ACPI specification +version 4.0, section 17.5. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 839b21b0699a..42d77735fd45 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -750,6 +750,10 @@ and is between 256 and 4096 characters. It is defined in the file Default value is 0. Value can be changed at runtime via /selinux/enforce. + erst_disable [ACPI] + Disable Error Record Serialization Table (ERST) + support. + ether= [HW,NET] Ethernet cards parameters This option is obsoleted by the "netdev=" option, which has equivalent usage. See its documentation for details. @@ -843,6 +847,11 @@ and is between 256 and 4096 characters. It is defined in the file hd= [EIDE] (E)IDE hard drive subsystem geometry Format: <cyl>,<head>,<sect> + hest_disable [ACPI] + Disable Hardware Error Source Table (HEST) support; + corresponding firmware-first mode error processing + logic will be disabled. + highmem=nn[KMG] [KNL,BOOT] forces the highmem zone to have an exact size of <nn>. This works even on boxes that have no highmem otherwise. This also works to reduce highmem diff --git a/MAINTAINERS b/MAINTAINERS index d5b0b1b6dc52..d329b053a718 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5492,7 +5492,7 @@ S: Maintained F: drivers/mmc/host/tmio_mmc.* TMPFS (SHMEM FILESYSTEM) -M: Hugh Dickins <hugh.dickins@tiscali.co.uk> +M: Hugh Dickins <hughd@google.com> L: linux-mm@kvack.org S: Maintained F: include/linux/shmem_fs.h @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 34 -EXTRAVERSION = -rc7 +EXTRAVERSION = NAME = Sheep on Meth # *DOCUMENTATION* diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 0d08d4170b64..4656a24058d2 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -371,6 +371,10 @@ static inline void __flush_icache_all(void) #ifdef CONFIG_ARM_ERRATA_411920 extern void v6_icache_inval_all(void); v6_icache_inval_all(); +#elif defined(CONFIG_SMP) && __LINUX_ARM_ARCH__ >= 7 + asm("mcr p15, 0, %0, c7, c1, 0 @ invalidate I-cache inner shareable\n" + : + : "r" (0)); #else asm("mcr p15, 0, %0, c7, c5, 0 @ invalidate I-cache\n" : diff --git a/arch/arm/include/asm/smp_twd.h b/arch/arm/include/asm/smp_twd.h index 7be0978b2625..634f357be6bb 100644 --- a/arch/arm/include/asm/smp_twd.h +++ b/arch/arm/include/asm/smp_twd.h @@ -1,6 +1,23 @@ #ifndef __ASMARM_SMP_TWD_H #define __ASMARM_SMP_TWD_H +#define TWD_TIMER_LOAD 0x00 +#define TWD_TIMER_COUNTER 0x04 +#define TWD_TIMER_CONTROL 0x08 +#define TWD_TIMER_INTSTAT 0x0C + +#define TWD_WDOG_LOAD 0x20 +#define TWD_WDOG_COUNTER 0x24 +#define TWD_WDOG_CONTROL 0x28 +#define TWD_WDOG_INTSTAT 0x2C +#define TWD_WDOG_RESETSTAT 0x30 +#define TWD_WDOG_DISABLE 0x34 + +#define TWD_TIMER_CONTROL_ENABLE (1 << 0) +#define TWD_TIMER_CONTROL_ONESHOT (0 << 1) +#define TWD_TIMER_CONTROL_PERIODIC (1 << 1) +#define TWD_TIMER_CONTROL_IT_ENABLE (1 << 2) + struct clock_event_device; extern void __iomem *twd_base; diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index e085e2c545eb..bd863d8608cd 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -46,6 +46,9 @@ #define TLB_V7_UIS_FULL (1 << 20) #define TLB_V7_UIS_ASID (1 << 21) +/* Inner Shareable BTB operation (ARMv7 MP extensions) */ +#define TLB_V7_IS_BTB (1 << 22) + #define TLB_L2CLEAN_FR (1 << 29) /* Feroceon */ #define TLB_DCLEAN (1 << 30) #define TLB_WB (1 << 31) @@ -183,7 +186,7 @@ #endif #ifdef CONFIG_SMP -#define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \ +#define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_V7_IS_BTB | \ TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID) #else #define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \ @@ -339,6 +342,12 @@ static inline void local_flush_tlb_all(void) dsb(); isb(); } + if (tlb_flag(TLB_V7_IS_BTB)) { + /* flush the branch target cache */ + asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc"); + dsb(); + isb(); + } } static inline void local_flush_tlb_mm(struct mm_struct *mm) @@ -376,6 +385,12 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm) asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc"); dsb(); } + if (tlb_flag(TLB_V7_IS_BTB)) { + /* flush the branch target cache */ + asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc"); + dsb(); + isb(); + } } static inline void @@ -416,6 +431,12 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc"); dsb(); } + if (tlb_flag(TLB_V7_IS_BTB)) { + /* flush the branch target cache */ + asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc"); + dsb(); + isb(); + } } static inline void local_flush_tlb_kernel_page(unsigned long kaddr) @@ -454,6 +475,12 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr) dsb(); isb(); } + if (tlb_flag(TLB_V7_IS_BTB)) { + /* flush the branch target cache */ + asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc"); + dsb(); + isb(); + } } /* diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index ea02a7b1c244..7c5f0c024db7 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -21,23 +21,6 @@ #include <asm/smp_twd.h> #include <asm/hardware/gic.h> -#define TWD_TIMER_LOAD 0x00 -#define TWD_TIMER_COUNTER 0x04 -#define TWD_TIMER_CONTROL 0x08 -#define TWD_TIMER_INTSTAT 0x0C - -#define TWD_WDOG_LOAD 0x20 -#define TWD_WDOG_COUNTER 0x24 -#define TWD_WDOG_CONTROL 0x28 -#define TWD_WDOG_INTSTAT 0x2C -#define TWD_WDOG_RESETSTAT 0x30 -#define TWD_WDOG_DISABLE 0x34 - -#define TWD_TIMER_CONTROL_ENABLE (1 << 0) -#define TWD_TIMER_CONTROL_ONESHOT (0 << 1) -#define TWD_TIMER_CONTROL_PERIODIC (1 << 1) -#define TWD_TIMER_CONTROL_IT_ENABLE (1 << 2) - /* set up by the platform code */ void __iomem *twd_base; diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S index 5e3f99620c04..14a0d988c82c 100644 --- a/arch/arm/lib/clear_user.S +++ b/arch/arm/lib/clear_user.S @@ -45,6 +45,7 @@ USER( strnebt r2, [r0]) mov r0, #0 ldmfd sp!, {r1, pc} ENDPROC(__clear_user) +ENDPROC(__clear_user_std) .pushsection .fixup,"ax" .align 0 diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index 027b69bdbad1..d066df686e17 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S @@ -93,6 +93,7 @@ WEAK(__copy_to_user) #include "copy_template.S" ENDPROC(__copy_to_user) +ENDPROC(__copy_to_user_std) .pushsection .fixup,"ax" .align 0 diff --git a/arch/arm/mach-davinci/da830.c b/arch/arm/mach-davinci/da830.c index 122e61a9f505..e8cb982f5e8e 100644 --- a/arch/arm/mach-davinci/da830.c +++ b/arch/arm/mach-davinci/da830.c @@ -410,7 +410,7 @@ static struct clk_lookup da830_clks[] = { CLK("davinci-mcasp.0", NULL, &mcasp0_clk), CLK("davinci-mcasp.1", NULL, &mcasp1_clk), CLK("davinci-mcasp.2", NULL, &mcasp2_clk), - CLK("musb_hdrc", NULL, &usb20_clk), + CLK(NULL, "usb20", &usb20_clk), CLK(NULL, "aemif", &aemif_clk), CLK(NULL, "aintc", &aintc_clk), CLK(NULL, "secu_mgr", &secu_mgr_clk), diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index 9d89c67a1cc3..e46ecd847138 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S @@ -211,6 +211,9 @@ v6_dma_inv_range: mcrne p15, 0, r1, c7, c15, 1 @ clean & invalidate unified line #endif 1: +#ifdef CONFIG_SMP + str r0, [r0] @ write for ownership +#endif #ifdef HARVARD_CACHE mcr p15, 0, r0, c7, c6, 1 @ invalidate D line #else @@ -231,6 +234,9 @@ v6_dma_inv_range: v6_dma_clean_range: bic r0, r0, #D_CACHE_LINE_SIZE - 1 1: +#ifdef CONFIG_SMP + ldr r2, [r0] @ read for ownership +#endif #ifdef HARVARD_CACHE mcr p15, 0, r0, c7, c10, 1 @ clean D line #else @@ -251,6 +257,10 @@ v6_dma_clean_range: ENTRY(v6_dma_flush_range) bic r0, r0, #D_CACHE_LINE_SIZE - 1 1: +#ifdef CONFIG_SMP + ldr r2, [r0] @ read for ownership + str r2, [r0] @ write for ownership +#endif #ifdef HARVARD_CACHE mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line #else @@ -273,7 +283,9 @@ ENTRY(v6_dma_map_area) add r1, r1, r0 teq r2, #DMA_FROM_DEVICE beq v6_dma_inv_range - b v6_dma_clean_range + teq r2, #DMA_TO_DEVICE + beq v6_dma_clean_range + b v6_dma_flush_range ENDPROC(v6_dma_map_area) /* @@ -283,9 +295,6 @@ ENDPROC(v6_dma_map_area) * - dir - DMA direction */ ENTRY(v6_dma_unmap_area) - add r1, r1, r0 - teq r2, #DMA_TO_DEVICE - bne v6_dma_inv_range mov pc, lr ENDPROC(v6_dma_unmap_area) diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index bcd64f265870..06a90dcfc60a 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -167,7 +167,11 @@ ENTRY(v7_coherent_user_range) cmp r0, r1 blo 1b mov r0, #0 +#ifdef CONFIG_SMP + mcr p15, 0, r0, c7, c1, 6 @ invalidate BTB Inner Shareable +#else mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB +#endif dsb isb mov pc, lr diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 9bfeb6b9509a..33b327379f07 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -65,6 +65,15 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long uaddr, void *dst, const void *src, + unsigned long len) +{ + memcpy(dst, src, len); + if (vma->vm_flags & VM_EXEC) + __cpuc_coherent_user_range(uaddr, uaddr + len); +} + void __iomem *__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, unsigned int mtype) { @@ -87,8 +96,8 @@ void __iomem *__arm_ioremap(unsigned long phys_addr, size_t size, } EXPORT_SYMBOL(__arm_ioremap); -void __iomem *__arm_ioremap(unsigned long phys_addr, size_t size, - unsigned int mtype, void *caller) +void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size, + unsigned int mtype, void *caller) { return __arm_ioremap(phys_addr, size, mtype); } diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S index 0cb1848bd876..f3f288a9546d 100644 --- a/arch/arm/mm/tlb-v7.S +++ b/arch/arm/mm/tlb-v7.S @@ -50,7 +50,11 @@ ENTRY(v7wbi_flush_user_tlb_range) cmp r0, r1 blo 1b mov ip, #0 +#ifdef CONFIG_SMP + mcr p15, 0, ip, c7, c1, 6 @ flush BTAC/BTB Inner Shareable +#else mcr p15, 0, ip, c7, c5, 6 @ flush BTAC/BTB +#endif dsb mov pc, lr ENDPROC(v7wbi_flush_user_tlb_range) @@ -79,7 +83,11 @@ ENTRY(v7wbi_flush_kern_tlb_range) cmp r0, r1 blo 1b mov r2, #0 +#ifdef CONFIG_SMP + mcr p15, 0, r2, c7, c1, 6 @ flush BTAC/BTB Inner Shareable +#else mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB +#endif dsb isb mov pc, lr diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 64aff520b899..aa2533ae7e9e 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -335,8 +335,11 @@ pcibios_setup_root_windows(struct pci_bus *bus, struct pci_controller *ctrl) } struct pci_bus * __devinit -pci_acpi_scan_root(struct acpi_device *device, int domain, int bus) +pci_acpi_scan_root(struct acpi_pci_root *root) { + struct acpi_device *device = root->device; + int domain = root->segment; + int bus = root->secondary.start; struct pci_controller *controller; unsigned int windows = 0; struct pci_bus *pbus; diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 446bec29b142..26460d15b338 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -182,6 +182,39 @@ extern long __user_bad(void); * Returns zero on success, or -EFAULT on error. * On error, the variable @x is set to zero. */ +#define get_user(x, ptr) \ + __get_user_check((x), (ptr), sizeof(*(ptr))) + +#define __get_user_check(x, ptr, size) \ +({ \ + unsigned long __gu_val = 0; \ + const typeof(*(ptr)) __user *__gu_addr = (ptr); \ + int __gu_err = 0; \ + \ + if (access_ok(VERIFY_READ, __gu_addr, size)) { \ + switch (size) { \ + case 1: \ + __get_user_asm("lbu", __gu_addr, __gu_val, \ + __gu_err); \ + break; \ + case 2: \ + __get_user_asm("lhu", __gu_addr, __gu_val, \ + __gu_err); \ + break; \ + case 4: \ + __get_user_asm("lw", __gu_addr, __gu_val, \ + __gu_err); \ + break; \ + default: \ + __gu_err = __user_bad(); \ + break; \ + } \ + } else { \ + __gu_err = -EFAULT; \ + } \ + x = (typeof(*(ptr)))__gu_val; \ + __gu_err; \ +}) #define __get_user(x, ptr) \ ({ \ @@ -206,12 +239,6 @@ extern long __user_bad(void); }) -#define get_user(x, ptr) \ -({ \ - access_ok(VERIFY_READ, (ptr), sizeof(*(ptr))) \ - ? __get_user((x), (ptr)) : -EFAULT; \ -}) - #define __put_user_asm(insn, __gu_ptr, __gu_val, __gu_err) \ ({ \ __asm__ __volatile__ ( \ @@ -266,6 +293,42 @@ extern long __user_bad(void); * * Returns zero on success, or -EFAULT on error. */ +#define put_user(x, ptr) \ + __put_user_check((x), (ptr), sizeof(*(ptr))) + +#define __put_user_check(x, ptr, size) \ +({ \ + typeof(*(ptr)) __pu_val; \ + typeof(*(ptr)) __user *__pu_addr = (ptr); \ + int __pu_err = 0; \ + \ + __pu_val = (x); \ + if (access_ok(VERIFY_WRITE, __pu_addr, size)) { \ + switch (size) { \ + case 1: \ + __put_user_asm("sb", __pu_addr, __pu_val, \ + __pu_err); \ + break; \ + case 2: \ + __put_user_asm("sh", __pu_addr, __pu_val, \ + __pu_err); \ + break; \ + case 4: \ + __put_user_asm("sw", __pu_addr, __pu_val, \ + __pu_err); \ + break; \ + case 8: \ + __put_user_asm_8(__pu_addr, __pu_val, __pu_err);\ + break; \ + default: \ + __pu_err = __user_bad(); \ + break; \ + } \ + } else { \ + __pu_err = -EFAULT; \ + } \ + __pu_err; \ +}) #define __put_user(x, ptr) \ ({ \ @@ -290,18 +353,6 @@ extern long __user_bad(void); __gu_err; \ }) -#ifndef CONFIG_MMU - -#define put_user(x, ptr) __put_user((x), (ptr)) - -#else /* CONFIG_MMU */ - -#define put_user(x, ptr) \ -({ \ - access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr))) \ - ? __put_user((x), (ptr)) : -EFAULT; \ -}) -#endif /* CONFIG_MMU */ /* copy_to_from_user */ #define __copy_from_user(to, from, n) \ diff --git a/arch/microblaze/kernel/cpu/cache.c b/arch/microblaze/kernel/cpu/cache.c index 21c3a92394de..109876e8d643 100644 --- a/arch/microblaze/kernel/cpu/cache.c +++ b/arch/microblaze/kernel/cpu/cache.c @@ -137,8 +137,9 @@ do { \ do { \ int step = -line_length; \ int align = ~(line_length - 1); \ + int count; \ end = ((end & align) == end) ? end - line_length : end & align; \ - int count = end - start; \ + count = end - start; \ WARN_ON(count < 0); \ \ __asm__ __volatile__ (" 1: " #op " %0, %1; \ diff --git a/arch/microblaze/kernel/entry-nommu.S b/arch/microblaze/kernel/entry-nommu.S index 391d6197fc3b..8cc18cd2cce6 100644 --- a/arch/microblaze/kernel/entry-nommu.S +++ b/arch/microblaze/kernel/entry-nommu.S @@ -476,6 +476,8 @@ ENTRY(ret_from_fork) nop work_pending: + enable_irq + andi r11, r19, _TIF_NEED_RESCHED beqi r11, 1f bralid r15, schedule diff --git a/arch/microblaze/kernel/microblaze_ksyms.c b/arch/microblaze/kernel/microblaze_ksyms.c index bc4dcb7d3861..ff85f7718035 100644 --- a/arch/microblaze/kernel/microblaze_ksyms.c +++ b/arch/microblaze/kernel/microblaze_ksyms.c @@ -52,3 +52,14 @@ EXPORT_SYMBOL_GPL(_ebss); extern void _mcount(void); EXPORT_SYMBOL(_mcount); #endif + +/* + * Assembly functions that may be used (directly or indirectly) by modules + */ +EXPORT_SYMBOL(__copy_tofrom_user); +EXPORT_SYMBOL(__strncpy_user); + +#ifdef CONFIG_OPT_LIB_ASM +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memmove); +#endif diff --git a/arch/microblaze/kernel/module.c b/arch/microblaze/kernel/module.c index cbecf110dc30..0e73f6606547 100644 --- a/arch/microblaze/kernel/module.c +++ b/arch/microblaze/kernel/module.c @@ -16,6 +16,7 @@ #include <linux/string.h> #include <asm/pgtable.h> +#include <asm/cacheflush.h> void *module_alloc(unsigned long size) { @@ -151,6 +152,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *module) { + flush_dcache(); return 0; } diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index f42c2dde8b1c..cca3579d4268 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -47,6 +47,7 @@ unsigned long memory_start; EXPORT_SYMBOL(memory_start); unsigned long memory_end; /* due to mm/nommu.c */ unsigned long memory_size; +EXPORT_SYMBOL(memory_size); /* * paging_init() sets up the page tables - in fact we've already done this. diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index 784557fb28cf..59bf2335a4ce 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -42,6 +42,7 @@ unsigned long ioremap_base; unsigned long ioremap_bot; +EXPORT_SYMBOL(ioremap_bot); /* The maximum lowmem defaults to 768Mb, but this can be configured to * another value. diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index 01c8c97c15b7..9cb782b8e036 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -1507,7 +1507,7 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus) pci_bus_add_devices(bus); /* Fixup EEH */ - eeh_add_device_tree_late(bus); + /* eeh_add_device_tree_late(bus); */ } EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus); diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 49382d5e891a..c6e3c93ce7c7 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -135,6 +135,12 @@ #define FPU_CSR_COND7 0x80000000 /* $fcc7 */ /* + * Bits 18 - 20 of the FPU Status Register will be read as 0, + * and should be written as zero. + */ +#define FPU_CSR_RSVD 0x001c0000 + +/* * X the exception cause indicator * E the exception enable * S the sticky/flag bit @@ -161,7 +167,8 @@ #define FPU_CSR_UDF_S 0x00000008 #define FPU_CSR_INE_S 0x00000004 -/* rounding mode */ +/* Bits 0 and 1 of FPU Status Register specify the rounding mode */ +#define FPU_CSR_RM 0x00000003 #define FPU_CSR_RN 0x0 /* nearest */ #define FPU_CSR_RZ 0x1 /* towards zero */ #define FPU_CSR_RU 0x2 /* towards +Infinity */ diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 44337ba03717..a5297e2a353a 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -385,7 +385,7 @@ EXPORT(sysn32_call_table) PTR sys_fchmodat PTR sys_faccessat PTR compat_sys_pselect6 - PTR sys_ppoll /* 6265 */ + PTR compat_sys_ppoll /* 6265 */ PTR sys_unshare PTR sys_splice PTR sys_sync_file_range diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 8f2f8e9d8b21..f2338d1c0b48 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -78,6 +78,9 @@ DEFINE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats); #define FPCREG_RID 0 /* $0 = revision id */ #define FPCREG_CSR 31 /* $31 = csr */ +/* Determine rounding mode from the RM bits of the FCSR */ +#define modeindex(v) ((v) & FPU_CSR_RM) + /* Convert Mips rounding mode (0..3) to IEEE library modes. */ static const unsigned char ieee_rm[4] = { [FPU_CSR_RN] = IEEE754_RN, @@ -384,10 +387,14 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx) (void *) (xcp->cp0_epc), MIPSInst_RT(ir), value); #endif - value &= (FPU_CSR_FLUSH | FPU_CSR_ALL_E | FPU_CSR_ALL_S | 0x03); - ctx->fcr31 &= ~(FPU_CSR_FLUSH | FPU_CSR_ALL_E | FPU_CSR_ALL_S | 0x03); - /* convert to ieee library modes */ - ctx->fcr31 |= (value & ~0x3) | ieee_rm[value & 0x3]; + + /* + * Don't write reserved bits, + * and convert to ieee library modes + */ + ctx->fcr31 = (value & + ~(FPU_CSR_RSVD | FPU_CSR_RM)) | + ieee_rm[modeindex(value)]; } if ((ctx->fcr31 >> 5) & ctx->fcr31 & FPU_CSR_ALL_E) { return SIGFPE; diff --git a/arch/mips/oprofile/op_model_loongson2.c b/arch/mips/oprofile/op_model_loongson2.c index 29e2326b6257..fa3bf661ae29 100644 --- a/arch/mips/oprofile/op_model_loongson2.c +++ b/arch/mips/oprofile/op_model_loongson2.c @@ -122,7 +122,7 @@ static irqreturn_t loongson2_perfcount_handler(int irq, void *dev_id) */ /* Check whether the irq belongs to me */ - enabled = read_c0_perfcnt() & LOONGSON2_PERFCNT_INT_EN; + enabled = read_c0_perfctrl() & LOONGSON2_PERFCNT_INT_EN; if (!enabled) return IRQ_NONE; enabled = reg.cnt1_enabled | reg.cnt2_enabled; diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 9f4c9d4f5803..bd100fcf40d0 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -130,43 +130,5 @@ static inline int irqs_disabled_flags(unsigned long flags) */ struct irq_chip; -#ifdef CONFIG_PERF_EVENTS - -#ifdef CONFIG_PPC64 -static inline unsigned long test_perf_event_pending(void) -{ - unsigned long x; - - asm volatile("lbz %0,%1(13)" - : "=r" (x) - : "i" (offsetof(struct paca_struct, perf_event_pending))); - return x; -} - -static inline void set_perf_event_pending(void) -{ - asm volatile("stb %0,%1(13)" : : - "r" (1), - "i" (offsetof(struct paca_struct, perf_event_pending))); -} - -static inline void clear_perf_event_pending(void) -{ - asm volatile("stb %0,%1(13)" : : - "r" (0), - "i" (offsetof(struct paca_struct, perf_event_pending))); -} -#endif /* CONFIG_PPC64 */ - -#else /* CONFIG_PERF_EVENTS */ - -static inline unsigned long test_perf_event_pending(void) -{ - return 0; -} - -static inline void clear_perf_event_pending(void) {} -#endif /* CONFIG_PERF_EVENTS */ - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HW_IRQ_H */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 957ceb7059c5..c09138d150d4 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -133,7 +133,6 @@ int main(void) DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); - DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); #ifdef CONFIG_PPC_MM_SLICES DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 59c928564a03..4ff4da2c238b 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -1,7 +1,8 @@ /* * Contains routines needed to support swiotlb for ppc. * - * Copyright (C) 2009 Becky Bruce, Freescale Semiconductor + * Copyright (C) 2009-2010 Freescale Semiconductor, Inc. + * Author: Becky Bruce * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -70,7 +71,7 @@ static int ppc_swiotlb_bus_notify(struct notifier_block *nb, sd->max_direct_dma_addr = 0; /* May need to bounce if the device can't address all of DRAM */ - if (dma_get_mask(dev) < lmb_end_of_DRAM()) + if ((dma_get_mask(dev) + 1) < lmb_end_of_DRAM()) set_dma_ops(dev, &swiotlb_dma_ops); return NOTIFY_DONE; diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 07109d843787..42e9d908914a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -556,15 +556,6 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) 2: TRACE_AND_RESTORE_IRQ(r5); -#ifdef CONFIG_PERF_EVENTS - /* check paca->perf_event_pending if we're enabling ints */ - lbz r3,PACAPERFPEND(r13) - and. r3,r3,r5 - beq 27f - bl .perf_event_do_pending -27: -#endif /* CONFIG_PERF_EVENTS */ - /* extract EE bit and use it to restore paca->hard_enabled */ ld r3,_MSR(r1) rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 64f6f2031c22..066bd31551d5 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -53,7 +53,6 @@ #include <linux/bootmem.h> #include <linux/pci.h> #include <linux/debugfs.h> -#include <linux/perf_event.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -145,11 +144,6 @@ notrace void raw_local_irq_restore(unsigned long en) } #endif /* CONFIG_PPC_STD_MMU_64 */ - if (test_perf_event_pending()) { - clear_perf_event_pending(); - perf_event_do_pending(); - } - /* * if (get_paca()->hard_enabled) return; * But again we need to take care that gcc gets hard_enabled directly diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 1b16b9a3e49a..0441bbdadbd1 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -532,25 +532,60 @@ void __init iSeries_time_init_early(void) } #endif /* CONFIG_PPC_ISERIES */ -#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32) -DEFINE_PER_CPU(u8, perf_event_pending); +#ifdef CONFIG_PERF_EVENTS -void set_perf_event_pending(void) +/* + * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... + */ +#ifdef CONFIG_PPC64 +static inline unsigned long test_perf_event_pending(void) { - get_cpu_var(perf_event_pending) = 1; - set_dec(1); - put_cpu_var(perf_event_pending); + unsigned long x; + + asm volatile("lbz %0,%1(13)" + : "=r" (x) + : "i" (offsetof(struct paca_struct, perf_event_pending))); + return x; } +static inline void set_perf_event_pending_flag(void) +{ + asm volatile("stb %0,%1(13)" : : + "r" (1), + "i" (offsetof(struct paca_struct, perf_event_pending))); +} + +static inline void clear_perf_event_pending(void) +{ + asm volatile("stb %0,%1(13)" : : + "r" (0), + "i" (offsetof(struct paca_struct, perf_event_pending))); +} + +#else /* 32-bit */ + +DEFINE_PER_CPU(u8, perf_event_pending); + +#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 #define test_perf_event_pending() __get_cpu_var(perf_event_pending) #define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 -#else /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ +#endif /* 32 vs 64 bit */ + +void set_perf_event_pending(void) +{ + preempt_disable(); + set_perf_event_pending_flag(); + set_dec(1); + preempt_enable(); +} + +#else /* CONFIG_PERF_EVENTS */ #define test_perf_event_pending() 0 #define clear_perf_event_pending() -#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ +#endif /* CONFIG_PERF_EVENTS */ /* * For iSeries shared processors, we have to let the hypervisor @@ -582,10 +617,6 @@ void timer_interrupt(struct pt_regs * regs) set_dec(DECREMENTER_MAX); #ifdef CONFIG_PPC32 - if (test_perf_event_pending()) { - clear_perf_event_pending(); - perf_event_do_pending(); - } if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); #endif @@ -604,6 +635,11 @@ void timer_interrupt(struct pt_regs * regs) calculate_steal_time(); + if (test_perf_event_pending()) { + clear_perf_event_pending(); + perf_event_do_pending(); + } + #ifdef CONFIG_PPC_ISERIES if (firmware_has_feature(FW_FEATURE_ISERIES)) get_lppaca()->int_dword.fields.decr_int = 0; diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 2570fcc7665d..812312542e50 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -440,7 +440,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) unsigned int gtlb_index; gtlb_index = kvmppc_get_gpr(vcpu, ra); - if (gtlb_index > KVM44x_GUEST_TLB_SIZE) { + if (gtlb_index >= KVM44x_GUEST_TLB_SIZE) { printk("%s: index %d\n", __func__, gtlb_index); kvmppc_dump_vcpu(vcpu); return EMULATE_FAIL; diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index 1bbcc499d455..b8f8dc126102 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -82,7 +82,7 @@ startup_continue: _ehead: #ifdef CONFIG_SHARED_KERNEL - .org 0x100000 + .org 0x100000 - 0x11000 # head.o ends at 0x11000 #endif # diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 1f70970de0aa..cdef68717416 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -80,7 +80,7 @@ startup_continue: _ehead: #ifdef CONFIG_SHARED_KERNEL - .org 0x100000 + .org 0x100000 - 0x11000 # head.o ends at 0x11000 #endif # diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 33fdc5a79764..9f654da4cecc 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -640,7 +640,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { - long ret; + long ret = 0; /* Do the secure computing check first. */ secure_computing(regs->gprs[2]); @@ -649,7 +649,6 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * The sysc_tracesys code in entry.S stored the system * call number to gprs[2]. */ - ret = regs->gprs[2]; if (test_thread_flag(TIF_SYSCALL_TRACE) && (tracehook_report_syscall_entry(regs) || regs->gprs[2] >= NR_syscalls)) { @@ -671,7 +670,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) regs->gprs[2], regs->orig_gpr2, regs->gprs[3], regs->gprs[4], regs->gprs[5]); - return ret; + return ret ?: regs->gprs[2]; } asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index f70e60071fe8..af00bd1d2089 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h @@ -16,11 +16,16 @@ extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); extern int k8_scan_nodes(void); #ifdef CONFIG_K8_NB +extern int num_k8_northbridges; + static inline struct pci_dev *node_to_k8_nb_misc(int node) { return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; } + #else +#define num_k8_northbridges 0 + static inline struct pci_dev *node_to_k8_nb_misc(int node) { return NULL; diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 6c3fdd631ed3..f32a4301c4d4 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -225,5 +225,13 @@ extern void mcheck_intel_therm_init(void); static inline void mcheck_intel_therm_init(void) { } #endif +/* + * Used by APEI to report memory error via /dev/mcelog + */ + +struct cper_sec_mem_err; +extern void apei_mce_report_mem_error(int corrected, + struct cper_sec_mem_err *mem_err); + #endif /* __KERNEL__ */ #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index b3eeb66c0a51..95962a93f99a 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -340,6 +340,10 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) (boot_cpu_data.x86_mask < 0x1))) return; + /* not in virtualized environments */ + if (num_k8_northbridges == 0) + return; + this_leaf->can_disable = true; this_leaf->l3_indices = amd_calc_l3_indices(); } diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 4ac6d48fe11b..bb34b03af252 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -7,3 +7,5 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o + +obj-$(CONFIG_ACPI_APEI) += mce-apei.o diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c new file mode 100644 index 000000000000..745b54f9be89 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -0,0 +1,138 @@ +/* + * Bridge between MCE and APEI + * + * On some machine, corrected memory errors are reported via APEI + * generic hardware error source (GHES) instead of corrected Machine + * Check. These corrected memory errors can be reported to user space + * through /dev/mcelog via faking a corrected Machine Check, so that + * the error memory page can be offlined by /sbin/mcelog if the error + * count for one page is beyond the threshold. + * + * For fatal MCE, save MCE record into persistent storage via ERST, so + * that the MCE record can be logged after reboot via ERST. + * + * Copyright 2010 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/acpi.h> +#include <linux/cper.h> +#include <acpi/apei.h> +#include <asm/mce.h> + +#include "mce-internal.h" + +void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) +{ + struct mce m; + + /* Only corrected MC is reported */ + if (!corrected) + return; + + mce_setup(&m); + m.bank = 1; + /* Fake a memory read corrected error with unknown channel */ + m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; + m.addr = mem_err->physical_addr; + mce_log(&m); + mce_notify_irq(); +} +EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); + +#define CPER_CREATOR_MCE \ + UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ + 0x64, 0x90, 0xb8, 0x9d) +#define CPER_SECTION_TYPE_MCE \ + UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ + 0x04, 0x4a, 0x38, 0xfc) + +/* + * CPER specification (in UEFI specification 2.3 appendix N) requires + * byte-packed. + */ +struct cper_mce_record { + struct cper_record_header hdr; + struct cper_section_descriptor sec_hdr; + struct mce mce; +} __packed; + +int apei_write_mce(struct mce *m) +{ + struct cper_mce_record rcd; + + memset(&rcd, 0, sizeof(rcd)); + memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE); + rcd.hdr.revision = CPER_RECORD_REV; + rcd.hdr.signature_end = CPER_SIG_END; + rcd.hdr.section_count = 1; + rcd.hdr.error_severity = CPER_SER_FATAL; + /* timestamp, platform_id, partition_id are all invalid */ + rcd.hdr.validation_bits = 0; + rcd.hdr.record_length = sizeof(rcd); + rcd.hdr.creator_id = CPER_CREATOR_MCE; + rcd.hdr.notification_type = CPER_NOTIFY_MCE; + rcd.hdr.record_id = cper_next_record_id(); + rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR; + + rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd; + rcd.sec_hdr.section_length = sizeof(rcd.mce); + rcd.sec_hdr.revision = CPER_SEC_REV; + /* fru_id and fru_text is invalid */ + rcd.sec_hdr.validation_bits = 0; + rcd.sec_hdr.flags = CPER_SEC_PRIMARY; + rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE; + rcd.sec_hdr.section_severity = CPER_SER_FATAL; + + memcpy(&rcd.mce, m, sizeof(*m)); + + return erst_write(&rcd.hdr); +} + +ssize_t apei_read_mce(struct mce *m, u64 *record_id) +{ + struct cper_mce_record rcd; + ssize_t len; + + len = erst_read_next(&rcd.hdr, sizeof(rcd)); + if (len <= 0) + return len; + /* Can not skip other records in storage via ERST unless clear them */ + else if (len != sizeof(rcd) || + uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) { + if (printk_ratelimit()) + pr_warning( + "MCE-APEI: Can not skip the unknown record in ERST"); + return -EIO; + } + + memcpy(m, &rcd.mce, sizeof(*m)); + *record_id = rcd.hdr.record_id; + + return sizeof(*m); +} + +/* Check whether there is record in ERST */ +int apei_check_mce(void) +{ + return erst_get_record_count(); +} + +int apei_clear_mce(u64 record_id) +{ + return erst_clear(record_id); +} diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 32996f9fab67..fefcc69ee8b5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -28,3 +28,26 @@ extern int mce_ser; extern struct mce_bank *mce_banks; +#ifdef CONFIG_ACPI_APEI +int apei_write_mce(struct mce *m); +ssize_t apei_read_mce(struct mce *m, u64 *record_id); +int apei_check_mce(void); +int apei_clear_mce(u64 record_id); +#else +static inline int apei_write_mce(struct mce *m) +{ + return -EINVAL; +} +static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id) +{ + return 0; +} +static inline int apei_check_mce(void) +{ + return 0; +} +static inline int apei_clear_mce(u64 record_id) +{ + return -EINVAL; +} +#endif diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8a6f0afa767e..09535ca9b9d7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -264,7 +264,7 @@ static void wait_for_panic(void) static void mce_panic(char *msg, struct mce *final, char *exp) { - int i; + int i, apei_err = 0; if (!fake_panic) { /* @@ -287,8 +287,11 @@ static void mce_panic(char *msg, struct mce *final, char *exp) struct mce *m = &mcelog.entry[i]; if (!(m->status & MCI_STATUS_VAL)) continue; - if (!(m->status & MCI_STATUS_UC)) + if (!(m->status & MCI_STATUS_UC)) { print_mce(m); + if (!apei_err) + apei_err = apei_write_mce(m); + } } /* Now print uncorrected but with the final one last */ for (i = 0; i < MCE_LOG_LEN; i++) { @@ -297,11 +300,17 @@ static void mce_panic(char *msg, struct mce *final, char *exp) continue; if (!(m->status & MCI_STATUS_UC)) continue; - if (!final || memcmp(m, final, sizeof(struct mce))) + if (!final || memcmp(m, final, sizeof(struct mce))) { print_mce(m); + if (!apei_err) + apei_err = apei_write_mce(m); + } } - if (final) + if (final) { print_mce(final); + if (!apei_err) + apei_err = apei_write_mce(final); + } if (cpu_missing) printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); print_mce_tail(); @@ -1493,6 +1502,43 @@ static void collect_tscs(void *data) rdtscll(cpu_tsc[smp_processor_id()]); } +static int mce_apei_read_done; + +/* Collect MCE record of previous boot in persistent storage via APEI ERST. */ +static int __mce_read_apei(char __user **ubuf, size_t usize) +{ + int rc; + u64 record_id; + struct mce m; + + if (usize < sizeof(struct mce)) + return -EINVAL; + + rc = apei_read_mce(&m, &record_id); + /* Error or no more MCE record */ + if (rc <= 0) { + mce_apei_read_done = 1; + return rc; + } + rc = -EFAULT; + if (copy_to_user(*ubuf, &m, sizeof(struct mce))) + return rc; + /* + * In fact, we should have cleared the record after that has + * been flushed to the disk or sent to network in + * /sbin/mcelog, but we have no interface to support that now, + * so just clear it to avoid duplication. + */ + rc = apei_clear_mce(record_id); + if (rc) { + mce_apei_read_done = 1; + return rc; + } + *ubuf += sizeof(struct mce); + + return 0; +} + static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off) { @@ -1506,15 +1552,19 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, return -ENOMEM; mutex_lock(&mce_read_mutex); + + if (!mce_apei_read_done) { + err = __mce_read_apei(&buf, usize); + if (err || buf != ubuf) + goto out; + } + next = rcu_dereference_check_mce(mcelog.next); /* Only supports full reads right now */ - if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { - mutex_unlock(&mce_read_mutex); - kfree(cpu_tsc); - - return -EINVAL; - } + err = -EINVAL; + if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) + goto out; err = 0; prev = 0; @@ -1562,10 +1612,15 @@ timeout: memset(&mcelog.entry[i], 0, sizeof(struct mce)); } } + + if (err) + err = -EFAULT; + +out: mutex_unlock(&mce_read_mutex); kfree(cpu_tsc); - return err ? -EFAULT : buf - ubuf; + return err ? err : buf - ubuf; } static unsigned int mce_poll(struct file *file, poll_table *wait) @@ -1573,6 +1628,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait) poll_wait(file, &mce_wait, wait); if (rcu_dereference_check_mce(mcelog.next)) return POLLIN | POLLRDNORM; + if (!mce_apei_read_done && apei_check_mce()) + return POLLIN | POLLRDNORM; return 0; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 28ad9f4d8b94..0415c3ef91b5 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -546,11 +546,13 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) * check OSVW bit for CPUs that are not affected * by erratum #400 */ - rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val); - if (val >= 2) { - rdmsrl(MSR_AMD64_OSVW_STATUS, val); - if (!(val & BIT(1))) - goto no_c1e_idle; + if (cpu_has(c, X86_FEATURE_OSVW)) { + rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val); + if (val >= 2) { + rdmsrl(MSR_AMD64_OSVW_STATUS, val); + if (!(val & BIT(1))) + goto no_c1e_idle; + } } return 1; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2ba58206812a..737361fcd503 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2067,7 +2067,7 @@ static int cpuid_interception(struct vcpu_svm *svm) static int iret_interception(struct vcpu_svm *svm) { ++svm->vcpu.stat.nmi_window_exits; - svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); + svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET); svm->vcpu.arch.hflags |= HF_IRET_MASK; return 1; } @@ -2479,7 +2479,7 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu) svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; vcpu->arch.hflags |= HF_NMI_MASK; - svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); + svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET); ++vcpu->stat.nmi_injections; } @@ -2539,10 +2539,10 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) if (masked) { svm->vcpu.arch.hflags |= HF_NMI_MASK; - svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); + svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET); } else { svm->vcpu.arch.hflags &= ~HF_NMI_MASK; - svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); + svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET); } } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bc933cfb4e66..2f8db0ec8ae4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2703,8 +2703,7 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) return 0; return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS | - GUEST_INTR_STATE_NMI)); + (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI)); } static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3c4ca98ad27f..c4f35b545c1d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1712,6 +1712,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, if (copy_from_user(cpuid_entries, entries, cpuid->nent * sizeof(struct kvm_cpuid_entry))) goto out_free; + vcpu_load(vcpu); for (i = 0; i < cpuid->nent; i++) { vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; @@ -1729,6 +1730,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, r = 0; kvm_apic_set_version(vcpu); kvm_x86_ops->cpuid_update(vcpu); + vcpu_put(vcpu); out_free: vfree(cpuid_entries); @@ -1749,9 +1751,11 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, if (copy_from_user(&vcpu->arch.cpuid_entries, entries, cpuid->nent * sizeof(struct kvm_cpuid_entry2))) goto out; + vcpu_load(vcpu); vcpu->arch.cpuid_nent = cpuid->nent; kvm_apic_set_version(vcpu); kvm_x86_ops->cpuid_update(vcpu); + vcpu_put(vcpu); return 0; out: diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 28c68762648f..38512d0c4742 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -461,7 +461,8 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) * node, it must now point to the fake node ID. */ for (j = 0; j < MAX_LOCAL_APIC; j++) - if (apicid_to_node[j] == nid) + if (apicid_to_node[j] == nid && + fake_apicid_to_node[j] == NUMA_NO_NODE) fake_apicid_to_node[j] = i; } for (i = 0; i < num_nodes; i++) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 31930fd30ea9..9dcf43d7d0c0 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -224,8 +224,11 @@ res_alloc_fail: return; } -struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum) +struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) { + struct acpi_device *device = root->device; + int domain = root->segment; + int busnum = root->secondary.start; struct pci_bus *bus; struct pci_sysdata *sd; int node; diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c index 8bf2fcb88d04..1cdc02cf8fa4 100644 --- a/arch/x86/pci/mrst.c +++ b/arch/x86/pci/mrst.c @@ -247,6 +247,10 @@ static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev) u32 size; int i; + /* Must have extended configuration space */ + if (dev->cfg_size < PCIE_CAP_OFFSET + 4) + return; + /* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */ offset = fixed_bar_cap(dev->bus, dev->devfn); if (!offset || PCI_DEVFN(2, 0) == dev->devfn || diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 93d2c7971df6..746411518802 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -360,4 +360,13 @@ config ACPI_SBS To compile this driver as a module, choose M here: the modules will be called sbs and sbshc. +config ACPI_HED + tristate "Hardware Error Device" + help + This driver supports the Hardware Error Device (PNP0C33), + which is used to report some hardware errors notified via + SCI, mainly the corrected errors. + +source "drivers/acpi/apei/Kconfig" + endif # ACPI diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index a8d8998dd5c5..6ee33169e1dc 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -19,7 +19,7 @@ obj-y += acpi.o \ # All the builtin files are in the "acpi." module_param namespace. acpi-y += osl.o utils.o reboot.o -acpi-y += hest.o +acpi-y += atomicio.o # sleep related files acpi-y += wakeup.o @@ -59,6 +59,7 @@ obj-$(CONFIG_ACPI_BATTERY) += battery.o obj-$(CONFIG_ACPI_SBS) += sbshc.o obj-$(CONFIG_ACPI_SBS) += sbs.o obj-$(CONFIG_ACPI_POWER_METER) += power_meter.o +obj-$(CONFIG_ACPI_HED) += hed.o # processor has its own "processor." module_param namespace processor-y := processor_driver.o processor_throttling.o @@ -66,3 +67,5 @@ processor-y += processor_idle.o processor_thermal.o processor-$(CONFIG_CPU_FREQ) += processor_perflib.o obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o + +obj-$(CONFIG_ACPI_APEI) += apei/ diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig new file mode 100644 index 000000000000..f8c668f27b5a --- /dev/null +++ b/drivers/acpi/apei/Kconfig @@ -0,0 +1,30 @@ +config ACPI_APEI + bool "ACPI Platform Error Interface (APEI)" + depends on X86 + help + APEI allows to report errors (for example from the chipset) + to the operating system. This improves NMI handling + especially. In addition it supports error serialization and + error injection. + +config ACPI_APEI_GHES + tristate "APEI Generic Hardware Error Source" + depends on ACPI_APEI && X86 + select ACPI_HED + help + Generic Hardware Error Source provides a way to report + platform hardware errors (such as that from chipset). It + works in so called "Firmware First" mode, that is, hardware + errors are reported to firmware firstly, then reported to + Linux by firmware. This way, some non-standard hardware + error registers or non-standard hardware link can be checked + by firmware to produce more valuable hardware error + information for Linux. + +config ACPI_APEI_EINJ + tristate "APEI Error INJection (EINJ)" + depends on ACPI_APEI && DEBUG_FS + help + EINJ provides a hardware error injection mechanism, it is + mainly used for debugging and testing the other parts of + APEI and some other RAS features. diff --git a/drivers/acpi/apei/Makefile b/drivers/acpi/apei/Makefile new file mode 100644 index 000000000000..b13b03a17789 --- /dev/null +++ b/drivers/acpi/apei/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_ACPI_APEI) += apei.o +obj-$(CONFIG_ACPI_APEI_GHES) += ghes.o +obj-$(CONFIG_ACPI_APEI_EINJ) += einj.o + +apei-y := apei-base.o hest.o cper.o erst.o diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c new file mode 100644 index 000000000000..db3946e9c66b --- /dev/null +++ b/drivers/acpi/apei/apei-base.c @@ -0,0 +1,593 @@ +/* + * apei-base.c - ACPI Platform Error Interface (APEI) supporting + * infrastructure + * + * APEI allows to report errors (for example from the chipset) to the + * the operating system. This improves NMI handling especially. In + * addition it supports error serialization and error injection. + * + * For more information about APEI, please refer to ACPI Specification + * version 4.0, chapter 17. + * + * This file has Common functions used by more than one APEI table, + * including framework of interpreter for ERST and EINJ; resource + * management for APEI registers. + * + * Copyright (C) 2009, Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/acpi.h> +#include <linux/io.h> +#include <linux/kref.h> +#include <linux/rculist.h> +#include <linux/interrupt.h> +#include <linux/debugfs.h> +#include <acpi/atomicio.h> + +#include "apei-internal.h" + +#define APEI_PFX "APEI: " + +/* + * APEI ERST (Error Record Serialization Table) and EINJ (Error + * INJection) interpreter framework. + */ + +#define APEI_EXEC_PRESERVE_REGISTER 0x1 + +void apei_exec_ctx_init(struct apei_exec_context *ctx, + struct apei_exec_ins_type *ins_table, + u32 instructions, + struct acpi_whea_header *action_table, + u32 entries) +{ + ctx->ins_table = ins_table; + ctx->instructions = instructions; + ctx->action_table = action_table; + ctx->entries = entries; +} +EXPORT_SYMBOL_GPL(apei_exec_ctx_init); + +int __apei_exec_read_register(struct acpi_whea_header *entry, u64 *val) +{ + int rc; + + rc = acpi_atomic_read(val, &entry->register_region); + if (rc) + return rc; + *val >>= entry->register_region.bit_offset; + *val &= entry->mask; + + return 0; +} + +int apei_exec_read_register(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + int rc; + u64 val = 0; + + rc = __apei_exec_read_register(entry, &val); + if (rc) + return rc; + ctx->value = val; + + return 0; +} +EXPORT_SYMBOL_GPL(apei_exec_read_register); + +int apei_exec_read_register_value(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + int rc; + + rc = apei_exec_read_register(ctx, entry); + if (rc) + return rc; + ctx->value = (ctx->value == entry->value); + + return 0; +} +EXPORT_SYMBOL_GPL(apei_exec_read_register_value); + +int __apei_exec_write_register(struct acpi_whea_header *entry, u64 val) +{ + int rc; + + val &= entry->mask; + val <<= entry->register_region.bit_offset; + if (entry->flags & APEI_EXEC_PRESERVE_REGISTER) { + u64 valr = 0; + rc = acpi_atomic_read(&valr, &entry->register_region); + if (rc) + return rc; + valr &= ~(entry->mask << entry->register_region.bit_offset); + val |= valr; + } + rc = acpi_atomic_write(val, &entry->register_region); + + return rc; +} + +int apei_exec_write_register(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + return __apei_exec_write_register(entry, ctx->value); +} +EXPORT_SYMBOL_GPL(apei_exec_write_register); + +int apei_exec_write_register_value(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + int rc; + + ctx->value = entry->value; + rc = apei_exec_write_register(ctx, entry); + + return rc; +} +EXPORT_SYMBOL_GPL(apei_exec_write_register_value); + +int apei_exec_noop(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + return 0; +} +EXPORT_SYMBOL_GPL(apei_exec_noop); + +/* + * Interpret the specified action. Go through whole action table, + * execute all instructions belong to the action. + */ +int apei_exec_run(struct apei_exec_context *ctx, u8 action) +{ + int rc; + u32 i, ip; + struct acpi_whea_header *entry; + apei_exec_ins_func_t run; + + ctx->ip = 0; + + /* + * "ip" is the instruction pointer of current instruction, + * "ctx->ip" specifies the next instruction to executed, + * instruction "run" function may change the "ctx->ip" to + * implement "goto" semantics. + */ +rewind: + ip = 0; + for (i = 0; i < ctx->entries; i++) { + entry = &ctx->action_table[i]; + if (entry->action != action) + continue; + if (ip == ctx->ip) { + if (entry->instruction >= ctx->instructions || + !ctx->ins_table[entry->instruction].run) { + pr_warning(FW_WARN APEI_PFX + "Invalid action table, unknown instruction type: %d\n", + entry->instruction); + return -EINVAL; + } + run = ctx->ins_table[entry->instruction].run; + rc = run(ctx, entry); + if (rc < 0) + return rc; + else if (rc != APEI_EXEC_SET_IP) + ctx->ip++; + } + ip++; + if (ctx->ip < ip) + goto rewind; + } + + return 0; +} +EXPORT_SYMBOL_GPL(apei_exec_run); + +typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx, + struct acpi_whea_header *entry, + void *data); + +static int apei_exec_for_each_entry(struct apei_exec_context *ctx, + apei_exec_entry_func_t func, + void *data, + int *end) +{ + u8 ins; + int i, rc; + struct acpi_whea_header *entry; + struct apei_exec_ins_type *ins_table = ctx->ins_table; + + for (i = 0; i < ctx->entries; i++) { + entry = ctx->action_table + i; + ins = entry->instruction; + if (end) + *end = i; + if (ins >= ctx->instructions || !ins_table[ins].run) { + pr_warning(FW_WARN APEI_PFX + "Invalid action table, unknown instruction type: %d\n", + ins); + return -EINVAL; + } + rc = func(ctx, entry, data); + if (rc) + return rc; + } + + return 0; +} + +static int pre_map_gar_callback(struct apei_exec_context *ctx, + struct acpi_whea_header *entry, + void *data) +{ + u8 ins = entry->instruction; + + if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER) + return acpi_pre_map_gar(&entry->register_region); + + return 0; +} + +/* + * Pre-map all GARs in action table to make it possible to access them + * in NMI handler. + */ +int apei_exec_pre_map_gars(struct apei_exec_context *ctx) +{ + int rc, end; + + rc = apei_exec_for_each_entry(ctx, pre_map_gar_callback, + NULL, &end); + if (rc) { + struct apei_exec_context ctx_unmap; + memcpy(&ctx_unmap, ctx, sizeof(*ctx)); + ctx_unmap.entries = end; + apei_exec_post_unmap_gars(&ctx_unmap); + } + + return rc; +} +EXPORT_SYMBOL_GPL(apei_exec_pre_map_gars); + +static int post_unmap_gar_callback(struct apei_exec_context *ctx, + struct acpi_whea_header *entry, + void *data) +{ + u8 ins = entry->instruction; + + if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER) + acpi_post_unmap_gar(&entry->register_region); + + return 0; +} + +/* Post-unmap all GAR in action table. */ +int apei_exec_post_unmap_gars(struct apei_exec_context *ctx) +{ + return apei_exec_for_each_entry(ctx, post_unmap_gar_callback, + NULL, NULL); +} +EXPORT_SYMBOL_GPL(apei_exec_post_unmap_gars); + +/* + * Resource management for GARs in APEI + */ +struct apei_res { + struct list_head list; + unsigned long start; + unsigned long end; +}; + +/* Collect all resources requested, to avoid conflict */ +struct apei_resources apei_resources_all = { + .iomem = LIST_HEAD_INIT(apei_resources_all.iomem), + .ioport = LIST_HEAD_INIT(apei_resources_all.ioport), +}; + +static int apei_res_add(struct list_head *res_list, + unsigned long start, unsigned long size) +{ + struct apei_res *res, *resn, *res_ins = NULL; + unsigned long end = start + size; + + if (end <= start) + return 0; +repeat: + list_for_each_entry_safe(res, resn, res_list, list) { + if (res->start > end || res->end < start) + continue; + else if (end <= res->end && start >= res->start) { + kfree(res_ins); + return 0; + } + list_del(&res->list); + res->start = start = min(res->start, start); + res->end = end = max(res->end, end); + kfree(res_ins); + res_ins = res; + goto repeat; + } + + if (res_ins) + list_add(&res_ins->list, res_list); + else { + res_ins = kmalloc(sizeof(*res), GFP_KERNEL); + if (!res_ins) + return -ENOMEM; + res_ins->start = start; + res_ins->end = end; + list_add(&res_ins->list, res_list); + } + + return 0; +} + +static int apei_res_sub(struct list_head *res_list1, + struct list_head *res_list2) +{ + struct apei_res *res1, *resn1, *res2, *res; + res1 = list_entry(res_list1->next, struct apei_res, list); + resn1 = list_entry(res1->list.next, struct apei_res, list); + while (&res1->list != res_list1) { + list_for_each_entry(res2, res_list2, list) { + if (res1->start >= res2->end || + res1->end <= res2->start) + continue; + else if (res1->end <= res2->end && + res1->start >= res2->start) { + list_del(&res1->list); + kfree(res1); + break; + } else if (res1->end > res2->end && + res1->start < res2->start) { + res = kmalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + res->start = res2->end; + res->end = res1->end; + res1->end = res2->start; + list_add(&res->list, &res1->list); + resn1 = res; + } else { + if (res1->start < res2->start) + res1->end = res2->start; + else + res1->start = res2->end; + } + } + res1 = resn1; + resn1 = list_entry(resn1->list.next, struct apei_res, list); + } + + return 0; +} + +static void apei_res_clean(struct list_head *res_list) +{ + struct apei_res *res, *resn; + + list_for_each_entry_safe(res, resn, res_list, list) { + list_del(&res->list); + kfree(res); + } +} + +void apei_resources_fini(struct apei_resources *resources) +{ + apei_res_clean(&resources->iomem); + apei_res_clean(&resources->ioport); +} +EXPORT_SYMBOL_GPL(apei_resources_fini); + +static int apei_resources_merge(struct apei_resources *resources1, + struct apei_resources *resources2) +{ + int rc; + struct apei_res *res; + + list_for_each_entry(res, &resources2->iomem, list) { + rc = apei_res_add(&resources1->iomem, res->start, + res->end - res->start); + if (rc) + return rc; + } + list_for_each_entry(res, &resources2->ioport, list) { + rc = apei_res_add(&resources1->ioport, res->start, + res->end - res->start); + if (rc) + return rc; + } + + return 0; +} + +/* + * EINJ has two groups of GARs (EINJ table entry and trigger table + * entry), so common resources are subtracted from the trigger table + * resources before the second requesting. + */ +int apei_resources_sub(struct apei_resources *resources1, + struct apei_resources *resources2) +{ + int rc; + + rc = apei_res_sub(&resources1->iomem, &resources2->iomem); + if (rc) + return rc; + return apei_res_sub(&resources1->ioport, &resources2->ioport); +} +EXPORT_SYMBOL_GPL(apei_resources_sub); + +/* + * IO memory/port rersource management mechanism is used to check + * whether memory/port area used by GARs conflicts with normal memory + * or IO memory/port of devices. + */ +int apei_resources_request(struct apei_resources *resources, + const char *desc) +{ + struct apei_res *res, *res_bak; + struct resource *r; + + apei_resources_sub(resources, &apei_resources_all); + + list_for_each_entry(res, &resources->iomem, list) { + r = request_mem_region(res->start, res->end - res->start, + desc); + if (!r) { + pr_err(APEI_PFX + "Can not request iomem region <%016llx-%016llx> for GARs.\n", + (unsigned long long)res->start, + (unsigned long long)res->end); + res_bak = res; + goto err_unmap_iomem; + } + } + + list_for_each_entry(res, &resources->ioport, list) { + r = request_region(res->start, res->end - res->start, desc); + if (!r) { + pr_err(APEI_PFX + "Can not request ioport region <%016llx-%016llx> for GARs.\n", + (unsigned long long)res->start, + (unsigned long long)res->end); + res_bak = res; + goto err_unmap_ioport; + } + } + + apei_resources_merge(&apei_resources_all, resources); + + return 0; +err_unmap_ioport: + list_for_each_entry(res, &resources->ioport, list) { + if (res == res_bak) + break; + release_mem_region(res->start, res->end - res->start); + } + res_bak = NULL; +err_unmap_iomem: + list_for_each_entry(res, &resources->iomem, list) { + if (res == res_bak) + break; + release_region(res->start, res->end - res->start); + } + return -EINVAL; +} +EXPORT_SYMBOL_GPL(apei_resources_request); + +void apei_resources_release(struct apei_resources *resources) +{ + struct apei_res *res; + + list_for_each_entry(res, &resources->iomem, list) + release_mem_region(res->start, res->end - res->start); + list_for_each_entry(res, &resources->ioport, list) + release_region(res->start, res->end - res->start); + + apei_resources_sub(&apei_resources_all, resources); +} +EXPORT_SYMBOL_GPL(apei_resources_release); + +static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr) +{ + u32 width, space_id; + + width = reg->bit_width; + space_id = reg->space_id; + /* Handle possible alignment issues */ + memcpy(paddr, ®->address, sizeof(*paddr)); + if (!*paddr) { + pr_warning(FW_BUG APEI_PFX + "Invalid physical address in GAR [0x%llx/%u/%u]\n", + *paddr, width, space_id); + return -EINVAL; + } + + if ((width != 8) && (width != 16) && (width != 32) && (width != 64)) { + pr_warning(FW_BUG APEI_PFX + "Invalid bit width in GAR [0x%llx/%u/%u]\n", + *paddr, width, space_id); + return -EINVAL; + } + + if (space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY && + space_id != ACPI_ADR_SPACE_SYSTEM_IO) { + pr_warning(FW_BUG APEI_PFX + "Invalid address space type in GAR [0x%llx/%u/%u]\n", + *paddr, width, space_id); + return -EINVAL; + } + + return 0; +} + +static int collect_res_callback(struct apei_exec_context *ctx, + struct acpi_whea_header *entry, + void *data) +{ + struct apei_resources *resources = data; + struct acpi_generic_address *reg = &entry->register_region; + u8 ins = entry->instruction; + u64 paddr; + int rc; + + if (!(ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER)) + return 0; + + rc = apei_check_gar(reg, &paddr); + if (rc) + return rc; + + switch (reg->space_id) { + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + return apei_res_add(&resources->iomem, paddr, + reg->bit_width / 8); + case ACPI_ADR_SPACE_SYSTEM_IO: + return apei_res_add(&resources->ioport, paddr, + reg->bit_width / 8); + default: + return -EINVAL; + } +} + +/* + * Same register may be used by multiple instructions in GARs, so + * resources are collected before requesting. + */ +int apei_exec_collect_resources(struct apei_exec_context *ctx, + struct apei_resources *resources) +{ + return apei_exec_for_each_entry(ctx, collect_res_callback, + resources, NULL); +} +EXPORT_SYMBOL_GPL(apei_exec_collect_resources); + +struct dentry *apei_get_debugfs_dir(void) +{ + static struct dentry *dapei; + + if (!dapei) + dapei = debugfs_create_dir("apei", NULL); + + return dapei; +} +EXPORT_SYMBOL_GPL(apei_get_debugfs_dir); diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h new file mode 100644 index 000000000000..18df1e940276 --- /dev/null +++ b/drivers/acpi/apei/apei-internal.h @@ -0,0 +1,114 @@ +/* + * apei-internal.h - ACPI Platform Error Interface internal + * definations. + */ + +#ifndef APEI_INTERNAL_H +#define APEI_INTERNAL_H + +#include <linux/cper.h> + +struct apei_exec_context; + +typedef int (*apei_exec_ins_func_t)(struct apei_exec_context *ctx, + struct acpi_whea_header *entry); + +#define APEI_EXEC_INS_ACCESS_REGISTER 0x0001 + +struct apei_exec_ins_type { + u32 flags; + apei_exec_ins_func_t run; +}; + +struct apei_exec_context { + u32 ip; + u64 value; + u64 var1; + u64 var2; + u64 src_base; + u64 dst_base; + struct apei_exec_ins_type *ins_table; + u32 instructions; + struct acpi_whea_header *action_table; + u32 entries; +}; + +void apei_exec_ctx_init(struct apei_exec_context *ctx, + struct apei_exec_ins_type *ins_table, + u32 instructions, + struct acpi_whea_header *action_table, + u32 entries); + +static inline void apei_exec_ctx_set_input(struct apei_exec_context *ctx, + u64 input) +{ + ctx->value = input; +} + +static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx) +{ + return ctx->value; +} + +int apei_exec_run(struct apei_exec_context *ctx, u8 action); + +/* Common instruction implementation */ + +/* IP has been set in instruction function */ +#define APEI_EXEC_SET_IP 1 + +int __apei_exec_read_register(struct acpi_whea_header *entry, u64 *val); +int __apei_exec_write_register(struct acpi_whea_header *entry, u64 val); +int apei_exec_read_register(struct apei_exec_context *ctx, + struct acpi_whea_header *entry); +int apei_exec_read_register_value(struct apei_exec_context *ctx, + struct acpi_whea_header *entry); +int apei_exec_write_register(struct apei_exec_context *ctx, + struct acpi_whea_header *entry); +int apei_exec_write_register_value(struct apei_exec_context *ctx, + struct acpi_whea_header *entry); +int apei_exec_noop(struct apei_exec_context *ctx, + struct acpi_whea_header *entry); +int apei_exec_pre_map_gars(struct apei_exec_context *ctx); +int apei_exec_post_unmap_gars(struct apei_exec_context *ctx); + +struct apei_resources { + struct list_head iomem; + struct list_head ioport; +}; + +static inline void apei_resources_init(struct apei_resources *resources) +{ + INIT_LIST_HEAD(&resources->iomem); + INIT_LIST_HEAD(&resources->ioport); +} + +void apei_resources_fini(struct apei_resources *resources); +int apei_resources_sub(struct apei_resources *resources1, + struct apei_resources *resources2); +int apei_resources_request(struct apei_resources *resources, + const char *desc); +void apei_resources_release(struct apei_resources *resources); +int apei_exec_collect_resources(struct apei_exec_context *ctx, + struct apei_resources *resources); + +struct dentry; +struct dentry *apei_get_debugfs_dir(void); + +#define apei_estatus_for_each_section(estatus, section) \ + for (section = (struct acpi_hest_generic_data *)(estatus + 1); \ + (void *)section - (void *)estatus < estatus->data_length; \ + section = (void *)(section+1) + section->error_data_length) + +static inline u32 apei_estatus_len(struct acpi_hest_generic_status *estatus) +{ + if (estatus->raw_data_length) + return estatus->raw_data_offset + \ + estatus->raw_data_length; + else + return sizeof(*estatus) + estatus->data_length; +} + +int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus); +int apei_estatus_check(const struct acpi_hest_generic_status *estatus); +#endif diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c new file mode 100644 index 000000000000..f4cf2fc4c8c1 --- /dev/null +++ b/drivers/acpi/apei/cper.c @@ -0,0 +1,84 @@ +/* + * UEFI Common Platform Error Record (CPER) support + * + * Copyright (C) 2010, Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * CPER is the format used to describe platform hardware error by + * various APEI tables, such as ERST, BERT and HEST etc. + * + * For more information about CPER, please refer to Appendix N of UEFI + * Specification version 2.3. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/time.h> +#include <linux/cper.h> +#include <linux/acpi.h> + +/* + * CPER record ID need to be unique even after reboot, because record + * ID is used as index for ERST storage, while CPER records from + * multiple boot may co-exist in ERST. + */ +u64 cper_next_record_id(void) +{ + static atomic64_t seq; + + if (!atomic64_read(&seq)) + atomic64_set(&seq, ((u64)get_seconds()) << 32); + + return atomic64_inc_return(&seq); +} +EXPORT_SYMBOL_GPL(cper_next_record_id); + +int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus) +{ + if (estatus->data_length && + estatus->data_length < sizeof(struct acpi_hest_generic_data)) + return -EINVAL; + if (estatus->raw_data_length && + estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(apei_estatus_check_header); + +int apei_estatus_check(const struct acpi_hest_generic_status *estatus) +{ + struct acpi_hest_generic_data *gdata; + unsigned int data_len, gedata_len; + int rc; + + rc = apei_estatus_check_header(estatus); + if (rc) + return rc; + data_len = estatus->data_length; + gdata = (struct acpi_hest_generic_data *)(estatus + 1); + while (data_len > sizeof(*gdata)) { + gedata_len = gdata->error_data_length; + if (gedata_len > data_len - sizeof(*gdata)) + return -EINVAL; + data_len -= gedata_len + sizeof(*gdata); + } + if (data_len) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(apei_estatus_check); diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c new file mode 100644 index 000000000000..465c885938ee --- /dev/null +++ b/drivers/acpi/apei/einj.c @@ -0,0 +1,548 @@ +/* + * APEI Error INJection support + * + * EINJ provides a hardware error injection mechanism, this is useful + * for debugging and testing of other APEI and RAS features. + * + * For more information about EINJ, please refer to ACPI Specification + * version 4.0, section 17.5. + * + * Copyright 2009-2010 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/nmi.h> +#include <linux/delay.h> +#include <acpi/acpi.h> + +#include "apei-internal.h" + +#define EINJ_PFX "EINJ: " + +#define SPIN_UNIT 100 /* 100ns */ +/* Firmware should respond within 1 miliseconds */ +#define FIRMWARE_TIMEOUT (1 * NSEC_PER_MSEC) + +/* + * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the + * EINJ table through an unpublished extension. Use with caution as + * most will ignore the parameter and make their own choice of address + * for error injection. + */ +struct einj_parameter { + u64 type; + u64 reserved1; + u64 reserved2; + u64 param1; + u64 param2; +}; + +#define EINJ_OP_BUSY 0x1 +#define EINJ_STATUS_SUCCESS 0x0 +#define EINJ_STATUS_FAIL 0x1 +#define EINJ_STATUS_INVAL 0x2 + +#define EINJ_TAB_ENTRY(tab) \ + ((struct acpi_whea_header *)((char *)(tab) + \ + sizeof(struct acpi_table_einj))) + +static struct acpi_table_einj *einj_tab; + +static struct apei_resources einj_resources; + +static struct apei_exec_ins_type einj_ins_type[] = { + [ACPI_EINJ_READ_REGISTER] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = apei_exec_read_register, + }, + [ACPI_EINJ_READ_REGISTER_VALUE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = apei_exec_read_register_value, + }, + [ACPI_EINJ_WRITE_REGISTER] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = apei_exec_write_register, + }, + [ACPI_EINJ_WRITE_REGISTER_VALUE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = apei_exec_write_register_value, + }, + [ACPI_EINJ_NOOP] = { + .flags = 0, + .run = apei_exec_noop, + }, +}; + +/* + * Prevent EINJ interpreter to run simultaneously, because the + * corresponding firmware implementation may not work properly when + * invoked simultaneously. + */ +static DEFINE_MUTEX(einj_mutex); + +static struct einj_parameter *einj_param; + +static void einj_exec_ctx_init(struct apei_exec_context *ctx) +{ + apei_exec_ctx_init(ctx, einj_ins_type, ARRAY_SIZE(einj_ins_type), + EINJ_TAB_ENTRY(einj_tab), einj_tab->entries); +} + +static int __einj_get_available_error_type(u32 *type) +{ + struct apei_exec_context ctx; + int rc; + + einj_exec_ctx_init(&ctx); + rc = apei_exec_run(&ctx, ACPI_EINJ_GET_ERROR_TYPE); + if (rc) + return rc; + *type = apei_exec_ctx_get_output(&ctx); + + return 0; +} + +/* Get error injection capabilities of the platform */ +static int einj_get_available_error_type(u32 *type) +{ + int rc; + + mutex_lock(&einj_mutex); + rc = __einj_get_available_error_type(type); + mutex_unlock(&einj_mutex); + + return rc; +} + +static int einj_timedout(u64 *t) +{ + if ((s64)*t < SPIN_UNIT) { + pr_warning(FW_WARN EINJ_PFX + "Firmware does not respond in time\n"); + return 1; + } + *t -= SPIN_UNIT; + ndelay(SPIN_UNIT); + touch_nmi_watchdog(); + return 0; +} + +static u64 einj_get_parameter_address(void) +{ + int i; + u64 paddr = 0; + struct acpi_whea_header *entry; + + entry = EINJ_TAB_ENTRY(einj_tab); + for (i = 0; i < einj_tab->entries; i++) { + if (entry->action == ACPI_EINJ_SET_ERROR_TYPE && + entry->instruction == ACPI_EINJ_WRITE_REGISTER && + entry->register_region.space_id == + ACPI_ADR_SPACE_SYSTEM_MEMORY) + memcpy(&paddr, &entry->register_region.address, + sizeof(paddr)); + entry++; + } + + return paddr; +} + +/* do sanity check to trigger table */ +static int einj_check_trigger_header(struct acpi_einj_trigger *trigger_tab) +{ + if (trigger_tab->header_size != sizeof(struct acpi_einj_trigger)) + return -EINVAL; + if (trigger_tab->table_size > PAGE_SIZE || + trigger_tab->table_size <= trigger_tab->header_size) + return -EINVAL; + if (trigger_tab->entry_count != + (trigger_tab->table_size - trigger_tab->header_size) / + sizeof(struct acpi_einj_entry)) + return -EINVAL; + + return 0; +} + +/* Execute instructions in trigger error action table */ +static int __einj_error_trigger(u64 trigger_paddr) +{ + struct acpi_einj_trigger *trigger_tab = NULL; + struct apei_exec_context trigger_ctx; + struct apei_resources trigger_resources; + struct acpi_whea_header *trigger_entry; + struct resource *r; + u32 table_size; + int rc = -EIO; + + r = request_mem_region(trigger_paddr, sizeof(*trigger_tab), + "APEI EINJ Trigger Table"); + if (!r) { + pr_err(EINJ_PFX + "Can not request iomem region <%016llx-%016llx> for Trigger table.\n", + (unsigned long long)trigger_paddr, + (unsigned long long)trigger_paddr+sizeof(*trigger_tab)); + goto out; + } + trigger_tab = ioremap_cache(trigger_paddr, sizeof(*trigger_tab)); + if (!trigger_tab) { + pr_err(EINJ_PFX "Failed to map trigger table!\n"); + goto out_rel_header; + } + rc = einj_check_trigger_header(trigger_tab); + if (rc) { + pr_warning(FW_BUG EINJ_PFX + "The trigger error action table is invalid\n"); + goto out_rel_header; + } + rc = -EIO; + table_size = trigger_tab->table_size; + r = request_mem_region(trigger_paddr + sizeof(*trigger_tab), + table_size - sizeof(*trigger_tab), + "APEI EINJ Trigger Table"); + if (!r) { + pr_err(EINJ_PFX +"Can not request iomem region <%016llx-%016llx> for Trigger Table Entry.\n", + (unsigned long long)trigger_paddr+sizeof(*trigger_tab), + (unsigned long long)trigger_paddr + table_size); + goto out_rel_header; + } + iounmap(trigger_tab); + trigger_tab = ioremap_cache(trigger_paddr, table_size); + if (!trigger_tab) { + pr_err(EINJ_PFX "Failed to map trigger table!\n"); + goto out_rel_entry; + } + trigger_entry = (struct acpi_whea_header *) + ((char *)trigger_tab + sizeof(struct acpi_einj_trigger)); + apei_resources_init(&trigger_resources); + apei_exec_ctx_init(&trigger_ctx, einj_ins_type, + ARRAY_SIZE(einj_ins_type), + trigger_entry, trigger_tab->entry_count); + rc = apei_exec_collect_resources(&trigger_ctx, &trigger_resources); + if (rc) + goto out_fini; + rc = apei_resources_sub(&trigger_resources, &einj_resources); + if (rc) + goto out_fini; + rc = apei_resources_request(&trigger_resources, "APEI EINJ Trigger"); + if (rc) + goto out_fini; + rc = apei_exec_pre_map_gars(&trigger_ctx); + if (rc) + goto out_release; + + rc = apei_exec_run(&trigger_ctx, ACPI_EINJ_TRIGGER_ERROR); + + apei_exec_post_unmap_gars(&trigger_ctx); +out_release: + apei_resources_release(&trigger_resources); +out_fini: + apei_resources_fini(&trigger_resources); +out_rel_entry: + release_mem_region(trigger_paddr + sizeof(*trigger_tab), + table_size - sizeof(*trigger_tab)); +out_rel_header: + release_mem_region(trigger_paddr, sizeof(*trigger_tab)); +out: + if (trigger_tab) + iounmap(trigger_tab); + + return rc; +} + +static int __einj_error_inject(u32 type, u64 param1, u64 param2) +{ + struct apei_exec_context ctx; + u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT; + int rc; + + einj_exec_ctx_init(&ctx); + + rc = apei_exec_run(&ctx, ACPI_EINJ_BEGIN_OPERATION); + if (rc) + return rc; + apei_exec_ctx_set_input(&ctx, type); + rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE); + if (rc) + return rc; + if (einj_param) { + writeq(param1, &einj_param->param1); + writeq(param2, &einj_param->param2); + } + rc = apei_exec_run(&ctx, ACPI_EINJ_EXECUTE_OPERATION); + if (rc) + return rc; + for (;;) { + rc = apei_exec_run(&ctx, ACPI_EINJ_CHECK_BUSY_STATUS); + if (rc) + return rc; + val = apei_exec_ctx_get_output(&ctx); + if (!(val & EINJ_OP_BUSY)) + break; + if (einj_timedout(&timeout)) + return -EIO; + } + rc = apei_exec_run(&ctx, ACPI_EINJ_GET_COMMAND_STATUS); + if (rc) + return rc; + val = apei_exec_ctx_get_output(&ctx); + if (val != EINJ_STATUS_SUCCESS) + return -EBUSY; + + rc = apei_exec_run(&ctx, ACPI_EINJ_GET_TRIGGER_TABLE); + if (rc) + return rc; + trigger_paddr = apei_exec_ctx_get_output(&ctx); + rc = __einj_error_trigger(trigger_paddr); + if (rc) + return rc; + rc = apei_exec_run(&ctx, ACPI_EINJ_END_OPERATION); + + return rc; +} + +/* Inject the specified hardware error */ +static int einj_error_inject(u32 type, u64 param1, u64 param2) +{ + int rc; + + mutex_lock(&einj_mutex); + rc = __einj_error_inject(type, param1, param2); + mutex_unlock(&einj_mutex); + + return rc; +} + +static u32 error_type; +static u64 error_param1; +static u64 error_param2; +static struct dentry *einj_debug_dir; + +static int available_error_type_show(struct seq_file *m, void *v) +{ + int rc; + u32 available_error_type = 0; + + rc = einj_get_available_error_type(&available_error_type); + if (rc) + return rc; + if (available_error_type & 0x0001) + seq_printf(m, "0x00000001\tProcessor Correctable\n"); + if (available_error_type & 0x0002) + seq_printf(m, "0x00000002\tProcessor Uncorrectable non-fatal\n"); + if (available_error_type & 0x0004) + seq_printf(m, "0x00000004\tProcessor Uncorrectable fatal\n"); + if (available_error_type & 0x0008) + seq_printf(m, "0x00000008\tMemory Correctable\n"); + if (available_error_type & 0x0010) + seq_printf(m, "0x00000010\tMemory Uncorrectable non-fatal\n"); + if (available_error_type & 0x0020) + seq_printf(m, "0x00000020\tMemory Uncorrectable fatal\n"); + if (available_error_type & 0x0040) + seq_printf(m, "0x00000040\tPCI Express Correctable\n"); + if (available_error_type & 0x0080) + seq_printf(m, "0x00000080\tPCI Express Uncorrectable non-fatal\n"); + if (available_error_type & 0x0100) + seq_printf(m, "0x00000100\tPCI Express Uncorrectable fatal\n"); + if (available_error_type & 0x0200) + seq_printf(m, "0x00000200\tPlatform Correctable\n"); + if (available_error_type & 0x0400) + seq_printf(m, "0x00000400\tPlatform Uncorrectable non-fatal\n"); + if (available_error_type & 0x0800) + seq_printf(m, "0x00000800\tPlatform Uncorrectable fatal\n"); + + return 0; +} + +static int available_error_type_open(struct inode *inode, struct file *file) +{ + return single_open(file, available_error_type_show, NULL); +} + +static const struct file_operations available_error_type_fops = { + .open = available_error_type_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int error_type_get(void *data, u64 *val) +{ + *val = error_type; + + return 0; +} + +static int error_type_set(void *data, u64 val) +{ + int rc; + u32 available_error_type = 0; + + /* Only one error type can be specified */ + if (val & (val - 1)) + return -EINVAL; + rc = einj_get_available_error_type(&available_error_type); + if (rc) + return rc; + if (!(val & available_error_type)) + return -EINVAL; + error_type = val; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(error_type_fops, error_type_get, + error_type_set, "0x%llx\n"); + +static int error_inject_set(void *data, u64 val) +{ + if (!error_type) + return -EINVAL; + + return einj_error_inject(error_type, error_param1, error_param2); +} + +DEFINE_SIMPLE_ATTRIBUTE(error_inject_fops, NULL, + error_inject_set, "%llu\n"); + +static int einj_check_table(struct acpi_table_einj *einj_tab) +{ + if (einj_tab->header_length != sizeof(struct acpi_table_einj)) + return -EINVAL; + if (einj_tab->header.length < sizeof(struct acpi_table_einj)) + return -EINVAL; + if (einj_tab->entries != + (einj_tab->header.length - sizeof(struct acpi_table_einj)) / + sizeof(struct acpi_einj_entry)) + return -EINVAL; + + return 0; +} + +static int __init einj_init(void) +{ + int rc; + u64 param_paddr; + acpi_status status; + struct dentry *fentry; + struct apei_exec_context ctx; + + if (acpi_disabled) + return -ENODEV; + + status = acpi_get_table(ACPI_SIG_EINJ, 0, + (struct acpi_table_header **)&einj_tab); + if (status == AE_NOT_FOUND) { + pr_info(EINJ_PFX "Table is not found!\n"); + return -ENODEV; + } else if (ACPI_FAILURE(status)) { + const char *msg = acpi_format_exception(status); + pr_err(EINJ_PFX "Failed to get table, %s\n", msg); + return -EINVAL; + } + + rc = einj_check_table(einj_tab); + if (rc) { + pr_warning(FW_BUG EINJ_PFX "EINJ table is invalid\n"); + return -EINVAL; + } + + rc = -ENOMEM; + einj_debug_dir = debugfs_create_dir("einj", apei_get_debugfs_dir()); + if (!einj_debug_dir) + goto err_cleanup; + fentry = debugfs_create_file("available_error_type", S_IRUSR, + einj_debug_dir, NULL, + &available_error_type_fops); + if (!fentry) + goto err_cleanup; + fentry = debugfs_create_file("error_type", S_IRUSR | S_IWUSR, + einj_debug_dir, NULL, &error_type_fops); + if (!fentry) + goto err_cleanup; + fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param1); + if (!fentry) + goto err_cleanup; + fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param2); + if (!fentry) + goto err_cleanup; + fentry = debugfs_create_file("error_inject", S_IWUSR, + einj_debug_dir, NULL, &error_inject_fops); + if (!fentry) + goto err_cleanup; + + apei_resources_init(&einj_resources); + einj_exec_ctx_init(&ctx); + rc = apei_exec_collect_resources(&ctx, &einj_resources); + if (rc) + goto err_fini; + rc = apei_resources_request(&einj_resources, "APEI EINJ"); + if (rc) + goto err_fini; + rc = apei_exec_pre_map_gars(&ctx); + if (rc) + goto err_release; + param_paddr = einj_get_parameter_address(); + if (param_paddr) { + einj_param = ioremap(param_paddr, sizeof(*einj_param)); + rc = -ENOMEM; + if (!einj_param) + goto err_unmap; + } + + pr_info(EINJ_PFX "Error INJection is initialized.\n"); + + return 0; + +err_unmap: + apei_exec_post_unmap_gars(&ctx); +err_release: + apei_resources_release(&einj_resources); +err_fini: + apei_resources_fini(&einj_resources); +err_cleanup: + debugfs_remove_recursive(einj_debug_dir); + + return rc; +} + +static void __exit einj_exit(void) +{ + struct apei_exec_context ctx; + + if (einj_param) + iounmap(einj_param); + einj_exec_ctx_init(&ctx); + apei_exec_post_unmap_gars(&ctx); + apei_resources_release(&einj_resources); + apei_resources_fini(&einj_resources); + debugfs_remove_recursive(einj_debug_dir); +} + +module_init(einj_init); +module_exit(einj_exit); + +MODULE_AUTHOR("Huang Ying"); +MODULE_DESCRIPTION("APEI Error INJection support"); +MODULE_LICENSE("GPL"); diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c new file mode 100644 index 000000000000..2ebc39115507 --- /dev/null +++ b/drivers/acpi/apei/erst.c @@ -0,0 +1,855 @@ +/* + * APEI Error Record Serialization Table support + * + * ERST is a way provided by APEI to save and retrieve hardware error + * infomation to and from a persistent store. + * + * For more information about ERST, please refer to ACPI Specification + * version 4.0, section 17.4. + * + * Copyright 2010 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/io.h> +#include <linux/acpi.h> +#include <linux/uaccess.h> +#include <linux/cper.h> +#include <linux/nmi.h> +#include <acpi/apei.h> + +#include "apei-internal.h" + +#define ERST_PFX "ERST: " + +/* ERST command status */ +#define ERST_STATUS_SUCCESS 0x0 +#define ERST_STATUS_NOT_ENOUGH_SPACE 0x1 +#define ERST_STATUS_HARDWARE_NOT_AVAILABLE 0x2 +#define ERST_STATUS_FAILED 0x3 +#define ERST_STATUS_RECORD_STORE_EMPTY 0x4 +#define ERST_STATUS_RECORD_NOT_FOUND 0x5 + +#define ERST_TAB_ENTRY(tab) \ + ((struct acpi_whea_header *)((char *)(tab) + \ + sizeof(struct acpi_table_erst))) + +#define SPIN_UNIT 100 /* 100ns */ +/* Firmware should respond within 1 miliseconds */ +#define FIRMWARE_TIMEOUT (1 * NSEC_PER_MSEC) +#define FIRMWARE_MAX_STALL 50 /* 50us */ + +int erst_disable; +EXPORT_SYMBOL_GPL(erst_disable); + +static struct acpi_table_erst *erst_tab; + +/* ERST Error Log Address Range atrributes */ +#define ERST_RANGE_RESERVED 0x0001 +#define ERST_RANGE_NVRAM 0x0002 +#define ERST_RANGE_SLOW 0x0004 + +/* + * ERST Error Log Address Range, used as buffer for reading/writing + * error records. + */ +static struct erst_erange { + u64 base; + u64 size; + void __iomem *vaddr; + u32 attr; +} erst_erange; + +/* + * Prevent ERST interpreter to run simultaneously, because the + * corresponding firmware implementation may not work properly when + * invoked simultaneously. + * + * It is used to provide exclusive accessing for ERST Error Log + * Address Range too. + */ +static DEFINE_SPINLOCK(erst_lock); + +static inline int erst_errno(int command_status) +{ + switch (command_status) { + case ERST_STATUS_SUCCESS: + return 0; + case ERST_STATUS_HARDWARE_NOT_AVAILABLE: + return -ENODEV; + case ERST_STATUS_NOT_ENOUGH_SPACE: + return -ENOSPC; + case ERST_STATUS_RECORD_STORE_EMPTY: + case ERST_STATUS_RECORD_NOT_FOUND: + return -ENOENT; + default: + return -EINVAL; + } +} + +static int erst_timedout(u64 *t, u64 spin_unit) +{ + if ((s64)*t < spin_unit) { + pr_warning(FW_WARN ERST_PFX + "Firmware does not respond in time\n"); + return 1; + } + *t -= spin_unit; + ndelay(spin_unit); + touch_nmi_watchdog(); + return 0; +} + +static int erst_exec_load_var1(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + return __apei_exec_read_register(entry, &ctx->var1); +} + +static int erst_exec_load_var2(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + return __apei_exec_read_register(entry, &ctx->var2); +} + +static int erst_exec_store_var1(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + return __apei_exec_write_register(entry, ctx->var1); +} + +static int erst_exec_add(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + ctx->var1 += ctx->var2; + return 0; +} + +static int erst_exec_subtract(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + ctx->var1 -= ctx->var2; + return 0; +} + +static int erst_exec_add_value(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + int rc; + u64 val; + + rc = __apei_exec_read_register(entry, &val); + if (rc) + return rc; + val += ctx->value; + rc = __apei_exec_write_register(entry, val); + return rc; +} + +static int erst_exec_subtract_value(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + int rc; + u64 val; + + rc = __apei_exec_read_register(entry, &val); + if (rc) + return rc; + val -= ctx->value; + rc = __apei_exec_write_register(entry, val); + return rc; +} + +static int erst_exec_stall(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + u64 stall_time; + + if (ctx->value > FIRMWARE_MAX_STALL) { + if (!in_nmi()) + pr_warning(FW_WARN ERST_PFX + "Too long stall time for stall instruction: %llx.\n", + ctx->value); + stall_time = FIRMWARE_MAX_STALL; + } else + stall_time = ctx->value; + udelay(stall_time); + return 0; +} + +static int erst_exec_stall_while_true(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + int rc; + u64 val; + u64 timeout = FIRMWARE_TIMEOUT; + u64 stall_time; + + if (ctx->var1 > FIRMWARE_MAX_STALL) { + if (!in_nmi()) + pr_warning(FW_WARN ERST_PFX + "Too long stall time for stall while true instruction: %llx.\n", + ctx->var1); + stall_time = FIRMWARE_MAX_STALL; + } else + stall_time = ctx->var1; + + for (;;) { + rc = __apei_exec_read_register(entry, &val); + if (rc) + return rc; + if (val != ctx->value) + break; + if (erst_timedout(&timeout, stall_time * NSEC_PER_USEC)) + return -EIO; + } + return 0; +} + +static int erst_exec_skip_next_instruction_if_true( + struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + int rc; + u64 val; + + rc = __apei_exec_read_register(entry, &val); + if (rc) + return rc; + if (val == ctx->value) { + ctx->ip += 2; + return APEI_EXEC_SET_IP; + } + + return 0; +} + +static int erst_exec_goto(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + ctx->ip = ctx->value; + return APEI_EXEC_SET_IP; +} + +static int erst_exec_set_src_address_base(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + return __apei_exec_read_register(entry, &ctx->src_base); +} + +static int erst_exec_set_dst_address_base(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + return __apei_exec_read_register(entry, &ctx->dst_base); +} + +static int erst_exec_move_data(struct apei_exec_context *ctx, + struct acpi_whea_header *entry) +{ + int rc; + u64 offset; + + rc = __apei_exec_read_register(entry, &offset); + if (rc) + return rc; + memmove((void *)ctx->dst_base + offset, + (void *)ctx->src_base + offset, + ctx->var2); + + return 0; +} + +static struct apei_exec_ins_type erst_ins_type[] = { + [ACPI_ERST_READ_REGISTER] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = apei_exec_read_register, + }, + [ACPI_ERST_READ_REGISTER_VALUE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = apei_exec_read_register_value, + }, + [ACPI_ERST_WRITE_REGISTER] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = apei_exec_write_register, + }, + [ACPI_ERST_WRITE_REGISTER_VALUE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = apei_exec_write_register_value, + }, + [ACPI_ERST_NOOP] = { + .flags = 0, + .run = apei_exec_noop, + }, + [ACPI_ERST_LOAD_VAR1] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_load_var1, + }, + [ACPI_ERST_LOAD_VAR2] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_load_var2, + }, + [ACPI_ERST_STORE_VAR1] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_store_var1, + }, + [ACPI_ERST_ADD] = { + .flags = 0, + .run = erst_exec_add, + }, + [ACPI_ERST_SUBTRACT] = { + .flags = 0, + .run = erst_exec_subtract, + }, + [ACPI_ERST_ADD_VALUE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_add_value, + }, + [ACPI_ERST_SUBTRACT_VALUE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_subtract_value, + }, + [ACPI_ERST_STALL] = { + .flags = 0, + .run = erst_exec_stall, + }, + [ACPI_ERST_STALL_WHILE_TRUE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_stall_while_true, + }, + [ACPI_ERST_SKIP_NEXT_IF_TRUE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_skip_next_instruction_if_true, + }, + [ACPI_ERST_GOTO] = { + .flags = 0, + .run = erst_exec_goto, + }, + [ACPI_ERST_SET_SRC_ADDRESS_BASE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_set_src_address_base, + }, + [ACPI_ERST_SET_DST_ADDRESS_BASE] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_set_dst_address_base, + }, + [ACPI_ERST_MOVE_DATA] = { + .flags = APEI_EXEC_INS_ACCESS_REGISTER, + .run = erst_exec_move_data, + }, +}; + +static inline void erst_exec_ctx_init(struct apei_exec_context *ctx) +{ + apei_exec_ctx_init(ctx, erst_ins_type, ARRAY_SIZE(erst_ins_type), + ERST_TAB_ENTRY(erst_tab), erst_tab->entries); +} + +static int erst_get_erange(struct erst_erange *range) +{ + struct apei_exec_context ctx; + int rc; + + erst_exec_ctx_init(&ctx); + rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_RANGE); + if (rc) + return rc; + range->base = apei_exec_ctx_get_output(&ctx); + rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_LENGTH); + if (rc) + return rc; + range->size = apei_exec_ctx_get_output(&ctx); + rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_ATTRIBUTES); + if (rc) + return rc; + range->attr = apei_exec_ctx_get_output(&ctx); + + return 0; +} + +static ssize_t __erst_get_record_count(void) +{ + struct apei_exec_context ctx; + int rc; + + erst_exec_ctx_init(&ctx); + rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_COUNT); + if (rc) + return rc; + return apei_exec_ctx_get_output(&ctx); +} + +ssize_t erst_get_record_count(void) +{ + ssize_t count; + unsigned long flags; + + if (erst_disable) + return -ENODEV; + + spin_lock_irqsave(&erst_lock, flags); + count = __erst_get_record_count(); + spin_unlock_irqrestore(&erst_lock, flags); + + return count; +} +EXPORT_SYMBOL_GPL(erst_get_record_count); + +static int __erst_get_next_record_id(u64 *record_id) +{ + struct apei_exec_context ctx; + int rc; + + erst_exec_ctx_init(&ctx); + rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_ID); + if (rc) + return rc; + *record_id = apei_exec_ctx_get_output(&ctx); + + return 0; +} + +/* + * Get the record ID of an existing error record on the persistent + * storage. If there is no error record on the persistent storage, the + * returned record_id is APEI_ERST_INVALID_RECORD_ID. + */ +int erst_get_next_record_id(u64 *record_id) +{ + int rc; + unsigned long flags; + + if (erst_disable) + return -ENODEV; + + spin_lock_irqsave(&erst_lock, flags); + rc = __erst_get_next_record_id(record_id); + spin_unlock_irqrestore(&erst_lock, flags); + + return rc; +} +EXPORT_SYMBOL_GPL(erst_get_next_record_id); + +static int __erst_write_to_storage(u64 offset) +{ + struct apei_exec_context ctx; + u64 timeout = FIRMWARE_TIMEOUT; + u64 val; + int rc; + + erst_exec_ctx_init(&ctx); + rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_WRITE); + if (rc) + return rc; + apei_exec_ctx_set_input(&ctx, offset); + rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET); + if (rc) + return rc; + rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION); + if (rc) + return rc; + for (;;) { + rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS); + if (rc) + return rc; + val = apei_exec_ctx_get_output(&ctx); + if (!val) + break; + if (erst_timedout(&timeout, SPIN_UNIT)) + return -EIO; + } + rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS); + if (rc) + return rc; + val = apei_exec_ctx_get_output(&ctx); + rc = apei_exec_run(&ctx, ACPI_ERST_END); + if (rc) + return rc; + + return erst_errno(val); +} + +static int __erst_read_from_storage(u64 record_id, u64 offset) +{ + struct apei_exec_context ctx; + u64 timeout = FIRMWARE_TIMEOUT; + u64 val; + int rc; + + erst_exec_ctx_init(&ctx); + rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_READ); + if (rc) + return rc; + apei_exec_ctx_set_input(&ctx, offset); + rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET); + if (rc) + return rc; + apei_exec_ctx_set_input(&ctx, record_id); + rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID); + if (rc) + return rc; + rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION); + if (rc) + return rc; + for (;;) { + rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS); + if (rc) + return rc; + val = apei_exec_ctx_get_output(&ctx); + if (!val) + break; + if (erst_timedout(&timeout, SPIN_UNIT)) + return -EIO; + }; + rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS); + if (rc) + return rc; + val = apei_exec_ctx_get_output(&ctx); + rc = apei_exec_run(&ctx, ACPI_ERST_END); + if (rc) + return rc; + + return erst_errno(val); +} + +static int __erst_clear_from_storage(u64 record_id) +{ + struct apei_exec_context ctx; + u64 timeout = FIRMWARE_TIMEOUT; + u64 val; + int rc; + + erst_exec_ctx_init(&ctx); + rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_CLEAR); + if (rc) + return rc; + apei_exec_ctx_set_input(&ctx, record_id); + rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID); + if (rc) + return rc; + rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION); + if (rc) + return rc; + for (;;) { + rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS); + if (rc) + return rc; + val = apei_exec_ctx_get_output(&ctx); + if (!val) + break; + if (erst_timedout(&timeout, SPIN_UNIT)) + return -EIO; + } + rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS); + if (rc) + return rc; + val = apei_exec_ctx_get_output(&ctx); + rc = apei_exec_run(&ctx, ACPI_ERST_END); + if (rc) + return rc; + + return erst_errno(val); +} + +/* NVRAM ERST Error Log Address Range is not supported yet */ +static void pr_unimpl_nvram(void) +{ + if (printk_ratelimit()) + pr_warning(ERST_PFX + "NVRAM ERST Log Address Range is not implemented yet\n"); +} + +static int __erst_write_to_nvram(const struct cper_record_header *record) +{ + /* do not print message, because printk is not safe for NMI */ + return -ENOSYS; +} + +static int __erst_read_to_erange_from_nvram(u64 record_id, u64 *offset) +{ + pr_unimpl_nvram(); + return -ENOSYS; +} + +static int __erst_clear_from_nvram(u64 record_id) +{ + pr_unimpl_nvram(); + return -ENOSYS; +} + +int erst_write(const struct cper_record_header *record) +{ + int rc; + unsigned long flags; + struct cper_record_header *rcd_erange; + + if (erst_disable) + return -ENODEV; + + if (memcmp(record->signature, CPER_SIG_RECORD, CPER_SIG_SIZE)) + return -EINVAL; + + if (erst_erange.attr & ERST_RANGE_NVRAM) { + if (!spin_trylock_irqsave(&erst_lock, flags)) + return -EBUSY; + rc = __erst_write_to_nvram(record); + spin_unlock_irqrestore(&erst_lock, flags); + return rc; + } + + if (record->record_length > erst_erange.size) + return -EINVAL; + + if (!spin_trylock_irqsave(&erst_lock, flags)) + return -EBUSY; + memcpy(erst_erange.vaddr, record, record->record_length); + rcd_erange = erst_erange.vaddr; + /* signature for serialization system */ + memcpy(&rcd_erange->persistence_information, "ER", 2); + + rc = __erst_write_to_storage(0); + spin_unlock_irqrestore(&erst_lock, flags); + + return rc; +} +EXPORT_SYMBOL_GPL(erst_write); + +static int __erst_read_to_erange(u64 record_id, u64 *offset) +{ + int rc; + + if (erst_erange.attr & ERST_RANGE_NVRAM) + return __erst_read_to_erange_from_nvram( + record_id, offset); + + rc = __erst_read_from_storage(record_id, 0); + if (rc) + return rc; + *offset = 0; + + return 0; +} + +static ssize_t __erst_read(u64 record_id, struct cper_record_header *record, + size_t buflen) +{ + int rc; + u64 offset, len = 0; + struct cper_record_header *rcd_tmp; + + rc = __erst_read_to_erange(record_id, &offset); + if (rc) + return rc; + rcd_tmp = erst_erange.vaddr + offset; + len = rcd_tmp->record_length; + if (len <= buflen) + memcpy(record, rcd_tmp, len); + + return len; +} + +/* + * If return value > buflen, the buffer size is not big enough, + * else if return value < 0, something goes wrong, + * else everything is OK, and return value is record length + */ +ssize_t erst_read(u64 record_id, struct cper_record_header *record, + size_t buflen) +{ + ssize_t len; + unsigned long flags; + + if (erst_disable) + return -ENODEV; + + spin_lock_irqsave(&erst_lock, flags); + len = __erst_read(record_id, record, buflen); + spin_unlock_irqrestore(&erst_lock, flags); + return len; +} +EXPORT_SYMBOL_GPL(erst_read); + +/* + * If return value > buflen, the buffer size is not big enough, + * else if return value = 0, there is no more record to read, + * else if return value < 0, something goes wrong, + * else everything is OK, and return value is record length + */ +ssize_t erst_read_next(struct cper_record_header *record, size_t buflen) +{ + int rc; + ssize_t len; + unsigned long flags; + u64 record_id; + + if (erst_disable) + return -ENODEV; + + spin_lock_irqsave(&erst_lock, flags); + rc = __erst_get_next_record_id(&record_id); + if (rc) { + spin_unlock_irqrestore(&erst_lock, flags); + return rc; + } + /* no more record */ + if (record_id == APEI_ERST_INVALID_RECORD_ID) { + spin_unlock_irqrestore(&erst_lock, flags); + return 0; + } + + len = __erst_read(record_id, record, buflen); + spin_unlock_irqrestore(&erst_lock, flags); + + return len; +} +EXPORT_SYMBOL_GPL(erst_read_next); + +int erst_clear(u64 record_id) +{ + int rc; + unsigned long flags; + + if (erst_disable) + return -ENODEV; + + spin_lock_irqsave(&erst_lock, flags); + if (erst_erange.attr & ERST_RANGE_NVRAM) + rc = __erst_clear_from_nvram(record_id); + else + rc = __erst_clear_from_storage(record_id); + spin_unlock_irqrestore(&erst_lock, flags); + + return rc; +} +EXPORT_SYMBOL_GPL(erst_clear); + +static int __init setup_erst_disable(char *str) +{ + erst_disable = 1; + return 0; +} + +__setup("erst_disable", setup_erst_disable); + +static int erst_check_table(struct acpi_table_erst *erst_tab) +{ + if (erst_tab->header_length != sizeof(struct acpi_table_erst)) + return -EINVAL; + if (erst_tab->header.length < sizeof(struct acpi_table_erst)) + return -EINVAL; + if (erst_tab->entries != + (erst_tab->header.length - sizeof(struct acpi_table_erst)) / + sizeof(struct acpi_erst_entry)) + return -EINVAL; + + return 0; +} + +static int __init erst_init(void) +{ + int rc = 0; + acpi_status status; + struct apei_exec_context ctx; + struct apei_resources erst_resources; + struct resource *r; + + if (acpi_disabled) + goto err; + + if (erst_disable) { + pr_info(ERST_PFX + "Error Record Serialization Table (ERST) support is disabled.\n"); + goto err; + } + + status = acpi_get_table(ACPI_SIG_ERST, 0, + (struct acpi_table_header **)&erst_tab); + if (status == AE_NOT_FOUND) { + pr_err(ERST_PFX "Table is not found!\n"); + goto err; + } else if (ACPI_FAILURE(status)) { + const char *msg = acpi_format_exception(status); + pr_err(ERST_PFX "Failed to get table, %s\n", msg); + rc = -EINVAL; + goto err; + } + + rc = erst_check_table(erst_tab); + if (rc) { + pr_err(FW_BUG ERST_PFX "ERST table is invalid\n"); + goto err; + } + + apei_resources_init(&erst_resources); + erst_exec_ctx_init(&ctx); + rc = apei_exec_collect_resources(&ctx, &erst_resources); + if (rc) + goto err_fini; + rc = apei_resources_request(&erst_resources, "APEI ERST"); + if (rc) + goto err_fini; + rc = apei_exec_pre_map_gars(&ctx); + if (rc) + goto err_release; + rc = erst_get_erange(&erst_erange); + if (rc) { + if (rc == -ENODEV) + pr_info(ERST_PFX + "The corresponding hardware device or firmware implementation " + "is not available.\n"); + else + pr_err(ERST_PFX + "Failed to get Error Log Address Range.\n"); + goto err_unmap_reg; + } + + r = request_mem_region(erst_erange.base, erst_erange.size, "APEI ERST"); + if (!r) { + pr_err(ERST_PFX + "Can not request iomem region <0x%16llx-0x%16llx> for ERST.\n", + (unsigned long long)erst_erange.base, + (unsigned long long)erst_erange.base + erst_erange.size); + rc = -EIO; + goto err_unmap_reg; + } + rc = -ENOMEM; + erst_erange.vaddr = ioremap_cache(erst_erange.base, + erst_erange.size); + if (!erst_erange.vaddr) + goto err_release_erange; + + pr_info(ERST_PFX + "Error Record Serialization Table (ERST) support is initialized.\n"); + + return 0; + +err_release_erange: + release_mem_region(erst_erange.base, erst_erange.size); +err_unmap_reg: + apei_exec_post_unmap_gars(&ctx); +err_release: + apei_resources_release(&erst_resources); +err_fini: + apei_resources_fini(&erst_resources); +err: + erst_disable = 1; + return rc; +} + +device_initcall(erst_init); diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c new file mode 100644 index 000000000000..fd0cc016a099 --- /dev/null +++ b/drivers/acpi/apei/ghes.c @@ -0,0 +1,427 @@ +/* + * APEI Generic Hardware Error Source support + * + * Generic Hardware Error Source provides a way to report platform + * hardware errors (such as that from chipset). It works in so called + * "Firmware First" mode, that is, hardware errors are reported to + * firmware firstly, then reported to Linux by firmware. This way, + * some non-standard hardware error registers or non-standard hardware + * link can be checked by firmware to produce more hardware error + * information for Linux. + * + * For more information about Generic Hardware Error Source, please + * refer to ACPI Specification version 4.0, section 17.3.2.6 + * + * Now, only SCI notification type and memory errors are + * supported. More notification type and hardware error type will be + * added later. + * + * Copyright 2010 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/acpi.h> +#include <linux/io.h> +#include <linux/interrupt.h> +#include <linux/cper.h> +#include <linux/kdebug.h> +#include <acpi/apei.h> +#include <acpi/atomicio.h> +#include <acpi/hed.h> +#include <asm/mce.h> + +#include "apei-internal.h" + +#define GHES_PFX "GHES: " + +#define GHES_ESTATUS_MAX_SIZE 65536 + +/* + * One struct ghes is created for each generic hardware error + * source. + * + * It provides the context for APEI hardware error timer/IRQ/SCI/NMI + * handler. Handler for one generic hardware error source is only + * triggered after the previous one is done. So handler can uses + * struct ghes without locking. + * + * estatus: memory buffer for error status block, allocated during + * HEST parsing. + */ +#define GHES_TO_CLEAR 0x0001 + +struct ghes { + struct acpi_hest_generic *generic; + struct acpi_hest_generic_status *estatus; + struct list_head list; + u64 buffer_paddr; + unsigned long flags; +}; + +/* + * Error source lists, one list for each notification method. The + * members in lists are struct ghes. + * + * The list members are only added in HEST parsing and deleted during + * module_exit, that is, single-threaded. So no lock is needed for + * that. + * + * But the mutual exclusion is needed between members adding/deleting + * and timer/IRQ/SCI/NMI handler, which may traverse the list. RCU is + * used for that. + */ +static LIST_HEAD(ghes_sci); + +static struct ghes *ghes_new(struct acpi_hest_generic *generic) +{ + struct ghes *ghes; + unsigned int error_block_length; + int rc; + + ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); + if (!ghes) + return ERR_PTR(-ENOMEM); + ghes->generic = generic; + INIT_LIST_HEAD(&ghes->list); + rc = acpi_pre_map_gar(&generic->error_status_address); + if (rc) + goto err_free; + error_block_length = generic->error_block_length; + if (error_block_length > GHES_ESTATUS_MAX_SIZE) { + pr_warning(FW_WARN GHES_PFX + "Error status block length is too long: %u for " + "generic hardware error source: %d.\n", + error_block_length, generic->header.source_id); + error_block_length = GHES_ESTATUS_MAX_SIZE; + } + ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); + if (!ghes->estatus) { + rc = -ENOMEM; + goto err_unmap; + } + + return ghes; + +err_unmap: + acpi_post_unmap_gar(&generic->error_status_address); +err_free: + kfree(ghes); + return ERR_PTR(rc); +} + +static void ghes_fini(struct ghes *ghes) +{ + kfree(ghes->estatus); + acpi_post_unmap_gar(&ghes->generic->error_status_address); +} + +enum { + GHES_SER_NO = 0x0, + GHES_SER_CORRECTED = 0x1, + GHES_SER_RECOVERABLE = 0x2, + GHES_SER_PANIC = 0x3, +}; + +static inline int ghes_severity(int severity) +{ + switch (severity) { + case CPER_SER_INFORMATIONAL: + return GHES_SER_NO; + case CPER_SER_CORRECTED: + return GHES_SER_CORRECTED; + case CPER_SER_RECOVERABLE: + return GHES_SER_RECOVERABLE; + case CPER_SER_FATAL: + return GHES_SER_PANIC; + default: + /* Unkown, go panic */ + return GHES_SER_PANIC; + } +} + +/* SCI handler run in work queue, so ioremap can be used here */ +static int ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, + int from_phys) +{ + void *vaddr; + + vaddr = ioremap_cache(paddr, len); + if (!vaddr) + return -ENOMEM; + if (from_phys) + memcpy(buffer, vaddr, len); + else + memcpy(vaddr, buffer, len); + iounmap(vaddr); + + return 0; +} + +static int ghes_read_estatus(struct ghes *ghes, int silent) +{ + struct acpi_hest_generic *g = ghes->generic; + u64 buf_paddr; + u32 len; + int rc; + + rc = acpi_atomic_read(&buf_paddr, &g->error_status_address); + if (rc) { + if (!silent && printk_ratelimit()) + pr_warning(FW_WARN GHES_PFX +"Failed to read error status block address for hardware error source: %d.\n", + g->header.source_id); + return -EIO; + } + if (!buf_paddr) + return -ENOENT; + + rc = ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, + sizeof(*ghes->estatus), 1); + if (rc) + return rc; + if (!ghes->estatus->block_status) + return -ENOENT; + + ghes->buffer_paddr = buf_paddr; + ghes->flags |= GHES_TO_CLEAR; + + rc = -EIO; + len = apei_estatus_len(ghes->estatus); + if (len < sizeof(*ghes->estatus)) + goto err_read_block; + if (len > ghes->generic->error_block_length) + goto err_read_block; + if (apei_estatus_check_header(ghes->estatus)) + goto err_read_block; + rc = ghes_copy_tofrom_phys(ghes->estatus + 1, + buf_paddr + sizeof(*ghes->estatus), + len - sizeof(*ghes->estatus), 1); + if (rc) + return rc; + if (apei_estatus_check(ghes->estatus)) + goto err_read_block; + rc = 0; + +err_read_block: + if (rc && !silent) + pr_warning(FW_WARN GHES_PFX + "Failed to read error status block!\n"); + return rc; +} + +static void ghes_clear_estatus(struct ghes *ghes) +{ + ghes->estatus->block_status = 0; + if (!(ghes->flags & GHES_TO_CLEAR)) + return; + ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr, + sizeof(ghes->estatus->block_status), 0); + ghes->flags &= ~GHES_TO_CLEAR; +} + +static void ghes_do_proc(struct ghes *ghes) +{ + int ser, processed = 0; + struct acpi_hest_generic_data *gdata; + + ser = ghes_severity(ghes->estatus->error_severity); + apei_estatus_for_each_section(ghes->estatus, gdata) { +#ifdef CONFIG_X86_MCE + if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, + CPER_SEC_PLATFORM_MEM)) { + apei_mce_report_mem_error( + ser == GHES_SER_CORRECTED, + (struct cper_sec_mem_err *)(gdata+1)); + processed = 1; + } +#endif + } + + if (!processed && printk_ratelimit()) + pr_warning(GHES_PFX + "Unknown error record from generic hardware error source: %d\n", + ghes->generic->header.source_id); +} + +static int ghes_proc(struct ghes *ghes) +{ + int rc; + + rc = ghes_read_estatus(ghes, 0); + if (rc) + goto out; + ghes_do_proc(ghes); + +out: + ghes_clear_estatus(ghes); + return 0; +} + +static int ghes_notify_sci(struct notifier_block *this, + unsigned long event, void *data) +{ + struct ghes *ghes; + int ret = NOTIFY_DONE; + + rcu_read_lock(); + list_for_each_entry_rcu(ghes, &ghes_sci, list) { + if (!ghes_proc(ghes)) + ret = NOTIFY_OK; + } + rcu_read_unlock(); + + return ret; +} + +static struct notifier_block ghes_notifier_sci = { + .notifier_call = ghes_notify_sci, +}; + +static int hest_ghes_parse(struct acpi_hest_header *hest_hdr, void *data) +{ + struct acpi_hest_generic *generic; + struct ghes *ghes = NULL; + int rc = 0; + + if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR) + return 0; + + generic = (struct acpi_hest_generic *)hest_hdr; + if (!generic->enabled) + return 0; + + if (generic->error_block_length < + sizeof(struct acpi_hest_generic_status)) { + pr_warning(FW_BUG GHES_PFX +"Invalid error block length: %u for generic hardware error source: %d\n", + generic->error_block_length, + generic->header.source_id); + goto err; + } + if (generic->records_to_preallocate == 0) { + pr_warning(FW_BUG GHES_PFX +"Invalid records to preallocate: %u for generic hardware error source: %d\n", + generic->records_to_preallocate, + generic->header.source_id); + goto err; + } + ghes = ghes_new(generic); + if (IS_ERR(ghes)) { + rc = PTR_ERR(ghes); + ghes = NULL; + goto err; + } + switch (generic->notify.type) { + case ACPI_HEST_NOTIFY_POLLED: + pr_warning(GHES_PFX +"Generic hardware error source: %d notified via POLL is not supported!\n", + generic->header.source_id); + break; + case ACPI_HEST_NOTIFY_EXTERNAL: + case ACPI_HEST_NOTIFY_LOCAL: + pr_warning(GHES_PFX +"Generic hardware error source: %d notified via IRQ is not supported!\n", + generic->header.source_id); + break; + case ACPI_HEST_NOTIFY_SCI: + if (list_empty(&ghes_sci)) + register_acpi_hed_notifier(&ghes_notifier_sci); + list_add_rcu(&ghes->list, &ghes_sci); + break; + case ACPI_HEST_NOTIFY_NMI: + pr_warning(GHES_PFX +"Generic hardware error source: %d notified via NMI is not supported!\n", + generic->header.source_id); + break; + default: + pr_warning(FW_WARN GHES_PFX + "Unknown notification type: %u for generic hardware error source: %d\n", + generic->notify.type, generic->header.source_id); + break; + } + + return 0; +err: + if (ghes) + ghes_fini(ghes); + return rc; +} + +static void ghes_cleanup(void) +{ + struct ghes *ghes, *nghes; + + if (!list_empty(&ghes_sci)) + unregister_acpi_hed_notifier(&ghes_notifier_sci); + + synchronize_rcu(); + + list_for_each_entry_safe(ghes, nghes, &ghes_sci, list) { + list_del(&ghes->list); + ghes_fini(ghes); + kfree(ghes); + } +} + +static int __init ghes_init(void) +{ + int rc; + + if (acpi_disabled) + return -ENODEV; + + if (hest_disable) { + pr_info(GHES_PFX "HEST is not enabled!\n"); + return -EINVAL; + } + + rc = apei_hest_parse(hest_ghes_parse, NULL); + if (rc) { + pr_err(GHES_PFX + "Error during parsing HEST generic hardware error sources.\n"); + goto err_cleanup; + } + + if (list_empty(&ghes_sci)) { + pr_info(GHES_PFX + "No functional generic hardware error sources.\n"); + rc = -ENODEV; + goto err_cleanup; + } + + pr_info(GHES_PFX + "Generic Hardware Error Source support is initialized.\n"); + + return 0; +err_cleanup: + ghes_cleanup(); + return rc; +} + +static void __exit ghes_exit(void) +{ + ghes_cleanup(); +} + +module_init(ghes_init); +module_exit(ghes_exit); + +MODULE_AUTHOR("Huang Ying"); +MODULE_DESCRIPTION("APEI Generic Hardware Error Source support"); +MODULE_LICENSE("GPL"); diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c new file mode 100644 index 000000000000..e7f40d362cb3 --- /dev/null +++ b/drivers/acpi/apei/hest.c @@ -0,0 +1,173 @@ +/* + * APEI Hardware Error Souce Table support + * + * HEST describes error sources in detail; communicates operational + * parameters (i.e. severity levels, masking bits, and threshold + * values) to Linux as necessary. It also allows the BIOS to report + * non-standard error sources to Linux (for example, chipset-specific + * error registers). + * + * For more information about HEST, please refer to ACPI Specification + * version 4.0, section 17.3.2. + * + * Copyright 2009 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/acpi.h> +#include <linux/kdebug.h> +#include <linux/highmem.h> +#include <linux/io.h> +#include <acpi/apei.h> + +#include "apei-internal.h" + +#define HEST_PFX "HEST: " + +int hest_disable; +EXPORT_SYMBOL_GPL(hest_disable); + +/* HEST table parsing */ + +static struct acpi_table_hest *hest_tab; + +static int hest_void_parse(struct acpi_hest_header *hest_hdr, void *data) +{ + return 0; +} + +static int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = { + [ACPI_HEST_TYPE_IA32_CHECK] = -1, /* need further calculation */ + [ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1, + [ACPI_HEST_TYPE_IA32_NMI] = sizeof(struct acpi_hest_ia_nmi), + [ACPI_HEST_TYPE_AER_ROOT_PORT] = sizeof(struct acpi_hest_aer_root), + [ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer), + [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge), + [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic), +}; + +static int hest_esrc_len(struct acpi_hest_header *hest_hdr) +{ + u16 hest_type = hest_hdr->type; + int len; + + if (hest_type >= ACPI_HEST_TYPE_RESERVED) + return 0; + + len = hest_esrc_len_tab[hest_type]; + + if (hest_type == ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) { + struct acpi_hest_ia_corrected *cmc; + cmc = (struct acpi_hest_ia_corrected *)hest_hdr; + len = sizeof(*cmc) + cmc->num_hardware_banks * + sizeof(struct acpi_hest_ia_error_bank); + } else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) { + struct acpi_hest_ia_machine_check *mc; + mc = (struct acpi_hest_ia_machine_check *)hest_hdr; + len = sizeof(*mc) + mc->num_hardware_banks * + sizeof(struct acpi_hest_ia_error_bank); + } + BUG_ON(len == -1); + + return len; +}; + +int apei_hest_parse(apei_hest_func_t func, void *data) +{ + struct acpi_hest_header *hest_hdr; + int i, rc, len; + + if (hest_disable) + return -EINVAL; + + hest_hdr = (struct acpi_hest_header *)(hest_tab + 1); + for (i = 0; i < hest_tab->error_source_count; i++) { + len = hest_esrc_len(hest_hdr); + if (!len) { + pr_warning(FW_WARN HEST_PFX + "Unknown or unused hardware error source " + "type: %d for hardware error source: %d.\n", + hest_hdr->type, hest_hdr->source_id); + return -EINVAL; + } + if ((void *)hest_hdr + len > + (void *)hest_tab + hest_tab->header.length) { + pr_warning(FW_BUG HEST_PFX + "Table contents overflow for hardware error source: %d.\n", + hest_hdr->source_id); + return -EINVAL; + } + + rc = func(hest_hdr, data); + if (rc) + return rc; + + hest_hdr = (void *)hest_hdr + len; + } + + return 0; +} +EXPORT_SYMBOL_GPL(apei_hest_parse); + +static int __init setup_hest_disable(char *str) +{ + hest_disable = 1; + return 0; +} + +__setup("hest_disable", setup_hest_disable); + +static int __init hest_init(void) +{ + acpi_status status; + int rc = -ENODEV; + + if (acpi_disabled) + goto err; + + if (hest_disable) { + pr_info(HEST_PFX "HEST tabling parsing is disabled.\n"); + goto err; + } + + status = acpi_get_table(ACPI_SIG_HEST, 0, + (struct acpi_table_header **)&hest_tab); + if (status == AE_NOT_FOUND) { + pr_info(HEST_PFX "Table is not found!\n"); + goto err; + } else if (ACPI_FAILURE(status)) { + const char *msg = acpi_format_exception(status); + pr_err(HEST_PFX "Failed to get table, %s\n", msg); + rc = -EINVAL; + goto err; + } + + rc = apei_hest_parse(hest_void_parse, NULL); + if (rc) + goto err; + + pr_info(HEST_PFX "HEST table parsing is initialized.\n"); + + return 0; +err: + hest_disable = 1; + return rc; +} + +subsys_initcall(hest_init); diff --git a/drivers/acpi/atomicio.c b/drivers/acpi/atomicio.c new file mode 100644 index 000000000000..814b19249616 --- /dev/null +++ b/drivers/acpi/atomicio.c @@ -0,0 +1,360 @@ +/* + * atomicio.c - ACPI IO memory pre-mapping/post-unmapping, then + * accessing in atomic context. + * + * This is used for NMI handler to access IO memory area, because + * ioremap/iounmap can not be used in NMI handler. The IO memory area + * is pre-mapped in process context and accessed in NMI handler. + * + * Copyright (C) 2009-2010, Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/acpi.h> +#include <linux/io.h> +#include <linux/kref.h> +#include <linux/rculist.h> +#include <linux/interrupt.h> +#include <acpi/atomicio.h> + +#define ACPI_PFX "ACPI: " + +static LIST_HEAD(acpi_iomaps); +/* + * Used for mutual exclusion between writers of acpi_iomaps list, for + * synchronization between readers and writer, RCU is used. + */ +static DEFINE_SPINLOCK(acpi_iomaps_lock); + +struct acpi_iomap { + struct list_head list; + void __iomem *vaddr; + unsigned long size; + phys_addr_t paddr; + struct kref ref; +}; + +/* acpi_iomaps_lock or RCU read lock must be held before calling */ +static struct acpi_iomap *__acpi_find_iomap(phys_addr_t paddr, + unsigned long size) +{ + struct acpi_iomap *map; + + list_for_each_entry_rcu(map, &acpi_iomaps, list) { + if (map->paddr + map->size >= paddr + size && + map->paddr <= paddr) + return map; + } + return NULL; +} + +/* + * Atomic "ioremap" used by NMI handler, if the specified IO memory + * area is not pre-mapped, NULL will be returned. + * + * acpi_iomaps_lock or RCU read lock must be held before calling + */ +static void __iomem *__acpi_ioremap_fast(phys_addr_t paddr, + unsigned long size) +{ + struct acpi_iomap *map; + + map = __acpi_find_iomap(paddr, size); + if (map) + return map->vaddr + (paddr - map->paddr); + else + return NULL; +} + +/* acpi_iomaps_lock must be held before calling */ +static void __iomem *__acpi_try_ioremap(phys_addr_t paddr, + unsigned long size) +{ + struct acpi_iomap *map; + + map = __acpi_find_iomap(paddr, size); + if (map) { + kref_get(&map->ref); + return map->vaddr + (paddr - map->paddr); + } else + return NULL; +} + +/* + * Used to pre-map the specified IO memory area. First try to find + * whether the area is already pre-mapped, if it is, increase the + * reference count (in __acpi_try_ioremap) and return; otherwise, do + * the real ioremap, and add the mapping into acpi_iomaps list. + */ +static void __iomem *acpi_pre_map(phys_addr_t paddr, + unsigned long size) +{ + void __iomem *vaddr; + struct acpi_iomap *map; + unsigned long pg_sz, flags; + phys_addr_t pg_off; + + spin_lock_irqsave(&acpi_iomaps_lock, flags); + vaddr = __acpi_try_ioremap(paddr, size); + spin_unlock_irqrestore(&acpi_iomaps_lock, flags); + if (vaddr) + return vaddr; + + pg_off = paddr & PAGE_MASK; + pg_sz = ((paddr + size + PAGE_SIZE - 1) & PAGE_MASK) - pg_off; + vaddr = ioremap(pg_off, pg_sz); + if (!vaddr) + return NULL; + map = kmalloc(sizeof(*map), GFP_KERNEL); + if (!map) + goto err_unmap; + INIT_LIST_HEAD(&map->list); + map->paddr = pg_off; + map->size = pg_sz; + map->vaddr = vaddr; + kref_init(&map->ref); + + spin_lock_irqsave(&acpi_iomaps_lock, flags); + vaddr = __acpi_try_ioremap(paddr, size); + if (vaddr) { + spin_unlock_irqrestore(&acpi_iomaps_lock, flags); + iounmap(map->vaddr); + kfree(map); + return vaddr; + } + list_add_tail_rcu(&map->list, &acpi_iomaps); + spin_unlock_irqrestore(&acpi_iomaps_lock, flags); + + return vaddr + (paddr - pg_off); +err_unmap: + iounmap(vaddr); + return NULL; +} + +/* acpi_iomaps_lock must be held before calling */ +static void __acpi_kref_del_iomap(struct kref *ref) +{ + struct acpi_iomap *map; + + map = container_of(ref, struct acpi_iomap, ref); + list_del_rcu(&map->list); +} + +/* + * Used to post-unmap the specified IO memory area. The iounmap is + * done only if the reference count goes zero. + */ +static void acpi_post_unmap(phys_addr_t paddr, unsigned long size) +{ + struct acpi_iomap *map; + unsigned long flags; + int del; + + spin_lock_irqsave(&acpi_iomaps_lock, flags); + map = __acpi_find_iomap(paddr, size); + BUG_ON(!map); + del = kref_put(&map->ref, __acpi_kref_del_iomap); + spin_unlock_irqrestore(&acpi_iomaps_lock, flags); + + if (!del) + return; + + synchronize_rcu(); + iounmap(map->vaddr); + kfree(map); +} + +/* In NMI handler, should set silent = 1 */ +static int acpi_check_gar(struct acpi_generic_address *reg, + u64 *paddr, int silent) +{ + u32 width, space_id; + + width = reg->bit_width; + space_id = reg->space_id; + /* Handle possible alignment issues */ + memcpy(paddr, ®->address, sizeof(*paddr)); + if (!*paddr) { + if (!silent) + pr_warning(FW_BUG ACPI_PFX + "Invalid physical address in GAR [0x%llx/%u/%u]\n", + *paddr, width, space_id); + return -EINVAL; + } + + if ((width != 8) && (width != 16) && (width != 32) && (width != 64)) { + if (!silent) + pr_warning(FW_BUG ACPI_PFX + "Invalid bit width in GAR [0x%llx/%u/%u]\n", + *paddr, width, space_id); + return -EINVAL; + } + + if (space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY && + space_id != ACPI_ADR_SPACE_SYSTEM_IO) { + if (!silent) + pr_warning(FW_BUG ACPI_PFX + "Invalid address space type in GAR [0x%llx/%u/%u]\n", + *paddr, width, space_id); + return -EINVAL; + } + + return 0; +} + +/* Pre-map, working on GAR */ +int acpi_pre_map_gar(struct acpi_generic_address *reg) +{ + u64 paddr; + void __iomem *vaddr; + int rc; + + if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) + return 0; + + rc = acpi_check_gar(reg, &paddr, 0); + if (rc) + return rc; + + vaddr = acpi_pre_map(paddr, reg->bit_width / 8); + if (!vaddr) + return -EIO; + + return 0; +} +EXPORT_SYMBOL_GPL(acpi_pre_map_gar); + +/* Post-unmap, working on GAR */ +int acpi_post_unmap_gar(struct acpi_generic_address *reg) +{ + u64 paddr; + int rc; + + if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) + return 0; + + rc = acpi_check_gar(reg, &paddr, 0); + if (rc) + return rc; + + acpi_post_unmap(paddr, reg->bit_width / 8); + + return 0; +} +EXPORT_SYMBOL_GPL(acpi_post_unmap_gar); + +/* + * Can be used in atomic (including NMI) or process context. RCU read + * lock can only be released after the IO memory area accessing. + */ +static int acpi_atomic_read_mem(u64 paddr, u64 *val, u32 width) +{ + void __iomem *addr; + + rcu_read_lock(); + addr = __acpi_ioremap_fast(paddr, width); + switch (width) { + case 8: + *val = readb(addr); + break; + case 16: + *val = readw(addr); + break; + case 32: + *val = readl(addr); + break; + case 64: + *val = readq(addr); + break; + default: + return -EINVAL; + } + rcu_read_unlock(); + + return 0; +} + +static int acpi_atomic_write_mem(u64 paddr, u64 val, u32 width) +{ + void __iomem *addr; + + rcu_read_lock(); + addr = __acpi_ioremap_fast(paddr, width); + switch (width) { + case 8: + writeb(val, addr); + break; + case 16: + writew(val, addr); + break; + case 32: + writel(val, addr); + break; + case 64: + writeq(val, addr); + break; + default: + return -EINVAL; + } + rcu_read_unlock(); + + return 0; +} + +/* GAR accessing in atomic (including NMI) or process context */ +int acpi_atomic_read(u64 *val, struct acpi_generic_address *reg) +{ + u64 paddr; + int rc; + + rc = acpi_check_gar(reg, &paddr, 1); + if (rc) + return rc; + + *val = 0; + switch (reg->space_id) { + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + return acpi_atomic_read_mem(paddr, val, reg->bit_width); + case ACPI_ADR_SPACE_SYSTEM_IO: + return acpi_os_read_port(paddr, (u32 *)val, reg->bit_width); + default: + return -EINVAL; + } +} +EXPORT_SYMBOL_GPL(acpi_atomic_read); + +int acpi_atomic_write(u64 val, struct acpi_generic_address *reg) +{ + u64 paddr; + int rc; + + rc = acpi_check_gar(reg, &paddr, 1); + if (rc) + return rc; + + switch (reg->space_id) { + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + return acpi_atomic_write_mem(paddr, val, reg->bit_width); + case ACPI_ADR_SPACE_SYSTEM_IO: + return acpi_os_write_port(paddr, val, reg->bit_width); + default: + return -EINVAL; + } +} +EXPORT_SYMBOL_GPL(acpi_atomic_write); diff --git a/drivers/acpi/hed.c b/drivers/acpi/hed.c new file mode 100644 index 000000000000..d0c1967f7597 --- /dev/null +++ b/drivers/acpi/hed.c @@ -0,0 +1,112 @@ +/* + * ACPI Hardware Error Device (PNP0C33) Driver + * + * Copyright (C) 2010, Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * ACPI Hardware Error Device is used to report some hardware errors + * notified via SCI, mainly the corrected errors. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/acpi.h> +#include <acpi/acpi_bus.h> +#include <acpi/acpi_drivers.h> +#include <acpi/hed.h> + +static struct acpi_device_id acpi_hed_ids[] = { + {"PNP0C33", 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, acpi_hed_ids); + +static acpi_handle hed_handle; + +static BLOCKING_NOTIFIER_HEAD(acpi_hed_notify_list); + +int register_acpi_hed_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&acpi_hed_notify_list, nb); +} +EXPORT_SYMBOL_GPL(register_acpi_hed_notifier); + +void unregister_acpi_hed_notifier(struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&acpi_hed_notify_list, nb); +} +EXPORT_SYMBOL_GPL(unregister_acpi_hed_notifier); + +/* + * SCI to report hardware error is forwarded to the listeners of HED, + * it is used by HEST Generic Hardware Error Source with notify type + * SCI. + */ +static void acpi_hed_notify(struct acpi_device *device, u32 event) +{ + blocking_notifier_call_chain(&acpi_hed_notify_list, 0, NULL); +} + +static int __devinit acpi_hed_add(struct acpi_device *device) +{ + /* Only one hardware error device */ + if (hed_handle) + return -EINVAL; + hed_handle = device->handle; + return 0; +} + +static int __devexit acpi_hed_remove(struct acpi_device *device, int type) +{ + hed_handle = NULL; + return 0; +} + +static struct acpi_driver acpi_hed_driver = { + .name = "hardware_error_device", + .class = "hardware_error", + .ids = acpi_hed_ids, + .ops = { + .add = acpi_hed_add, + .remove = acpi_hed_remove, + .notify = acpi_hed_notify, + }, +}; + +static int __init acpi_hed_init(void) +{ + if (acpi_disabled) + return -ENODEV; + + if (acpi_bus_register_driver(&acpi_hed_driver) < 0) + return -ENODEV; + + return 0; +} + +static void __exit acpi_hed_exit(void) +{ + acpi_bus_unregister_driver(&acpi_hed_driver); +} + +module_init(acpi_hed_init); +module_exit(acpi_hed_exit); + +ACPI_MODULE_NAME("hed"); +MODULE_AUTHOR("Huang Ying"); +MODULE_DESCRIPTION("ACPI Hardware Error Device Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/acpi/hest.c b/drivers/acpi/hest.c deleted file mode 100644 index 1c527a192872..000000000000 --- a/drivers/acpi/hest.c +++ /dev/null @@ -1,139 +0,0 @@ -#include <linux/acpi.h> -#include <linux/pci.h> - -#define PREFIX "ACPI: " - -static inline unsigned long parse_acpi_hest_ia_machine_check(struct acpi_hest_ia_machine_check *p) -{ - return sizeof(*p) + - (sizeof(struct acpi_hest_ia_error_bank) * p->num_hardware_banks); -} - -static inline unsigned long parse_acpi_hest_ia_corrected(struct acpi_hest_ia_corrected *p) -{ - return sizeof(*p) + - (sizeof(struct acpi_hest_ia_error_bank) * p->num_hardware_banks); -} - -static inline unsigned long parse_acpi_hest_ia_nmi(struct acpi_hest_ia_nmi *p) -{ - return sizeof(*p); -} - -static inline unsigned long parse_acpi_hest_generic(struct acpi_hest_generic *p) -{ - return sizeof(*p); -} - -static inline unsigned int hest_match_pci(struct acpi_hest_aer_common *p, struct pci_dev *pci) -{ - return (0 == pci_domain_nr(pci->bus) && - p->bus == pci->bus->number && - p->device == PCI_SLOT(pci->devfn) && - p->function == PCI_FUNC(pci->devfn)); -} - -static unsigned long parse_acpi_hest_aer(void *hdr, int type, struct pci_dev *pci, int *firmware_first) -{ - struct acpi_hest_aer_common *p = hdr + sizeof(struct acpi_hest_header); - unsigned long rc=0; - u8 pcie_type = 0; - u8 bridge = 0; - switch (type) { - case ACPI_HEST_TYPE_AER_ROOT_PORT: - rc = sizeof(struct acpi_hest_aer_root); - pcie_type = PCI_EXP_TYPE_ROOT_PORT; - break; - case ACPI_HEST_TYPE_AER_ENDPOINT: - rc = sizeof(struct acpi_hest_aer); - pcie_type = PCI_EXP_TYPE_ENDPOINT; - break; - case ACPI_HEST_TYPE_AER_BRIDGE: - rc = sizeof(struct acpi_hest_aer_bridge); - if ((pci->class >> 16) == PCI_BASE_CLASS_BRIDGE) - bridge = 1; - break; - } - - if (p->flags & ACPI_HEST_GLOBAL) { - if ((pci->is_pcie && (pci->pcie_type == pcie_type)) || bridge) - *firmware_first = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST); - } - else - if (hest_match_pci(p, pci)) - *firmware_first = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST); - return rc; -} - -static int acpi_hest_firmware_first(struct acpi_table_header *stdheader, struct pci_dev *pci) -{ - struct acpi_table_hest *hest = (struct acpi_table_hest *)stdheader; - void *p = (void *)hest + sizeof(*hest); /* defined by the ACPI 4.0 spec */ - struct acpi_hest_header *hdr = p; - - int i; - int firmware_first = 0; - static unsigned char printed_unused = 0; - static unsigned char printed_reserved = 0; - - for (i=0, hdr=p; p < (((void *)hest) + hest->header.length) && i < hest->error_source_count; i++) { - switch (hdr->type) { - case ACPI_HEST_TYPE_IA32_CHECK: - p += parse_acpi_hest_ia_machine_check(p); - break; - case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK: - p += parse_acpi_hest_ia_corrected(p); - break; - case ACPI_HEST_TYPE_IA32_NMI: - p += parse_acpi_hest_ia_nmi(p); - break; - /* These three should never appear */ - case ACPI_HEST_TYPE_NOT_USED3: - case ACPI_HEST_TYPE_NOT_USED4: - case ACPI_HEST_TYPE_NOT_USED5: - if (!printed_unused) { - printk(KERN_DEBUG PREFIX - "HEST Error Source list contains an obsolete type (%d).\n", hdr->type); - printed_unused = 1; - } - break; - case ACPI_HEST_TYPE_AER_ROOT_PORT: - case ACPI_HEST_TYPE_AER_ENDPOINT: - case ACPI_HEST_TYPE_AER_BRIDGE: - p += parse_acpi_hest_aer(p, hdr->type, pci, &firmware_first); - break; - case ACPI_HEST_TYPE_GENERIC_ERROR: - p += parse_acpi_hest_generic(p); - break; - /* These should never appear either */ - case ACPI_HEST_TYPE_RESERVED: - default: - if (!printed_reserved) { - printk(KERN_DEBUG PREFIX - "HEST Error Source list contains a reserved type (%d).\n", hdr->type); - printed_reserved = 1; - } - break; - } - } - return firmware_first; -} - -int acpi_hest_firmware_first_pci(struct pci_dev *pci) -{ - acpi_status status = AE_NOT_FOUND; - struct acpi_table_header *hest = NULL; - - if (acpi_disabled) - return 0; - - status = acpi_get_table(ACPI_SIG_HEST, 1, &hest); - - if (ACPI_SUCCESS(status)) { - if (acpi_hest_firmware_first(hest, pci)) { - return 1; - } - } - return 0; -} -EXPORT_SYMBOL_GPL(acpi_hest_firmware_first_pci); diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index aefce33f2a09..4eac59393edc 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -120,7 +120,8 @@ acpi_handle acpi_get_pci_rootbridge_handle(unsigned int seg, unsigned int bus) struct acpi_pci_root *root; list_for_each_entry(root, &acpi_pci_roots, node) - if ((root->segment == (u16) seg) && (root->bus_nr == (u16) bus)) + if ((root->segment == (u16) seg) && + (root->secondary.start == (u16) bus)) return root->device->handle; return NULL; } @@ -154,7 +155,7 @@ EXPORT_SYMBOL_GPL(acpi_is_root_bridge); static acpi_status get_root_bridge_busnr_callback(struct acpi_resource *resource, void *data) { - int *busnr = data; + struct resource *res = data; struct acpi_resource_address64 address; if (resource->type != ACPI_RESOURCE_TYPE_ADDRESS16 && @@ -164,28 +165,27 @@ get_root_bridge_busnr_callback(struct acpi_resource *resource, void *data) acpi_resource_to_address64(resource, &address); if ((address.address_length > 0) && - (address.resource_type == ACPI_BUS_NUMBER_RANGE)) - *busnr = address.minimum; + (address.resource_type == ACPI_BUS_NUMBER_RANGE)) { + res->start = address.minimum; + res->end = address.minimum + address.address_length - 1; + } return AE_OK; } static acpi_status try_get_root_bridge_busnr(acpi_handle handle, - unsigned long long *bus) + struct resource *res) { acpi_status status; - int busnum; - busnum = -1; + res->start = -1; status = acpi_walk_resources(handle, METHOD_NAME__CRS, - get_root_bridge_busnr_callback, &busnum); + get_root_bridge_busnr_callback, res); if (ACPI_FAILURE(status)) return status; - /* Check if we really get a bus number from _CRS */ - if (busnum == -1) + if (res->start == -1) return AE_ERROR; - *bus = busnum; return AE_OK; } @@ -429,34 +429,47 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) struct acpi_device *child; u32 flags, base_flags; + root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL); + if (!root) + return -ENOMEM; + segment = 0; status = acpi_evaluate_integer(device->handle, METHOD_NAME__SEG, NULL, &segment); if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { printk(KERN_ERR PREFIX "can't evaluate _SEG\n"); - return -ENODEV; + result = -ENODEV; + goto end; } /* Check _CRS first, then _BBN. If no _BBN, default to zero. */ - bus = 0; - status = try_get_root_bridge_busnr(device->handle, &bus); + root->secondary.flags = IORESOURCE_BUS; + status = try_get_root_bridge_busnr(device->handle, &root->secondary); if (ACPI_FAILURE(status)) { + /* + * We need both the start and end of the downstream bus range + * to interpret _CBA (MMCONFIG base address), so it really is + * supposed to be in _CRS. If we don't find it there, all we + * can do is assume [_BBN-0xFF] or [0-0xFF]. + */ + root->secondary.end = 0xFF; + printk(KERN_WARNING FW_BUG PREFIX + "no secondary bus range in _CRS\n"); status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN, NULL, &bus); - if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { - printk(KERN_ERR PREFIX - "no bus number in _CRS and can't evaluate _BBN\n"); - return -ENODEV; + if (ACPI_SUCCESS(status)) + root->secondary.start = bus; + else if (status == AE_NOT_FOUND) + root->secondary.start = 0; + else { + printk(KERN_ERR PREFIX "can't evaluate _BBN\n"); + result = -ENODEV; + goto end; } } - root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL); - if (!root) - return -ENOMEM; - INIT_LIST_HEAD(&root->node); root->device = device; root->segment = segment & 0xFFFF; - root->bus_nr = bus & 0xFF; strcpy(acpi_device_name(device), ACPI_PCI_ROOT_DEVICE_NAME); strcpy(acpi_device_class(device), ACPI_PCI_ROOT_CLASS); device->driver_data = root; @@ -475,9 +488,9 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) /* TBD: Locking */ list_add_tail(&root->node, &acpi_pci_roots); - printk(KERN_INFO PREFIX "%s [%s] (%04x:%02x)\n", + printk(KERN_INFO PREFIX "%s [%s] (domain %04x %pR)\n", acpi_device_name(device), acpi_device_bid(device), - root->segment, root->bus_nr); + root->segment, &root->secondary); /* * Scan the Root Bridge @@ -486,11 +499,11 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) * PCI namespace does not get created until this call is made (and * thus the root bridge's pci_dev does not exist). */ - root->bus = pci_acpi_scan_root(device, segment, bus); + root->bus = pci_acpi_scan_root(root); if (!root->bus) { printk(KERN_ERR PREFIX "Bus %04x:%02x not present in PCI namespace\n", - root->segment, root->bus_nr); + root->segment, (unsigned int)root->secondary.start); result = -ENODEV; goto end; } diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 6da962c9b21c..d71f0fc34b46 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -1875,6 +1875,7 @@ got_driver: */ if (filp->f_op == &hung_up_tty_fops) filp->f_op = &tty_fops; + unlock_kernel(); goto retry_open; } unlock_kernel(); diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c index b1edd778639c..405febd94f24 100644 --- a/drivers/input/joystick/iforce/iforce-main.c +++ b/drivers/input/joystick/iforce/iforce-main.c @@ -54,6 +54,9 @@ static signed short btn_avb_wheel[] = static signed short abs_joystick[] = { ABS_X, ABS_Y, ABS_THROTTLE, ABS_HAT0X, ABS_HAT0Y, -1 }; +static signed short abs_joystick_rudder[] = +{ ABS_X, ABS_Y, ABS_THROTTLE, ABS_RUDDER, ABS_HAT0X, ABS_HAT0Y, -1 }; + static signed short abs_avb_pegasus[] = { ABS_X, ABS_Y, ABS_THROTTLE, ABS_RUDDER, ABS_HAT0X, ABS_HAT0Y, ABS_HAT1X, ABS_HAT1Y, -1 }; @@ -76,8 +79,9 @@ static struct iforce_device iforce_device[] = { { 0x061c, 0xc0a4, "ACT LABS Force RS", btn_wheel, abs_wheel, ff_iforce }, //? { 0x061c, 0xc084, "ACT LABS Force RS", btn_wheel, abs_wheel, ff_iforce }, { 0x06f8, 0x0001, "Guillemot Race Leader Force Feedback", btn_wheel, abs_wheel, ff_iforce }, //? + { 0x06f8, 0x0001, "Guillemot Jet Leader Force Feedback", btn_joystick, abs_joystick_rudder, ff_iforce }, { 0x06f8, 0x0004, "Guillemot Force Feedback Racing Wheel", btn_wheel, abs_wheel, ff_iforce }, //? - { 0x06f8, 0x0004, "Gullemot Jet Leader 3D", btn_joystick, abs_joystick, ff_iforce }, //? + { 0x06f8, 0xa302, "Guillemot Jet Leader 3D", btn_joystick, abs_joystick, ff_iforce }, //? { 0x06d6, 0x29bc, "Trust Force Feedback Race Master", btn_wheel, abs_wheel, ff_iforce }, { 0x0000, 0x0000, "Unknown I-Force Device [%04x:%04x]", btn_joystick, abs_joystick, ff_iforce } }; diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c index b41303d3ec54..6c96631ae5d9 100644 --- a/drivers/input/joystick/iforce/iforce-usb.c +++ b/drivers/input/joystick/iforce/iforce-usb.c @@ -212,6 +212,7 @@ static struct usb_device_id iforce_usb_ids [] = { { USB_DEVICE(0x061c, 0xc0a4) }, /* ACT LABS Force RS */ { USB_DEVICE(0x061c, 0xc084) }, /* ACT LABS Force RS */ { USB_DEVICE(0x06f8, 0x0001) }, /* Guillemot Race Leader Force Feedback */ + { USB_DEVICE(0x06f8, 0x0003) }, /* Guillemot Jet Leader Force Feedback */ { USB_DEVICE(0x06f8, 0x0004) }, /* Guillemot Force Feedback Racing Wheel */ { USB_DEVICE(0x06f8, 0xa302) }, /* Guillemot Jet Leader 3D */ { } /* Terminating entry */ diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 0520c2e19927..112b4ee52ff2 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -185,7 +185,7 @@ static void elantech_report_absolute_v1(struct psmouse *psmouse) int fingers; static int old_fingers; - if (etd->fw_version_maj == 0x01) { + if (etd->fw_version < 0x020000) { /* * byte 0: D U p1 p2 1 p3 R L * byte 1: f 0 th tw x9 x8 y9 y8 @@ -227,7 +227,7 @@ static void elantech_report_absolute_v1(struct psmouse *psmouse) input_report_key(dev, BTN_LEFT, packet[0] & 0x01); input_report_key(dev, BTN_RIGHT, packet[0] & 0x02); - if ((etd->fw_version_maj == 0x01) && + if (etd->fw_version < 0x020000 && (etd->capabilities & ETP_CAP_HAS_ROCKER)) { /* rocker up */ input_report_key(dev, BTN_FORWARD, packet[0] & 0x40); @@ -321,7 +321,7 @@ static int elantech_check_parity_v1(struct psmouse *psmouse) unsigned char p1, p2, p3; /* Parity bits are placed differently */ - if (etd->fw_version_maj == 0x01) { + if (etd->fw_version < 0x020000) { /* byte 0: D U p1 p2 1 p3 R L */ p1 = (packet[0] & 0x20) >> 5; p2 = (packet[0] & 0x10) >> 4; @@ -457,7 +457,7 @@ static void elantech_set_input_params(struct psmouse *psmouse) switch (etd->hw_version) { case 1: /* Rocker button */ - if ((etd->fw_version_maj == 0x01) && + if (etd->fw_version < 0x020000 && (etd->capabilities & ETP_CAP_HAS_ROCKER)) { __set_bit(BTN_FORWARD, dev->keybit); __set_bit(BTN_BACK, dev->keybit); @@ -686,15 +686,14 @@ int elantech_init(struct psmouse *psmouse) pr_err("elantech.c: failed to query firmware version.\n"); goto init_fail; } - etd->fw_version_maj = param[0]; - etd->fw_version_min = param[2]; + + etd->fw_version = (param[0] << 16) | (param[1] << 8) | param[2]; /* * Assume every version greater than this is new EeePC style * hardware with 6 byte packets */ - if ((etd->fw_version_maj == 0x02 && etd->fw_version_min >= 0x30) || - etd->fw_version_maj > 0x02) { + if (etd->fw_version >= 0x020030) { etd->hw_version = 2; /* For now show extra debug information */ etd->debug = 1; @@ -704,8 +703,9 @@ int elantech_init(struct psmouse *psmouse) etd->hw_version = 1; etd->paritycheck = 1; } - pr_info("elantech.c: assuming hardware version %d, firmware version %d.%d\n", - etd->hw_version, etd->fw_version_maj, etd->fw_version_min); + + pr_info("elantech.c: assuming hardware version %d, firmware version %d.%d.%d\n", + etd->hw_version, param[0], param[1], param[2]); if (synaptics_send_cmd(psmouse, ETP_CAPABILITIES_QUERY, param)) { pr_err("elantech.c: failed to query capabilities.\n"); @@ -720,8 +720,8 @@ int elantech_init(struct psmouse *psmouse) * a touch action starts causing the mouse cursor or scrolled page * to jump. Enable a workaround. */ - if (etd->fw_version_maj == 0x02 && etd->fw_version_min == 0x22) { - pr_info("elantech.c: firmware version 2.34 detected, " + if (etd->fw_version == 0x020022) { + pr_info("elantech.c: firmware version 2.0.34 detected, " "enabling jumpy cursor workaround\n"); etd->jumpy_cursor = 1; } diff --git a/drivers/input/mouse/elantech.h b/drivers/input/mouse/elantech.h index feac5f7af966..ac57bde1bb9f 100644 --- a/drivers/input/mouse/elantech.h +++ b/drivers/input/mouse/elantech.h @@ -100,11 +100,10 @@ struct elantech_data { unsigned char reg_26; unsigned char debug; unsigned char capabilities; - unsigned char fw_version_maj; - unsigned char fw_version_min; - unsigned char hw_version; unsigned char paritycheck; unsigned char jumpy_cursor; + unsigned char hw_version; + unsigned int fw_version; unsigned char parity[256]; }; diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index cbc807264940..a3c97315a473 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -1394,6 +1394,7 @@ static int psmouse_reconnect(struct serio *serio) struct psmouse *psmouse = serio_get_drvdata(serio); struct psmouse *parent = NULL; struct serio_driver *drv = serio->drv; + unsigned char type; int rc = -1; if (!drv || !psmouse) { @@ -1413,10 +1414,15 @@ static int psmouse_reconnect(struct serio *serio) if (psmouse->reconnect) { if (psmouse->reconnect(psmouse)) goto out; - } else if (psmouse_probe(psmouse) < 0 || - psmouse->type != psmouse_extensions(psmouse, - psmouse_max_proto, false)) { - goto out; + } else { + psmouse_reset(psmouse); + + if (psmouse_probe(psmouse) < 0) + goto out; + + type = psmouse_extensions(psmouse, psmouse_max_proto, false); + if (psmouse->type != type) + goto out; } /* ok, the device type (and capabilities) match the old one, diff --git a/drivers/input/touchscreen/ad7877.c b/drivers/input/touchscreen/ad7877.c index e019d53d1ab4..0d2d7e54b465 100644 --- a/drivers/input/touchscreen/ad7877.c +++ b/drivers/input/touchscreen/ad7877.c @@ -156,9 +156,14 @@ struct ser_req { u16 reset; u16 ref_on; u16 command; - u16 sample; struct spi_message msg; struct spi_transfer xfer[6]; + + /* + * DMA (thus cache coherency maintenance) requires the + * transfer buffers to live in their own cache lines. + */ + u16 sample ____cacheline_aligned; }; struct ad7877 { @@ -182,8 +187,6 @@ struct ad7877 { u8 averaging; u8 pen_down_acc_interval; - u16 conversion_data[AD7877_NR_SENSE]; - struct spi_transfer xfer[AD7877_NR_SENSE + 2]; struct spi_message msg; @@ -195,6 +198,12 @@ struct ad7877 { spinlock_t lock; struct timer_list timer; /* P: lock */ unsigned pending:1; /* P: lock */ + + /* + * DMA (thus cache coherency maintenance) requires the + * transfer buffers to live in their own cache lines. + */ + u16 conversion_data[AD7877_NR_SENSE] ____cacheline_aligned; }; static int gpio3; diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c index a3d5728b6449..f2ab025ad97a 100644 --- a/drivers/mfd/wm831x-core.c +++ b/drivers/mfd/wm831x-core.c @@ -349,6 +349,9 @@ int wm831x_auxadc_read(struct wm831x *wm831x, enum wm831x_auxadc input) goto disable; } + /* If an interrupt arrived late clean up after it */ + try_wait_for_completion(&wm831x->auxadc_done); + /* Ignore the result to allow us to soldier on without IRQ hookup */ wait_for_completion_timeout(&wm831x->auxadc_done, msecs_to_jiffies(5)); diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c index e400a3bed063..b5807484b4c9 100644 --- a/drivers/mfd/wm8350-core.c +++ b/drivers/mfd/wm8350-core.c @@ -363,6 +363,10 @@ int wm8350_read_auxadc(struct wm8350 *wm8350, int channel, int scale, int vref) reg |= 1 << channel | WM8350_AUXADC_POLL; wm8350_reg_write(wm8350, WM8350_DIGITISER_CONTROL_1, reg); + /* If a late IRQ left the completion signalled then consume + * the completion. */ + try_wait_for_completion(&wm8350->auxadc_done); + /* We ignore the result of the completion and just check for a * conversion result, allowing us to soldier on if the IRQ * infrastructure is not set up for the chip. */ diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index a6dd7da37357..336d9f553f3e 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -314,8 +314,8 @@ static void at91_mci_post_dma_read(struct at91mci_host *host) dmabuf = (unsigned *)tmpv; } + flush_kernel_dcache_page(sg_page(sg)); kunmap_atomic(sgbuffer, KM_BIO_SRC_IRQ); - dmac_flush_range((void *)sgbuffer, ((void *)sgbuffer) + amount); data->bytes_xfered += amount; if (size == 0) break; diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index bd833ea3ba49..7182c337aef1 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -134,4 +134,21 @@ static inline int aer_osc_setup(struct pcie_device *pciedev) } #endif +#ifdef CONFIG_ACPI_APEI +extern int pcie_aer_get_firmware_first(struct pci_dev *pci_dev); +#else +static inline int pcie_aer_get_firmware_first(struct pci_dev *pci_dev) +{ + if (pci_dev->__aer_firmware_first_valid) + return pci_dev->__aer_firmware_first; + return 0; +} +#endif + +static inline void pcie_aer_force_firmware_first(struct pci_dev *pci_dev, + int enable) +{ + pci_dev->__aer_firmware_first = !!enable; + pci_dev->__aer_firmware_first_valid = 1; +} #endif /* _AERDRV_H_ */ diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c index 04814087658d..f278d7b0d95d 100644 --- a/drivers/pci/pcie/aer/aerdrv_acpi.c +++ b/drivers/pci/pcie/aer/aerdrv_acpi.c @@ -16,6 +16,7 @@ #include <linux/acpi.h> #include <linux/pci-acpi.h> #include <linux/delay.h> +#include <acpi/apei.h> #include "aerdrv.h" /** @@ -53,3 +54,79 @@ int aer_osc_setup(struct pcie_device *pciedev) return 0; } + +#ifdef CONFIG_ACPI_APEI +static inline int hest_match_pci(struct acpi_hest_aer_common *p, + struct pci_dev *pci) +{ + return (0 == pci_domain_nr(pci->bus) && + p->bus == pci->bus->number && + p->device == PCI_SLOT(pci->devfn) && + p->function == PCI_FUNC(pci->devfn)); +} + +struct aer_hest_parse_info { + struct pci_dev *pci_dev; + int firmware_first; +}; + +static int aer_hest_parse(struct acpi_hest_header *hest_hdr, void *data) +{ + struct aer_hest_parse_info *info = data; + struct acpi_hest_aer_common *p; + u8 pcie_type = 0; + u8 bridge = 0; + int ff = 0; + + switch (hest_hdr->type) { + case ACPI_HEST_TYPE_AER_ROOT_PORT: + pcie_type = PCI_EXP_TYPE_ROOT_PORT; + break; + case ACPI_HEST_TYPE_AER_ENDPOINT: + pcie_type = PCI_EXP_TYPE_ENDPOINT; + break; + case ACPI_HEST_TYPE_AER_BRIDGE: + if ((info->pci_dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) + bridge = 1; + break; + default: + return 0; + } + + p = (struct acpi_hest_aer_common *)(hest_hdr + 1); + if (p->flags & ACPI_HEST_GLOBAL) { + if ((info->pci_dev->is_pcie && + info->pci_dev->pcie_type == pcie_type) || bridge) + ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST); + } else + if (hest_match_pci(p, info->pci_dev)) + ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST); + info->firmware_first = ff; + + return 0; +} + +static void aer_set_firmware_first(struct pci_dev *pci_dev) +{ + int rc; + struct aer_hest_parse_info info = { + .pci_dev = pci_dev, + .firmware_first = 0, + }; + + rc = apei_hest_parse(aer_hest_parse, &info); + + if (rc) + pci_dev->__aer_firmware_first = 0; + else + pci_dev->__aer_firmware_first = info.firmware_first; + pci_dev->__aer_firmware_first_valid = 1; +} + +int pcie_aer_get_firmware_first(struct pci_dev *dev) +{ + if (!dev->__aer_firmware_first_valid) + aer_set_firmware_first(dev); + return dev->__aer_firmware_first; +} +#endif diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index aceb04b67b60..586b6713e417 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -36,7 +36,7 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev) u16 reg16 = 0; int pos; - if (dev->aer_firmware_first) + if (pcie_aer_get_firmware_first(dev)) return -EIO; pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); @@ -64,7 +64,7 @@ int pci_disable_pcie_error_reporting(struct pci_dev *dev) u16 reg16 = 0; int pos; - if (dev->aer_firmware_first) + if (pcie_aer_get_firmware_first(dev)) return -EIO; pos = pci_pcie_cap(dev); @@ -859,7 +859,7 @@ void aer_delete_rootport(struct aer_rpc *rpc) */ int aer_init(struct pcie_device *dev) { - if (dev->port->aer_firmware_first) { + if (pcie_aer_get_firmware_first(dev->port)) { dev_printk(KERN_DEBUG, &dev->device, "PCIe errors handled by platform firmware.\n"); goto out; @@ -873,7 +873,7 @@ out: if (forceload) { dev_printk(KERN_DEBUG, &dev->device, "aerdrv forceload requested.\n"); - dev->port->aer_firmware_first = 0; + pcie_aer_force_firmware_first(dev->port, 0); return 0; } return -ENXIO; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index c82548afcd5c..f4adba2d1dd3 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -10,7 +10,6 @@ #include <linux/module.h> #include <linux/cpumask.h> #include <linux/pci-aspm.h> -#include <acpi/acpi_hest.h> #include "pci.h" #define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */ @@ -904,12 +903,6 @@ void set_pcie_hotplug_bridge(struct pci_dev *pdev) pdev->is_hotplug_bridge = 1; } -static void set_pci_aer_firmware_first(struct pci_dev *pdev) -{ - if (acpi_hest_firmware_first_pci(pdev)) - pdev->aer_firmware_first = 1; -} - #define LEGACY_IO_RESOURCE (IORESOURCE_IO | IORESOURCE_PCI_FIXED) /** @@ -939,7 +932,6 @@ int pci_setup_device(struct pci_dev *dev) dev->multifunction = !!(hdr_type & 0x80); dev->error_state = pci_channel_io_normal; set_pcie_port_type(dev); - set_pci_aer_firmware_first(dev); list_for_each_entry(slot, &dev->bus->slots, list) if (PCI_SLOT(dev->devfn) == slot->number) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 4fe36d2e1049..19b111383f62 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -838,65 +838,11 @@ static void pci_bus_dump_resources(struct pci_bus *bus) } } -static int __init pci_bus_get_depth(struct pci_bus *bus) -{ - int depth = 0; - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - int ret; - struct pci_bus *b = dev->subordinate; - if (!b) - continue; - - ret = pci_bus_get_depth(b); - if (ret + 1 > depth) - depth = ret + 1; - } - - return depth; -} -static int __init pci_get_max_depth(void) -{ - int depth = 0; - struct pci_bus *bus; - - list_for_each_entry(bus, &pci_root_buses, node) { - int ret; - - ret = pci_bus_get_depth(bus); - if (ret > depth) - depth = ret; - } - - return depth; -} - -/* - * first try will not touch pci bridge res - * second and later try will clear small leaf bridge res - * will stop till to the max deepth if can not find good one - */ void __init pci_assign_unassigned_resources(void) { struct pci_bus *bus; - int tried_times = 0; - enum release_type rel_type = leaf_only; - struct resource_list_x head, *list; - unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM | - IORESOURCE_PREFETCH; - unsigned long failed_type; - int max_depth = pci_get_max_depth(); - int pci_try_num; - head.next = NULL; - - pci_try_num = max_depth + 1; - printk(KERN_DEBUG "PCI: max bus depth: %d pci_try_num: %d\n", - max_depth, pci_try_num); - -again: /* Depth first, calculate sizes and alignments of all subordinate buses. */ list_for_each_entry(bus, &pci_root_buses, node) { @@ -904,65 +850,9 @@ again: } /* Depth last, allocate resources and update the hardware. */ list_for_each_entry(bus, &pci_root_buses, node) { - __pci_bus_assign_resources(bus, &head); - } - tried_times++; - - /* any device complain? */ - if (!head.next) - goto enable_and_dump; - failed_type = 0; - for (list = head.next; list;) { - failed_type |= list->flags; - list = list->next; - } - /* - * io port are tight, don't try extra - * or if reach the limit, don't want to try more - */ - failed_type &= type_mask; - if ((failed_type == IORESOURCE_IO) || (tried_times >= pci_try_num)) { - free_failed_list(&head); - goto enable_and_dump; - } - - printk(KERN_DEBUG "PCI: No. %d try to assign unassigned res\n", - tried_times + 1); - - /* third times and later will not check if it is leaf */ - if ((tried_times + 1) > 2) - rel_type = whole_subtree; - - /* - * Try to release leaf bridge's resources that doesn't fit resource of - * child device under that bridge - */ - for (list = head.next; list;) { - bus = list->dev->bus; - pci_bus_release_bridge_resources(bus, list->flags & type_mask, - rel_type); - list = list->next; - } - /* restore size and flags */ - for (list = head.next; list;) { - struct resource *res = list->res; - - res->start = list->start; - res->end = list->end; - res->flags = list->flags; - if (list->dev->subordinate) - res->flags = 0; - - list = list->next; - } - free_failed_list(&head); - - goto again; - -enable_and_dump: - /* Depth last, update the hardware. */ - list_for_each_entry(bus, &pci_root_buses, node) + pci_bus_assign_resources(bus); pci_enable_bridges(bus); + } /* dump the resource on buses */ list_for_each_entry(bus, &pci_root_buses, node) { diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index acf222f91f5a..fa2339cb1681 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -37,6 +37,9 @@ */ #define DASD_CHANQ_MAX_SIZE 4 +#define DASD_SLEEPON_START_TAG (void *) 1 +#define DASD_SLEEPON_END_TAG (void *) 2 + /* * SECTION: exported variables of dasd.c */ @@ -1472,7 +1475,10 @@ void dasd_add_request_tail(struct dasd_ccw_req *cqr) */ static void dasd_wakeup_cb(struct dasd_ccw_req *cqr, void *data) { - wake_up((wait_queue_head_t *) data); + spin_lock_irq(get_ccwdev_lock(cqr->startdev->cdev)); + cqr->callback_data = DASD_SLEEPON_END_TAG; + spin_unlock_irq(get_ccwdev_lock(cqr->startdev->cdev)); + wake_up(&generic_waitq); } static inline int _wait_for_wakeup(struct dasd_ccw_req *cqr) @@ -1482,10 +1488,7 @@ static inline int _wait_for_wakeup(struct dasd_ccw_req *cqr) device = cqr->startdev; spin_lock_irq(get_ccwdev_lock(device->cdev)); - rc = ((cqr->status == DASD_CQR_DONE || - cqr->status == DASD_CQR_NEED_ERP || - cqr->status == DASD_CQR_TERMINATED) && - list_empty(&cqr->devlist)); + rc = (cqr->callback_data == DASD_SLEEPON_END_TAG); spin_unlock_irq(get_ccwdev_lock(device->cdev)); return rc; } @@ -1573,7 +1576,7 @@ static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible) wait_event(generic_waitq, !(device->stopped)); cqr->callback = dasd_wakeup_cb; - cqr->callback_data = (void *) &generic_waitq; + cqr->callback_data = DASD_SLEEPON_START_TAG; dasd_add_request_tail(cqr); if (interruptible) { rc = wait_event_interruptible( @@ -1652,7 +1655,7 @@ int dasd_sleep_on_immediatly(struct dasd_ccw_req *cqr) } cqr->callback = dasd_wakeup_cb; - cqr->callback_data = (void *) &generic_waitq; + cqr->callback_data = DASD_SLEEPON_START_TAG; cqr->status = DASD_CQR_QUEUED; list_add(&cqr->devlist, &device->ccw_queue); diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c index 4315b23590bd..eacb588a9345 100644 --- a/drivers/serial/imx.c +++ b/drivers/serial/imx.c @@ -120,7 +120,8 @@ #define MX2_UCR3_RXDMUXSEL (1<<2) /* RXD Muxed Input Select, on mx2/mx3 */ #define UCR3_INVT (1<<1) /* Inverted Infrared transmission */ #define UCR3_BPEN (1<<0) /* Preset registers enable */ -#define UCR4_CTSTL_32 (32<<10) /* CTS trigger level (32 chars) */ +#define UCR4_CTSTL_SHF 10 /* CTS trigger level shift */ +#define UCR4_CTSTL_MASK 0x3F /* CTS trigger is 6 bits wide */ #define UCR4_INVR (1<<9) /* Inverted infrared reception */ #define UCR4_ENIRI (1<<8) /* Serial infrared interrupt enable */ #define UCR4_WKEN (1<<7) /* Wake interrupt enable */ @@ -591,6 +592,9 @@ static int imx_setup_ufcr(struct imx_port *sport, unsigned int mode) return 0; } +/* half the RX buffer size */ +#define CTSTL 16 + static int imx_startup(struct uart_port *port) { struct imx_port *sport = (struct imx_port *)port; @@ -607,6 +611,10 @@ static int imx_startup(struct uart_port *port) if (USE_IRDA(sport)) temp |= UCR4_IRSC; + /* set the trigger level for CTS */ + temp &= ~(UCR4_CTSTL_MASK<< UCR4_CTSTL_SHF); + temp |= CTSTL<< UCR4_CTSTL_SHF; + writel(temp & ~UCR4_DREN, sport->port.membase + UCR4); if (USE_IRDA(sport)) { diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c index a176ab4bd65b..02469c31bf0b 100644 --- a/drivers/serial/mpc52xx_uart.c +++ b/drivers/serial/mpc52xx_uart.c @@ -1467,7 +1467,7 @@ mpc52xx_uart_init(void) /* * Map the PSC FIFO Controller and init if on MPC512x. */ - if (psc_ops->fifoc_init) { + if (psc_ops && psc_ops->fifoc_init) { ret = psc_ops->fifoc_init(); if (ret) return ret; diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 4a6366a42129..111a01a747fc 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -380,6 +380,7 @@ static int usbfs_rmdir(struct inode *dir, struct dentry *dentry) mutex_lock(&inode->i_mutex); dentry_unhash(dentry); if (usbfs_empty(dentry)) { + dont_mount(dentry); drop_nlink(dentry->d_inode); drop_nlink(dentry->d_inode); dput(dentry); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index e69d238c5af0..49fa953aaf6e 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1035,7 +1035,12 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) /* This actually signals the guest, using eventfd. */ void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) { - __u16 flags = 0; + __u16 flags; + /* Flush out used index updates. This is paired + * with the barrier that the Guest executes when enabling + * interrupts. */ + smp_mb(); + if (get_user(flags, &vq->avail->flags)) { vq_err(vq, "Failed to get flags"); return; diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 0bf5020d0d32..b87ba23442d2 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -175,7 +175,7 @@ config SA1100_WATCHDOG config MPCORE_WATCHDOG tristate "MPcore watchdog" - depends on ARM_MPCORE_PLATFORM && LOCAL_TIMERS + depends on HAVE_ARM_TWD help Watchdog timer embedded into the MPcore system. diff --git a/drivers/watchdog/mpcore_wdt.c b/drivers/watchdog/mpcore_wdt.c index 016c6a791cab..b8ec7aca3c8e 100644 --- a/drivers/watchdog/mpcore_wdt.c +++ b/drivers/watchdog/mpcore_wdt.c @@ -31,8 +31,9 @@ #include <linux/platform_device.h> #include <linux/uaccess.h> #include <linux/slab.h> +#include <linux/io.h> -#include <asm/hardware/arm_twd.h> +#include <asm/smp_twd.h> struct mpcore_wdt { unsigned long timer_alive; @@ -44,7 +45,7 @@ struct mpcore_wdt { }; static struct platform_device *mpcore_wdt_dev; -extern unsigned int mpcore_timer_rate; +static DEFINE_SPINLOCK(wdt_lock); #define TIMER_MARGIN 60 static int mpcore_margin = TIMER_MARGIN; @@ -94,13 +95,15 @@ static irqreturn_t mpcore_wdt_fire(int irq, void *arg) */ static void mpcore_wdt_keepalive(struct mpcore_wdt *wdt) { - unsigned int count; + unsigned long count; + spin_lock(&wdt_lock); /* Assume prescale is set to 256 */ - count = (mpcore_timer_rate / 256) * mpcore_margin; + count = __raw_readl(wdt->base + TWD_WDOG_COUNTER); + count = (0xFFFFFFFFU - count) * (HZ / 5); + count = (count / 256) * mpcore_margin; /* Reload the counter */ - spin_lock(&wdt_lock); writel(count + wdt->perturb, wdt->base + TWD_WDOG_LOAD); wdt->perturb = wdt->perturb ? 0 : 1; spin_unlock(&wdt_lock); @@ -119,7 +122,6 @@ static void mpcore_wdt_start(struct mpcore_wdt *wdt) { dev_printk(KERN_INFO, wdt->dev, "enabling watchdog.\n"); - spin_lock(&wdt_lock); /* This loads the count register but does NOT start the count yet */ mpcore_wdt_keepalive(wdt); @@ -130,7 +132,6 @@ static void mpcore_wdt_start(struct mpcore_wdt *wdt) /* Enable watchdog - prescale=256, watchdog mode=1, enable=1 */ writel(0x0000FF09, wdt->base + TWD_WDOG_CONTROL); } - spin_unlock(&wdt_lock); } static int mpcore_wdt_set_heartbeat(int t) @@ -360,7 +361,7 @@ static int __devinit mpcore_wdt_probe(struct platform_device *dev) mpcore_wdt_miscdev.parent = &dev->dev; ret = misc_register(&mpcore_wdt_miscdev); if (ret) { - dev_printk(KERN_ERR, _dev, + dev_printk(KERN_ERR, wdt->dev, "cannot register miscdev on minor=%d (err=%d)\n", WATCHDOG_MINOR, ret); goto err_misc; @@ -369,13 +370,13 @@ static int __devinit mpcore_wdt_probe(struct platform_device *dev) ret = request_irq(wdt->irq, mpcore_wdt_fire, IRQF_DISABLED, "mpcore_wdt", wdt); if (ret) { - dev_printk(KERN_ERR, _dev, + dev_printk(KERN_ERR, wdt->dev, "cannot register IRQ%d for watchdog\n", wdt->irq); goto err_irq; } mpcore_wdt_stop(wdt); - platform_set_drvdata(&dev->dev, wdt); + platform_set_drvdata(dev, wdt); mpcore_wdt_dev = dev; return 0; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e84ef60ffe35..97a97839a867 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1481,12 +1481,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, ret = -EBADF; goto out_drop_write; } + src = src_file->f_dentry->d_inode; ret = -EINVAL; if (src == inode) goto out_fput; + /* the src must be open for reading */ + if (!(src_file->f_mode & FMODE_READ)) + goto out_fput; + ret = -EISDIR; if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) goto out_fput; diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c index b5808cdb2232..039b5011d83b 100644 --- a/fs/cachefiles/security.c +++ b/fs/cachefiles/security.c @@ -77,6 +77,8 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache, /* * check the security details of the on-disk cache * - must be called with security override in force + * - must return with a security override in force - even in the case of an + * error */ int cachefiles_determine_cache_security(struct cachefiles_cache *cache, struct dentry *root, @@ -99,6 +101,8 @@ int cachefiles_determine_cache_security(struct cachefiles_cache *cache, * which create files */ ret = set_create_files_as(new, root->d_inode); if (ret < 0) { + abort_creds(new); + cachefiles_begin_secure(cache, _saved_cred); _leave(" = %d [cfa]", ret); return ret; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4b42c2bb603f..a9005d862ed4 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -504,7 +504,6 @@ static void writepages_finish(struct ceph_osd_request *req, int i; struct ceph_snap_context *snapc = req->r_snapc; struct address_space *mapping = inode->i_mapping; - struct writeback_control *wbc = req->r_wbc; __s32 rc = -EIO; u64 bytes = 0; struct ceph_client *client = ceph_inode_to_client(inode); @@ -546,10 +545,6 @@ static void writepages_finish(struct ceph_osd_request *req, clear_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC); - if (i >= wrote) { - dout("inode %p skipping page %p\n", inode, page); - wbc->pages_skipped++; - } ceph_put_snap_context((void *)page->private); page->private = 0; ClearPagePrivate(page); @@ -799,7 +794,6 @@ get_more_pages: alloc_page_vec(client, req); req->r_callback = writepages_finish; req->r_inode = inode; - req->r_wbc = wbc; } /* note position of first page in pvec */ diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 0c1681806867..d9400534b279 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -858,6 +858,8 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci) } /* + * Remove a cap. Take steps to deal with a racing iterate_session_caps. + * * caller should hold i_lock. * caller will not hold session s_mutex if called from destroy_inode. */ @@ -866,15 +868,10 @@ void __ceph_remove_cap(struct ceph_cap *cap) struct ceph_mds_session *session = cap->session; struct ceph_inode_info *ci = cap->ci; struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; + int removed = 0; dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); - /* remove from inode list */ - rb_erase(&cap->ci_node, &ci->i_caps); - cap->ci = NULL; - if (ci->i_auth_cap == cap) - ci->i_auth_cap = NULL; - /* remove from session list */ spin_lock(&session->s_cap_lock); if (session->s_cap_iterator == cap) { @@ -885,10 +882,18 @@ void __ceph_remove_cap(struct ceph_cap *cap) list_del_init(&cap->session_caps); session->s_nr_caps--; cap->session = NULL; + removed = 1; } + /* protect backpointer with s_cap_lock: see iterate_session_caps */ + cap->ci = NULL; spin_unlock(&session->s_cap_lock); - if (cap->session == NULL) + /* remove from inode list */ + rb_erase(&cap->ci_node, &ci->i_caps); + if (ci->i_auth_cap == cap) + ci->i_auth_cap = NULL; + + if (removed) ceph_put_cap(cap); if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 261f3e6c0bcf..85b4d2ffdeba 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -733,6 +733,10 @@ no_change: __ceph_get_fmode(ci, cap_fmode); spin_unlock(&inode->i_lock); } + } else if (cap_fmode >= 0) { + pr_warning("mds issued no caps on %llx.%llx\n", + ceph_vinop(inode)); + __ceph_get_fmode(ci, cap_fmode); } /* update delegation info? */ diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 60a9a4ae47be..24561a557e01 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -736,9 +736,10 @@ static void cleanup_cap_releases(struct ceph_mds_session *session) } /* - * Helper to safely iterate over all caps associated with a session. + * Helper to safely iterate over all caps associated with a session, with + * special care taken to handle a racing __ceph_remove_cap(). * - * caller must hold session s_mutex + * Caller must hold session s_mutex. */ static int iterate_session_caps(struct ceph_mds_session *session, int (*cb)(struct inode *, struct ceph_cap *, @@ -2136,7 +2137,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) struct ceph_mds_session *session = NULL; struct ceph_msg *reply; struct rb_node *p; - int err; + int err = -ENOMEM; struct ceph_pagelist *pagelist; pr_info("reconnect to recovering mds%d\n", mds); @@ -2185,7 +2186,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) goto fail; err = iterate_session_caps(session, encode_caps_cb, pagelist); if (err < 0) - goto out; + goto fail; /* * snaprealms. we provide mds with the ino, seq (version), and @@ -2213,28 +2214,31 @@ send: reply->nr_pages = calc_pages_for(0, pagelist->length); ceph_con_send(&session->s_con, reply); - if (session) { - session->s_state = CEPH_MDS_SESSION_OPEN; - __wake_requests(mdsc, &session->s_waiting); - } + session->s_state = CEPH_MDS_SESSION_OPEN; + mutex_unlock(&session->s_mutex); + + mutex_lock(&mdsc->mutex); + __wake_requests(mdsc, &session->s_waiting); + mutex_unlock(&mdsc->mutex); + + ceph_put_mds_session(session); -out: up_read(&mdsc->snap_rwsem); - if (session) { - mutex_unlock(&session->s_mutex); - ceph_put_mds_session(session); - } mutex_lock(&mdsc->mutex); return; fail: ceph_msg_put(reply); + up_read(&mdsc->snap_rwsem); + mutex_unlock(&session->s_mutex); + ceph_put_mds_session(session); fail_nomsg: ceph_pagelist_release(pagelist); kfree(pagelist); fail_nopagelist: - pr_err("ENOMEM preparing reconnect for mds%d\n", mds); - goto out; + pr_err("error %d preparing reconnect for mds%d\n", err, mds); + mutex_lock(&mdsc->mutex); + return; } diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 509f57d9ccb3..cd4fadb6491a 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -492,7 +492,14 @@ static void prepare_write_message(struct ceph_connection *con) list_move_tail(&m->list_head, &con->out_sent); } - m->hdr.seq = cpu_to_le64(++con->out_seq); + /* + * only assign outgoing seq # if we haven't sent this message + * yet. if it is requeued, resend with it's original seq. + */ + if (m->needs_out_seq) { + m->hdr.seq = cpu_to_le64(++con->out_seq); + m->needs_out_seq = false; + } dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", m, con->out_seq, le16_to_cpu(m->hdr.type), @@ -1986,6 +1993,8 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); + msg->needs_out_seq = true; + /* queue */ mutex_lock(&con->mutex); BUG_ON(!list_empty(&msg->list_head)); @@ -2085,15 +2094,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, kref_init(&m->kref); INIT_LIST_HEAD(&m->list_head); + m->hdr.tid = 0; m->hdr.type = cpu_to_le16(type); + m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); + m->hdr.version = 0; m->hdr.front_len = cpu_to_le32(front_len); m->hdr.middle_len = 0; m->hdr.data_len = cpu_to_le32(page_len); m->hdr.data_off = cpu_to_le16(page_off); - m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); + m->hdr.reserved = 0; m->footer.front_crc = 0; m->footer.middle_crc = 0; m->footer.data_crc = 0; + m->footer.flags = 0; m->front_max = front_len; m->front_is_vmalloc = false; m->more_to_follow = false; diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index a343dae73cdc..a5caf91cc971 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h @@ -86,6 +86,7 @@ struct ceph_msg { struct kref kref; bool front_is_vmalloc; bool more_to_follow; + bool needs_out_seq; int front_max; struct ceph_msgpool *pool; diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index c7b4dedaace6..3514f71ff85f 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -565,7 +565,8 @@ static int __map_osds(struct ceph_osd_client *osdc, { struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; struct ceph_pg pgid; - int o = -1; + int acting[CEPH_PG_MAX_SIZE]; + int o = -1, num = 0; int err; dout("map_osds %p tid %lld\n", req, req->r_tid); @@ -576,10 +577,16 @@ static int __map_osds(struct ceph_osd_client *osdc, pgid = reqhead->layout.ol_pgid; req->r_pgid = pgid; - o = ceph_calc_pg_primary(osdc->osdmap, pgid); + err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting); + if (err > 0) { + o = acting[0]; + num = err; + } if ((req->r_osd && req->r_osd->o_osd == o && - req->r_sent >= req->r_osd->o_incarnation) || + req->r_sent >= req->r_osd->o_incarnation && + req->r_num_pg_osds == num && + memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) || (req->r_osd == NULL && o == -1)) return 0; /* no change */ @@ -587,6 +594,10 @@ static int __map_osds(struct ceph_osd_client *osdc, req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, req->r_osd ? req->r_osd->o_osd : -1); + /* record full pg acting set */ + memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num); + req->r_num_pg_osds = num; + if (req->r_osd) { __cancel_request(req); list_del_init(&req->r_osd_item); @@ -612,7 +623,7 @@ static int __map_osds(struct ceph_osd_client *osdc, __remove_osd_from_lru(req->r_osd); list_add(&req->r_osd_item, &req->r_osd->o_requests); } - err = 1; /* osd changed */ + err = 1; /* osd or pg changed */ out: return err; @@ -779,16 +790,18 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, struct ceph_osd_request *req; u64 tid; int numops, object_len, flags; + s32 result; tid = le64_to_cpu(msg->hdr.tid); if (msg->front.iov_len < sizeof(*rhead)) goto bad; numops = le32_to_cpu(rhead->num_ops); object_len = le32_to_cpu(rhead->object_len); + result = le32_to_cpu(rhead->result); if (msg->front.iov_len != sizeof(*rhead) + object_len + numops * sizeof(struct ceph_osd_op)) goto bad; - dout("handle_reply %p tid %llu\n", msg, tid); + dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result); /* lookup */ mutex_lock(&osdc->request_mutex); @@ -834,7 +847,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, dout("handle_reply tid %llu flags %d\n", tid, flags); /* either this is a read, or we got the safe response */ - if ((flags & CEPH_OSD_FLAG_ONDISK) || + if (result < 0 || + (flags & CEPH_OSD_FLAG_ONDISK) || ((flags & CEPH_OSD_FLAG_WRITE) == 0)) __unregister_request(osdc, req); diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h index b0759911e7c3..ce776989ef6a 100644 --- a/fs/ceph/osd_client.h +++ b/fs/ceph/osd_client.h @@ -48,6 +48,8 @@ struct ceph_osd_request { struct list_head r_osd_item; struct ceph_osd *r_osd; struct ceph_pg r_pgid; + int r_pg_osds[CEPH_PG_MAX_SIZE]; + int r_num_pg_osds; struct ceph_connection *r_con_filling_msg; @@ -66,7 +68,6 @@ struct ceph_osd_request { struct list_head r_unsafe_item; struct inode *r_inode; /* for use by callbacks */ - struct writeback_control *r_wbc; /* ditto */ char r_oid[40]; /* object name */ int r_oid_len; diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index 2e2c15eed82a..cfdd8f4388b7 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c @@ -1041,12 +1041,33 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, } /* + * Return acting set for given pgid. + */ +int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, + int *acting) +{ + int rawosds[CEPH_PG_MAX_SIZE], *osds; + int i, o, num = CEPH_PG_MAX_SIZE; + + osds = calc_pg_raw(osdmap, pgid, rawosds, &num); + if (!osds) + return -1; + + /* primary is first up osd */ + o = 0; + for (i = 0; i < num; i++) + if (ceph_osd_is_up(osdmap, osds[i])) + acting[o++] = osds[i]; + return o; +} + +/* * Return primary osd for given pgid, or -1 if none. */ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) { - int rawosds[10], *osds; - int i, num = ARRAY_SIZE(rawosds); + int rawosds[CEPH_PG_MAX_SIZE], *osds; + int i, num = CEPH_PG_MAX_SIZE; osds = calc_pg_raw(osdmap, pgid, rawosds, &num); if (!osds) @@ -1054,9 +1075,7 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) /* primary is first up osd */ for (i = 0; i < num; i++) - if (ceph_osd_is_up(osdmap, osds[i])) { + if (ceph_osd_is_up(osdmap, osds[i])) return osds[i]; - break; - } return -1; } diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h index 8bc9f1e4f562..970b547e510d 100644 --- a/fs/ceph/osdmap.h +++ b/fs/ceph/osdmap.h @@ -120,6 +120,8 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol, const char *oid, struct ceph_file_layout *fl, struct ceph_osdmap *osdmap); +extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, + int *acting); extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid); diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index a1fc1d017b58..fd56451a871f 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h @@ -58,6 +58,7 @@ struct ceph_timespec { #define CEPH_PG_LAYOUT_LINEAR 2 #define CEPH_PG_LAYOUT_HYBRID 3 +#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */ /* * placement group. diff --git a/fs/ceph/super.c b/fs/ceph/super.c index f888cf487b7c..110857ba9269 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -47,10 +47,20 @@ const char *ceph_file_part(const char *s, int len) */ static void ceph_put_super(struct super_block *s) { - struct ceph_client *cl = ceph_client(s); + struct ceph_client *client = ceph_sb_to_client(s); dout("put_super\n"); - ceph_mdsc_close_sessions(&cl->mdsc); + ceph_mdsc_close_sessions(&client->mdsc); + + /* + * ensure we release the bdi before put_anon_super releases + * the device name. + */ + if (s->s_bdi == &client->backing_dev_info) { + bdi_unregister(&client->backing_dev_info); + s->s_bdi = NULL; + } + return; } @@ -636,6 +646,8 @@ static void ceph_destroy_client(struct ceph_client *client) destroy_workqueue(client->pg_inv_wq); destroy_workqueue(client->trunc_wq); + bdi_destroy(&client->backing_dev_info); + if (client->msgr) ceph_messenger_destroy(client->msgr); mempool_destroy(client->wb_pagevec_pool); @@ -876,14 +888,14 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) { int err; - sb->s_bdi = &client->backing_dev_info; - /* set ra_pages based on rsize mount option? */ if (client->mount_args->rsize >= PAGE_CACHE_SIZE) client->backing_dev_info.ra_pages = (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT; err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); + if (!err) + sb->s_bdi = &client->backing_dev_info; return err; } @@ -957,9 +969,6 @@ static void ceph_kill_sb(struct super_block *s) dout("kill_sb %p\n", s); ceph_mdsc_pre_umount(&client->mdsc); kill_anon_super(s); /* will call put_super after sb is r/o */ - if (s->s_bdi == &client->backing_dev_info) - bdi_unregister(&client->backing_dev_info); - bdi_destroy(&client->backing_dev_info); ceph_destroy_client(client); } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index ecf0ffbe2b64..0c2fd17439c8 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -502,6 +502,7 @@ struct dfs_info3_param { #define CIFS_FATTR_DFS_REFERRAL 0x1 #define CIFS_FATTR_DELETE_PENDING 0x2 #define CIFS_FATTR_NEED_REVAL 0x4 +#define CIFS_FATTR_INO_COLLISION 0x8 struct cifs_fattr { u32 cf_flags; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 35ec11716213..29b9ea244c81 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -715,6 +715,16 @@ cifs_find_inode(struct inode *inode, void *opaque) if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) return 0; + /* + * uh oh -- it's a directory. We can't use it since hardlinked dirs are + * verboten. Disable serverino and return it as if it were found, the + * caller can discard it, generate a uniqueid and retry the find + */ + if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) { + fattr->cf_flags |= CIFS_FATTR_INO_COLLISION; + cifs_autodisable_serverino(CIFS_SB(inode->i_sb)); + } + return 1; } @@ -734,15 +744,22 @@ cifs_iget(struct super_block *sb, struct cifs_fattr *fattr) unsigned long hash; struct inode *inode; +retry_iget5_locked: cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid)); /* hash down to 32-bits on 32-bit arch */ hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr); - - /* we have fattrs in hand, update the inode */ if (inode) { + /* was there a problematic inode number collision? */ + if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) { + iput(inode); + fattr->cf_uniqueid = iunique(sb, ROOT_I); + fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION; + goto retry_iget5_locked; + } + cifs_fattr_to_inode(inode, fattr); if (sb->s_flags & MS_NOATIME) inode->i_flags |= S_NOATIME | S_NOCMTIME; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 8e48b52205aa..0b502f80c691 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -645,6 +645,7 @@ static void detach_groups(struct config_group *group) configfs_detach_group(sd->s_element); child->d_inode->i_flags |= S_DEAD; + dont_mount(child); mutex_unlock(&child->d_inode->i_mutex); @@ -840,6 +841,7 @@ static int configfs_attach_item(struct config_item *parent_item, mutex_lock(&dentry->d_inode->i_mutex); configfs_remove_dir(item); dentry->d_inode->i_flags |= S_DEAD; + dont_mount(dentry); mutex_unlock(&dentry->d_inode->i_mutex); d_delete(dentry); } @@ -882,6 +884,7 @@ static int configfs_attach_group(struct config_item *parent_item, if (ret) { configfs_detach_item(item); dentry->d_inode->i_flags |= S_DEAD; + dont_mount(dentry); } configfs_adjust_dir_dirent_depth_after_populate(sd); mutex_unlock(&dentry->d_inode->i_mutex); @@ -1725,6 +1728,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) mutex_unlock(&configfs_symlink_mutex); configfs_detach_group(&group->cg_item); dentry->d_inode->i_flags |= S_DEAD; + dont_mount(dentry); mutex_unlock(&dentry->d_inode->i_mutex); d_delete(dentry); diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 4d74fc72c195..0210898458b2 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -277,8 +277,10 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n" DEFINE_SIMPLE_ATTRIBUTE(fops_x32_ro, debugfs_u32_get, NULL, "0x%08llx\n"); DEFINE_SIMPLE_ATTRIBUTE(fops_x32_wo, NULL, debugfs_u32_set, "0x%08llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set, "0x%016llx\n"); + /* - * debugfs_create_x{8,16,32} - create a debugfs file that is used to read and write an unsigned {8,16,32}-bit value + * debugfs_create_x{8,16,32,64} - create a debugfs file that is used to read and write an unsigned {8,16,32,64}-bit value * * These functions are exactly the same as the above functions (but use a hex * output for the decimal challenged). For details look at the above unsigned @@ -357,6 +359,23 @@ struct dentry *debugfs_create_x32(const char *name, mode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_x32); +/** + * debugfs_create_x64 - create a debugfs file that is used to read and write an unsigned 64-bit value + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is %NULL, then the + * file will be created in the root of the debugfs filesystem. + * @value: a pointer to the variable that the file should read to and write + * from. + */ +struct dentry *debugfs_create_x64(const char *name, mode_t mode, + struct dentry *parent, u64 *value) +{ + return debugfs_create_file(name, mode, parent, value, &fops_x64); +} +EXPORT_SYMBOL_GPL(debugfs_create_x64); + static int debugfs_size_t_set(void *data, u64 val) { diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 157382fa6256..b66832ac33ac 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -446,10 +446,8 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) /* initialize the mount flag and determine the default error handler */ flag = JFS_ERR_REMOUNT_RO; - if (!parse_options((char *) data, sb, &newLVSize, &flag)) { - kfree(sbi); - return -EINVAL; - } + if (!parse_options((char *) data, sb, &newLVSize, &flag)) + goto out_kfree; sbi->flag = flag; #ifdef CONFIG_JFS_POSIX_ACL @@ -458,7 +456,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) if (newLVSize) { printk(KERN_ERR "resize option for remount only\n"); - return -EINVAL; + goto out_kfree; } /* @@ -478,7 +476,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) inode = new_inode(sb); if (inode == NULL) { ret = -ENOMEM; - goto out_kfree; + goto out_unload; } inode->i_ino = 0; inode->i_nlink = 1; @@ -550,9 +548,10 @@ out_mount_failed: make_bad_inode(sbi->direct_inode); iput(sbi->direct_inode); sbi->direct_inode = NULL; -out_kfree: +out_unload: if (sbi->nls_tab) unload_nls(sbi->nls_tab); +out_kfree: kfree(sbi); return ret; } diff --git a/fs/logfs/super.c b/fs/logfs/super.c index 5866ee6e1327..d7c23ed8349a 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -333,27 +333,27 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt) goto fail; sb->s_root = d_alloc_root(rootdir); - if (!sb->s_root) - goto fail2; + if (!sb->s_root) { + iput(rootdir); + goto fail; + } super->s_erase_page = alloc_pages(GFP_KERNEL, 0); if (!super->s_erase_page) - goto fail2; + goto fail; memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE); /* FIXME: check for read-only mounts */ err = logfs_make_writeable(sb); if (err) - goto fail3; + goto fail1; log_super("LogFS: Finished mounting\n"); simple_set_mnt(mnt, sb); return 0; -fail3: +fail1: __free_page(super->s_erase_page); -fail2: - iput(rootdir); fail: iput(logfs_super(sb)->s_master_inode); return -EIO; diff --git a/fs/namei.c b/fs/namei.c index a7dce91a7e42..b86b96fe1dc3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1641,7 +1641,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (nd->last.name[nd->last.len]) { if (open_flag & O_CREAT) goto exit; - nd->flags |= LOOKUP_DIRECTORY; + nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW; } /* just plain open? */ @@ -1830,6 +1830,8 @@ reval: } if (open_flag & O_DIRECTORY) nd.flags |= LOOKUP_DIRECTORY; + if (!(open_flag & O_NOFOLLOW)) + nd.flags |= LOOKUP_FOLLOW; filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); while (unlikely(!filp)) { /* trailing symlink */ struct path holder; @@ -1837,7 +1839,7 @@ reval: void *cookie; error = -ELOOP; /* S_ISDIR part is a temporary automount kludge */ - if ((open_flag & O_NOFOLLOW) && !S_ISDIR(inode->i_mode)) + if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode)) goto exit_dput; if (count++ == 32) goto exit_dput; @@ -2174,8 +2176,10 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) error = security_inode_rmdir(dir, dentry); if (!error) { error = dir->i_op->rmdir(dir, dentry); - if (!error) + if (!error) { dentry->d_inode->i_flags |= S_DEAD; + dont_mount(dentry); + } } } mutex_unlock(&dentry->d_inode->i_mutex); @@ -2259,7 +2263,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) if (!error) { error = dir->i_op->unlink(dir, dentry); if (!error) - dentry->d_inode->i_flags |= S_DEAD; + dont_mount(dentry); } } mutex_unlock(&dentry->d_inode->i_mutex); @@ -2570,17 +2574,20 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, return error; target = new_dentry->d_inode; - if (target) { + if (target) mutex_lock(&target->i_mutex); - dentry_unhash(new_dentry); - } if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) error = -EBUSY; - else + else { + if (target) + dentry_unhash(new_dentry); error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + } if (target) { - if (!error) + if (!error) { target->i_flags |= S_DEAD; + dont_mount(new_dentry); + } mutex_unlock(&target->i_mutex); if (d_unhashed(new_dentry)) d_rehash(new_dentry); @@ -2612,7 +2619,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); if (!error) { if (target) - target->i_flags |= S_DEAD; + dont_mount(new_dentry); if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) d_move(old_dentry, new_dentry); } diff --git a/fs/namespace.c b/fs/namespace.c index 8174c8ab5c70..f20cb57d1067 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1432,7 +1432,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path) err = -ENOENT; mutex_lock(&path->dentry->d_inode->i_mutex); - if (IS_DEADDIR(path->dentry->d_inode)) + if (cant_mount(path->dentry)) goto out_unlock; err = security_sb_check_sb(mnt, path); @@ -1623,7 +1623,7 @@ static int do_move_mount(struct path *path, char *old_name) err = -ENOENT; mutex_lock(&path->dentry->d_inode->i_mutex); - if (IS_DEADDIR(path->dentry->d_inode)) + if (cant_mount(path->dentry)) goto out1; if (d_unlinked(path->dentry)) @@ -2234,7 +2234,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, if (!check_mnt(root.mnt)) goto out2; error = -ENOENT; - if (IS_DEADDIR(new.dentry->d_inode)) + if (cant_mount(old.dentry)) goto out2; if (d_unlinked(new.dentry)) goto out2; diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 1afb0a10229f..e27960cd76ab 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -28,6 +28,7 @@ #include <linux/path.h> /* struct path */ #include <linux/slab.h> /* kmem_* */ #include <linux/types.h> +#include <linux/sched.h> #include "inotify.h" @@ -146,6 +147,7 @@ static void inotify_free_group_priv(struct fsnotify_group *group) idr_for_each(&group->inotify_data.idr, idr_callback, group); idr_remove_all(&group->inotify_data.idr); idr_destroy(&group->inotify_data.idr); + free_uid(group->inotify_data.user); } void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 472cdf29ef82..e46ca685b9be 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -546,21 +546,24 @@ retry: if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) goto out_err; + /* we are putting the mark on the idr, take a reference */ + fsnotify_get_mark(&tmp_ientry->fsn_entry); + spin_lock(&group->inotify_data.idr_lock); ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, group->inotify_data.last_wd+1, &tmp_ientry->wd); spin_unlock(&group->inotify_data.idr_lock); if (ret) { + /* we didn't get on the idr, drop the idr reference */ + fsnotify_put_mark(&tmp_ientry->fsn_entry); + /* idr was out of memory allocate and try again */ if (ret == -EAGAIN) goto retry; goto out_err; } - /* we put the mark on the idr, take a reference */ - fsnotify_get_mark(&tmp_ientry->fsn_entry); - /* we are on the idr, now get on the inode */ ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); if (ret) { @@ -578,16 +581,13 @@ retry: /* return the watch descriptor for this new entry */ ret = tmp_ientry->wd; - /* match the ref from fsnotify_init_markentry() */ - fsnotify_put_mark(&tmp_ientry->fsn_entry); - /* if this mark added a new event update the group mask */ if (mask & ~group->mask) fsnotify_recalc_group_mask(group); out_err: - if (ret < 0) - kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry); + /* match the ref from fsnotify_init_markentry() */ + fsnotify_put_mark(&tmp_ientry->fsn_entry); return ret; } diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 4e50286a4cc3..1dabed286b4c 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -164,8 +164,8 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_ name, de->name)) goto found; } + dir_put_page(page); } - dir_put_page(page); if (++n >= npages) n = 0; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 7bf83ddf82e0..baacd98e7cc6 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -373,7 +373,7 @@ struct acpi_pci_root { struct acpi_pci_id id; struct pci_bus *bus; u16 segment; - u8 bus_nr; + struct resource secondary; /* downstream bus range */ u32 osc_support_set; /* _OSC state of support bits */ u32 osc_control_set; /* _OSC state of control bits */ diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index 4f7b44866b76..23d78b4d088b 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -104,8 +104,7 @@ int acpi_pci_bind_root(struct acpi_device *device); /* Arch-defined function to add a bus to the system */ -struct pci_bus *pci_acpi_scan_root(struct acpi_device *device, int domain, - int bus); +struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root); void pci_acpi_crs_quirks(void); /* -------------------------------------------------------------------------- diff --git a/include/acpi/acpi_hest.h b/include/acpi/acpi_hest.h deleted file mode 100644 index 63194d03cb2d..000000000000 --- a/include/acpi/acpi_hest.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef __ACPI_HEST_H -#define __ACPI_HEST_H - -#include <linux/pci.h> - -#ifdef CONFIG_ACPI -extern int acpi_hest_firmware_first_pci(struct pci_dev *pci); -#else -static inline int acpi_hest_firmware_first_pci(struct pci_dev *pci) { return 0; } -#endif - -#endif diff --git a/include/acpi/apei.h b/include/acpi/apei.h new file mode 100644 index 000000000000..b3365025ff8d --- /dev/null +++ b/include/acpi/apei.h @@ -0,0 +1,34 @@ +/* + * apei.h - ACPI Platform Error Interface + */ + +#ifndef ACPI_APEI_H +#define ACPI_APEI_H + +#include <linux/acpi.h> +#include <linux/cper.h> +#include <asm/ioctls.h> + +#define APEI_ERST_INVALID_RECORD_ID 0xffffffffffffffffULL + +#define APEI_ERST_CLEAR_RECORD _IOW('E', 1, u64) +#define APEI_ERST_GET_RECORD_COUNT _IOR('E', 2, u32) + +#ifdef __KERNEL__ + +extern int hest_disable; +extern int erst_disable; + +typedef int (*apei_hest_func_t)(struct acpi_hest_header *hest_hdr, void *data); +int apei_hest_parse(apei_hest_func_t func, void *data); + +int erst_write(const struct cper_record_header *record); +ssize_t erst_get_record_count(void); +int erst_get_next_record_id(u64 *record_id); +ssize_t erst_read(u64 record_id, struct cper_record_header *record, + size_t buflen); +ssize_t erst_read_next(struct cper_record_header *record, size_t buflen); +int erst_clear(u64 record_id); + +#endif +#endif diff --git a/include/acpi/atomicio.h b/include/acpi/atomicio.h new file mode 100644 index 000000000000..8b9fb4b0b9ce --- /dev/null +++ b/include/acpi/atomicio.h @@ -0,0 +1,10 @@ +#ifndef ACPI_ATOMIC_IO_H +#define ACPI_ATOMIC_IO_H + +int acpi_pre_map_gar(struct acpi_generic_address *reg); +int acpi_post_unmap_gar(struct acpi_generic_address *reg); + +int acpi_atomic_read(u64 *val, struct acpi_generic_address *reg); +int acpi_atomic_write(u64 val, struct acpi_generic_address *reg); + +#endif diff --git a/include/acpi/hed.h b/include/acpi/hed.h new file mode 100644 index 000000000000..46e1249b70cc --- /dev/null +++ b/include/acpi/hed.h @@ -0,0 +1,18 @@ +/* + * hed.h - ACPI Hardware Error Device + * + * Copyright (C) 2009, Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * This file is released under the GPLv2. + */ + +#ifndef ACPI_HED_H +#define ACPI_HED_H + +#include <linux/notifier.h> + +int register_acpi_hed_notifier(struct notifier_block *nb); +void unregister_acpi_hed_notifier(struct notifier_block *nb); + +#endif diff --git a/include/linux/cper.h b/include/linux/cper.h new file mode 100644 index 000000000000..4b38f905b705 --- /dev/null +++ b/include/linux/cper.h @@ -0,0 +1,314 @@ +/* + * UEFI Common Platform Error Record + * + * Copyright (C) 2010, Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef LINUX_CPER_H +#define LINUX_CPER_H + +#include <linux/uuid.h> + +/* CPER record signature and the size */ +#define CPER_SIG_RECORD "CPER" +#define CPER_SIG_SIZE 4 +/* Used in signature_end field in struct cper_record_header */ +#define CPER_SIG_END 0xffffffff + +/* + * CPER record header revision, used in revision field in struct + * cper_record_header + */ +#define CPER_RECORD_REV 0x0100 + +/* + * Severity difinition for error_severity in struct cper_record_header + * and section_severity in struct cper_section_descriptor + */ +#define CPER_SER_RECOVERABLE 0x0 +#define CPER_SER_FATAL 0x1 +#define CPER_SER_CORRECTED 0x2 +#define CPER_SER_INFORMATIONAL 0x3 + +/* + * Validation bits difinition for validation_bits in struct + * cper_record_header. If set, corresponding fields in struct + * cper_record_header contain valid information. + * + * corresponds platform_id + */ +#define CPER_VALID_PLATFORM_ID 0x0001 +/* corresponds timestamp */ +#define CPER_VALID_TIMESTAMP 0x0002 +/* corresponds partition_id */ +#define CPER_VALID_PARTITION_ID 0x0004 + +/* + * Notification type used to generate error record, used in + * notification_type in struct cper_record_header + * + * Corrected Machine Check + */ +#define CPER_NOTIFY_CMC \ + UUID_LE(0x2DCE8BB1, 0xBDD7, 0x450e, 0xB9, 0xAD, 0x9C, 0xF4, \ + 0xEB, 0xD4, 0xF8, 0x90) +/* Corrected Platform Error */ +#define CPER_NOTIFY_CPE \ + UUID_LE(0x4E292F96, 0xD843, 0x4a55, 0xA8, 0xC2, 0xD4, 0x81, \ + 0xF2, 0x7E, 0xBE, 0xEE) +/* Machine Check Exception */ +#define CPER_NOTIFY_MCE \ + UUID_LE(0xE8F56FFE, 0x919C, 0x4cc5, 0xBA, 0x88, 0x65, 0xAB, \ + 0xE1, 0x49, 0x13, 0xBB) +/* PCI Express Error */ +#define CPER_NOTIFY_PCIE \ + UUID_LE(0xCF93C01F, 0x1A16, 0x4dfc, 0xB8, 0xBC, 0x9C, 0x4D, \ + 0xAF, 0x67, 0xC1, 0x04) +/* INIT Record (for IPF) */ +#define CPER_NOTIFY_INIT \ + UUID_LE(0xCC5263E8, 0x9308, 0x454a, 0x89, 0xD0, 0x34, 0x0B, \ + 0xD3, 0x9B, 0xC9, 0x8E) +/* Non-Maskable Interrupt */ +#define CPER_NOTIFY_NMI \ + UUID_LE(0x5BAD89FF, 0xB7E6, 0x42c9, 0x81, 0x4A, 0xCF, 0x24, \ + 0x85, 0xD6, 0xE9, 0x8A) +/* BOOT Error Record */ +#define CPER_NOTIFY_BOOT \ + UUID_LE(0x3D61A466, 0xAB40, 0x409a, 0xA6, 0x98, 0xF3, 0x62, \ + 0xD4, 0x64, 0xB3, 0x8F) +/* DMA Remapping Error */ +#define CPER_NOTIFY_DMAR \ + UUID_LE(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \ + 0x72, 0x2D, 0xEB, 0x41) + +/* + * Flags bits definitions for flags in struct cper_record_header + * If set, the error has been recovered + */ +#define CPER_HW_ERROR_FLAGS_RECOVERED 0x1 +/* If set, the error is for previous boot */ +#define CPER_HW_ERROR_FLAGS_PREVERR 0x2 +/* If set, the error is injected for testing */ +#define CPER_HW_ERROR_FLAGS_SIMULATED 0x4 + +/* + * CPER section header revision, used in revision field in struct + * cper_section_descriptor + */ +#define CPER_SEC_REV 0x0100 + +/* + * Validation bits difinition for validation_bits in struct + * cper_section_descriptor. If set, corresponding fields in struct + * cper_section_descriptor contain valid information. + * + * corresponds fru_id + */ +#define CPER_SEC_VALID_FRU_ID 0x1 +/* corresponds fru_text */ +#define CPER_SEC_VALID_FRU_TEXT 0x2 + +/* + * Flags bits definitions for flags in struct cper_section_descriptor + * + * If set, the section is associated with the error condition + * directly, and should be focused on + */ +#define CPER_SEC_PRIMARY 0x0001 +/* + * If set, the error was not contained within the processor or memory + * hierarchy and the error may have propagated to persistent storage + * or network + */ +#define CPER_SEC_CONTAINMENT_WARNING 0x0002 +/* If set, the component must be re-initialized or re-enabled prior to use */ +#define CPER_SEC_RESET 0x0004 +/* If set, Linux may choose to discontinue use of the resource */ +#define CPER_SEC_ERROR_THRESHOLD_EXCEEDED 0x0008 +/* + * If set, resource could not be queried for error information due to + * conflicts with other system software or resources. Some fields of + * the section will be invalid + */ +#define CPER_SEC_RESOURCE_NOT_ACCESSIBLE 0x0010 +/* + * If set, action has been taken to ensure error containment (such as + * poisoning data), but the error has not been fully corrected and the + * data has not been consumed. Linux may choose to take further + * corrective action before the data is consumed + */ +#define CPER_SEC_LATENT_ERROR 0x0020 + +/* + * Section type definitions, used in section_type field in struct + * cper_section_descriptor + * + * Processor Generic + */ +#define CPER_SEC_PROC_GENERIC \ + UUID_LE(0x9876CCAD, 0x47B4, 0x4bdb, 0xB6, 0x5E, 0x16, 0xF1, \ + 0x93, 0xC4, 0xF3, 0xDB) +/* Processor Specific: X86/X86_64 */ +#define CPER_SEC_PROC_IA \ + UUID_LE(0xDC3EA0B0, 0xA144, 0x4797, 0xB9, 0x5B, 0x53, 0xFA, \ + 0x24, 0x2B, 0x6E, 0x1D) +/* Processor Specific: IA64 */ +#define CPER_SEC_PROC_IPF \ + UUID_LE(0xE429FAF1, 0x3CB7, 0x11D4, 0x0B, 0xCA, 0x07, 0x00, \ + 0x80, 0xC7, 0x3C, 0x88, 0x81) +/* Platform Memory */ +#define CPER_SEC_PLATFORM_MEM \ + UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \ + 0xED, 0x7C, 0x83, 0xB1) +#define CPER_SEC_PCIE \ + UUID_LE(0xD995E954, 0xBBC1, 0x430F, 0xAD, 0x91, 0xB4, 0x4D, \ + 0xCB, 0x3C, 0x6F, 0x35) +/* Firmware Error Record Reference */ +#define CPER_SEC_FW_ERR_REC_REF \ + UUID_LE(0x81212A96, 0x09ED, 0x4996, 0x94, 0x71, 0x8D, 0x72, \ + 0x9C, 0x8E, 0x69, 0xED) +/* PCI/PCI-X Bus */ +#define CPER_SEC_PCI_X_BUS \ + UUID_LE(0xC5753963, 0x3B84, 0x4095, 0xBF, 0x78, 0xED, 0xDA, \ + 0xD3, 0xF9, 0xC9, 0xDD) +/* PCI Component/Device */ +#define CPER_SEC_PCI_DEV \ + UUID_LE(0xEB5E4685, 0xCA66, 0x4769, 0xB6, 0xA2, 0x26, 0x06, \ + 0x8B, 0x00, 0x13, 0x26) +#define CPER_SEC_DMAR_GENERIC \ + UUID_LE(0x5B51FEF7, 0xC79D, 0x4434, 0x8F, 0x1B, 0xAA, 0x62, \ + 0xDE, 0x3E, 0x2C, 0x64) +/* Intel VT for Directed I/O specific DMAr */ +#define CPER_SEC_DMAR_VT \ + UUID_LE(0x71761D37, 0x32B2, 0x45cd, 0xA7, 0xD0, 0xB0, 0xFE, \ + 0xDD, 0x93, 0xE8, 0xCF) +/* IOMMU specific DMAr */ +#define CPER_SEC_DMAR_IOMMU \ + UUID_LE(0x036F84E1, 0x7F37, 0x428c, 0xA7, 0x9E, 0x57, 0x5F, \ + 0xDF, 0xAA, 0x84, 0xEC) + +/* + * All tables and structs must be byte-packed to match CPER + * specification, since the tables are provided by the system BIOS + */ +#pragma pack(1) + +struct cper_record_header { + char signature[CPER_SIG_SIZE]; /* must be CPER_SIG_RECORD */ + __u16 revision; /* must be CPER_RECORD_REV */ + __u32 signature_end; /* must be CPER_SIG_END */ + __u16 section_count; + __u32 error_severity; + __u32 validation_bits; + __u32 record_length; + __u64 timestamp; + uuid_le platform_id; + uuid_le partition_id; + uuid_le creator_id; + uuid_le notification_type; + __u64 record_id; + __u32 flags; + __u64 persistence_information; + __u8 reserved[12]; /* must be zero */ +}; + +struct cper_section_descriptor { + __u32 section_offset; /* Offset in bytes of the + * section body from the base + * of the record header */ + __u32 section_length; + __u16 revision; /* must be CPER_RECORD_REV */ + __u8 validation_bits; + __u8 reserved; /* must be zero */ + __u32 flags; + uuid_le section_type; + uuid_le fru_id; + __u32 section_severity; + __u8 fru_text[20]; +}; + +/* Generic Processor Error Section */ +struct cper_sec_proc_generic { + __u64 validation_bits; + __u8 proc_type; + __u8 proc_isa; + __u8 proc_error_type; + __u8 operation; + __u8 flags; + __u8 level; + __u16 reserved; + __u64 cpu_version; + char cpu_brand[128]; + __u64 proc_id; + __u64 target_addr; + __u64 requestor_id; + __u64 responder_id; + __u64 ip; +}; + +/* IA32/X64 Processor Error Section */ +struct cper_sec_proc_ia { + __u64 validation_bits; + __u8 lapic_id; + __u8 cpuid[48]; +}; + +/* IA32/X64 Processor Error Infomation Structure */ +struct cper_ia_err_info { + uuid_le err_type; + __u64 validation_bits; + __u64 check_info; + __u64 target_id; + __u64 requestor_id; + __u64 responder_id; + __u64 ip; +}; + +/* IA32/X64 Processor Context Information Structure */ +struct cper_ia_proc_ctx { + __u16 reg_ctx_type; + __u16 reg_arr_size; + __u32 msr_addr; + __u64 mm_reg_addr; +}; + +/* Memory Error Section */ +struct cper_sec_mem_err { + __u64 validation_bits; + __u64 error_status; + __u64 physical_addr; + __u64 physical_addr_mask; + __u16 node; + __u16 card; + __u16 module; + __u16 bank; + __u16 device; + __u16 row; + __u16 column; + __u16 bit_pos; + __u64 requestor_id; + __u64 responder_id; + __u64 target_id; + __u8 error_type; +}; + +/* Reset to default packing */ +#pragma pack() + +u64 cper_next_record_id(void); + +#endif diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 30b93b2a01a4..eebb617c17d8 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -186,6 +186,8 @@ d_iput: no no no yes #define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */ +#define DCACHE_CANT_MOUNT 0x0100 + extern spinlock_t dcache_lock; extern seqlock_t rename_lock; @@ -358,6 +360,18 @@ static inline int d_unlinked(struct dentry *dentry) return d_unhashed(dentry) && !IS_ROOT(dentry); } +static inline int cant_mount(struct dentry *dentry) +{ + return (dentry->d_flags & DCACHE_CANT_MOUNT); +} + +static inline void dont_mount(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_CANT_MOUNT; + spin_unlock(&dentry->d_lock); +} + static inline struct dentry *dget_parent(struct dentry *dentry) { struct dentry *ret; diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index fc1b930f246c..e7d9b20ddc5b 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -63,6 +63,8 @@ struct dentry *debugfs_create_x16(const char *name, mode_t mode, struct dentry *parent, u16 *value); struct dentry *debugfs_create_x32(const char *name, mode_t mode, struct dentry *parent, u32 *value); +struct dentry *debugfs_create_x64(const char *name, mode_t mode, + struct dentry *parent, u64 *value); struct dentry *debugfs_create_size_t(const char *name, mode_t mode, struct dentry *parent, size_t *value); struct dentry *debugfs_create_bool(const char *name, mode_t mode, diff --git a/include/linux/if_link.h b/include/linux/if_link.h index c9bf92cd7653..d94963b379d9 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -79,10 +79,7 @@ enum { IFLA_NET_NS_PID, IFLA_IFALIAS, IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */ - IFLA_VF_MAC, /* Hardware queue specific attributes */ - IFLA_VF_VLAN, - IFLA_VF_TX_RATE, /* TX Bandwidth Allocation */ - IFLA_VFINFO, + IFLA_VFINFO_LIST, __IFLA_MAX }; @@ -203,6 +200,24 @@ enum macvlan_mode { /* SR-IOV virtual function managment section */ +enum { + IFLA_VF_INFO_UNSPEC, + IFLA_VF_INFO, + __IFLA_VF_INFO_MAX, +}; + +#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1) + +enum { + IFLA_VF_UNSPEC, + IFLA_VF_MAC, /* Hardware queue specific attributes */ + IFLA_VF_VLAN, + IFLA_VF_TX_RATE, /* TX Bandwidth Allocation */ + __IFLA_VF_MAX, +}; + +#define IFLA_VF_MAX (__IFLA_VF_MAX - 1) + struct ifla_vf_mac { __u32 vf; __u8 mac[32]; /* MAX_ADDR_LEN */ diff --git a/include/linux/pci.h b/include/linux/pci.h index a788fa12ff31..63967e877f20 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -311,7 +311,8 @@ struct pci_dev { unsigned int is_virtfn:1; unsigned int reset_fn:1; unsigned int is_hotplug_bridge:1; - unsigned int aer_firmware_first:1; + unsigned int __aer_firmware_first_valid:1; + unsigned int __aer_firmware_first:1; pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ diff --git a/include/linux/uuid.h b/include/linux/uuid.h new file mode 100644 index 000000000000..5b7efbfcee4e --- /dev/null +++ b/include/linux/uuid.h @@ -0,0 +1,70 @@ +/* + * UUID/GUID definition + * + * Copyright (C) 2010, Intel Corp. + * Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_UUID_H_ +#define _LINUX_UUID_H_ + +#include <linux/types.h> +#include <linux/string.h> + +typedef struct { + __u8 b[16]; +} uuid_le; + +typedef struct { + __u8 b[16]; +} uuid_be; + +#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ +((uuid_le) \ +{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ + (b) & 0xff, ((b) >> 8) & 0xff, \ + (c) & 0xff, ((c) >> 8) & 0xff, \ + (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) + +#define UUID_BE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ +((uuid_be) \ +{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \ + ((b) >> 8) & 0xff, (b) & 0xff, \ + ((c) >> 8) & 0xff, (c) & 0xff, \ + (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) + +#define NULL_UUID_LE \ + UUID_LE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00) + +#define NULL_UUID_BE \ + UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00) + +static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2) +{ + return memcmp(&u1, &u2, sizeof(uuid_le)); +} + +static inline int uuid_be_cmp(const uuid_be u1, const uuid_be u2) +{ + return memcmp(&u1, &u2, sizeof(uuid_be)); +} + +extern void uuid_le_gen(uuid_le *u); +extern void uuid_be_gen(uuid_be *u); + +#endif diff --git a/include/net/tcp.h b/include/net/tcp.h index 75be5a28815d..aa04b9a5093b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1197,30 +1197,15 @@ extern int tcp_v4_md5_do_del(struct sock *sk, extern struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *); extern void tcp_free_md5sig_pool(void); -extern struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu); -extern void __tcp_put_md5sig_pool(void); +extern struct tcp_md5sig_pool *tcp_get_md5sig_pool(void); +extern void tcp_put_md5sig_pool(void); + extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *); extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *, unsigned header_len); extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key); -static inline -struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) -{ - int cpu = get_cpu(); - struct tcp_md5sig_pool *ret = __tcp_get_md5sig_pool(cpu); - if (!ret) - put_cpu(); - return ret; -} - -static inline void tcp_put_md5sig_pool(void) -{ - __tcp_put_md5sig_pool(); - put_cpu(); -} - /* write queue abstraction */ static inline void tcp_write_queue_purge(struct sock *sk) { diff --git a/kernel/profile.c b/kernel/profile.c index a55d3a367ae8..dfadc5b729f1 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -127,8 +127,10 @@ int __ref profile_init(void) return 0; prof_buffer = vmalloc(buffer_bytes); - if (prof_buffer) + if (prof_buffer) { + memset(prof_buffer, 0, buffer_bytes); return 0; + } free_cpumask_var(prof_cpu_mask); return -ENOMEM; diff --git a/lib/Makefile b/lib/Makefile index 0d4015205c64..f3eb6e8766be 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - string_helpers.o gcd.o lcm.o list_sort.o + string_helpers.o gcd.o lcm.o list_sort.o uuid.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/btree.c b/lib/btree.c index 41859a820218..c9c6f0351526 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -95,7 +95,8 @@ static unsigned long *btree_node_alloc(struct btree_head *head, gfp_t gfp) unsigned long *node; node = mempool_alloc(head->mempool, gfp); - memset(node, 0, NODESIZE); + if (likely(node)) + memset(node, 0, NODESIZE); return node; } diff --git a/lib/rwsem.c b/lib/rwsem.c index 3e3365e5665e..ceba8e28807a 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -136,9 +136,10 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) out: return sem; - /* undo the change to count, but check for a transition 1->0 */ + /* undo the change to the active count, but check for a transition + * 1->0 */ undo: - if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) != 0) + if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) & RWSEM_ACTIVE_MASK) goto out; goto try_again; } diff --git a/lib/uuid.c b/lib/uuid.c new file mode 100644 index 000000000000..8fadd7cef46c --- /dev/null +++ b/lib/uuid.c @@ -0,0 +1,53 @@ +/* + * Unified UUID/GUID definition + * + * Copyright (C) 2009, Intel Corp. + * Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/uuid.h> +#include <linux/random.h> + +static void __uuid_gen_common(__u8 b[16]) +{ + int i; + u32 r; + + for (i = 0; i < 4; i++) { + r = random32(); + memcpy(b + i * 4, &r, 4); + } + /* reversion 0b10 */ + b[8] = (b[8] & 0x3F) | 0x80; +} + +void uuid_le_gen(uuid_le *lu) +{ + __uuid_gen_common(lu->b); + /* version 4 : random generation */ + lu->b[7] = (lu->b[7] & 0x0F) | 0x40; +} +EXPORT_SYMBOL_GPL(uuid_le_gen); + +void uuid_be_gen(uuid_be *bu) +{ + __uuid_gen_common(bu->b); + /* version 4 : random generation */ + bu->b[6] = (bu->b[6] & 0x0F) | 0x40; +} +EXPORT_SYMBOL_GPL(uuid_be_gen); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fe776c9ddeca..31e85d327aa2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -602,12 +602,19 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->tx_compressed = b->tx_compressed; }; +/* All VF info */ static inline int rtnl_vfinfo_size(const struct net_device *dev) { - if (dev->dev.parent && dev_is_pci(dev->dev.parent)) - return dev_num_vf(dev->dev.parent) * - sizeof(struct ifla_vf_info); - else + if (dev->dev.parent && dev_is_pci(dev->dev.parent)) { + + int num_vfs = dev_num_vf(dev->dev.parent); + size_t size = nlmsg_total_size(sizeof(struct nlattr)); + size += nlmsg_total_size(num_vfs * sizeof(struct nlattr)); + size += num_vfs * (sizeof(struct ifla_vf_mac) + + sizeof(struct ifla_vf_vlan) + + sizeof(struct ifla_vf_tx_rate)); + return size; + } else return 0; } @@ -629,7 +636,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_NUM_VF */ - + nla_total_size(rtnl_vfinfo_size(dev)) /* IFLA_VFINFO */ + + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ } @@ -700,14 +707,37 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { int i; - struct ifla_vf_info ivi; - NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); - for (i = 0; i < dev_num_vf(dev->dev.parent); i++) { + struct nlattr *vfinfo, *vf; + int num_vfs = dev_num_vf(dev->dev.parent); + + NLA_PUT_U32(skb, IFLA_NUM_VF, num_vfs); + vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); + if (!vfinfo) + goto nla_put_failure; + for (i = 0; i < num_vfs; i++) { + struct ifla_vf_info ivi; + struct ifla_vf_mac vf_mac; + struct ifla_vf_vlan vf_vlan; + struct ifla_vf_tx_rate vf_tx_rate; if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) break; - NLA_PUT(skb, IFLA_VFINFO, sizeof(ivi), &ivi); + vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf; + memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); + vf_vlan.vlan = ivi.vlan; + vf_vlan.qos = ivi.qos; + vf_tx_rate.rate = ivi.tx_rate; + vf = nla_nest_start(skb, IFLA_VF_INFO); + if (!vf) { + nla_nest_cancel(skb, vfinfo); + goto nla_put_failure; + } + NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac); + NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan); + NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate); + nla_nest_end(skb, vf); } + nla_nest_end(skb, vfinfo); } if (dev->rtnl_link_ops) { if (rtnl_link_fill(skb, dev) < 0) @@ -769,12 +799,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKINFO] = { .type = NLA_NESTED }, [IFLA_NET_NS_PID] = { .type = NLA_U32 }, [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, - [IFLA_VF_MAC] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_mac) }, - [IFLA_VF_VLAN] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_vlan) }, - [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_tx_rate) }, + [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, }; EXPORT_SYMBOL(ifla_policy); @@ -783,6 +808,19 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_DATA] = { .type = NLA_NESTED }, }; +static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { + [IFLA_VF_INFO] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { + [IFLA_VF_MAC] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_mac) }, + [IFLA_VF_VLAN] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_vlan) }, + [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_tx_rate) }, +}; + struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) { struct net *net; @@ -812,6 +850,52 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return 0; } +static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) +{ + int rem, err = -EINVAL; + struct nlattr *vf; + const struct net_device_ops *ops = dev->netdev_ops; + + nla_for_each_nested(vf, attr, rem) { + switch (nla_type(vf)) { + case IFLA_VF_MAC: { + struct ifla_vf_mac *ivm; + ivm = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_mac) + err = ops->ndo_set_vf_mac(dev, ivm->vf, + ivm->mac); + break; + } + case IFLA_VF_VLAN: { + struct ifla_vf_vlan *ivv; + ivv = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_vlan) + err = ops->ndo_set_vf_vlan(dev, ivv->vf, + ivv->vlan, + ivv->qos); + break; + } + case IFLA_VF_TX_RATE: { + struct ifla_vf_tx_rate *ivt; + ivt = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_tx_rate) + err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, + ivt->rate); + break; + } + default: + err = -EINVAL; + break; + } + if (err) + break; + } + return err; +} + static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { @@ -942,40 +1026,17 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, write_unlock_bh(&dev_base_lock); } - if (tb[IFLA_VF_MAC]) { - struct ifla_vf_mac *ivm; - ivm = nla_data(tb[IFLA_VF_MAC]); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_mac) - err = ops->ndo_set_vf_mac(dev, ivm->vf, ivm->mac); - if (err < 0) - goto errout; - modified = 1; - } - - if (tb[IFLA_VF_VLAN]) { - struct ifla_vf_vlan *ivv; - ivv = nla_data(tb[IFLA_VF_VLAN]); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_vlan) - err = ops->ndo_set_vf_vlan(dev, ivv->vf, - ivv->vlan, - ivv->qos); - if (err < 0) - goto errout; - modified = 1; - } - err = 0; - - if (tb[IFLA_VF_TX_RATE]) { - struct ifla_vf_tx_rate *ivt; - ivt = nla_data(tb[IFLA_VF_TX_RATE]); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_tx_rate) - err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, ivt->rate); - if (err < 0) - goto errout; - modified = 1; + if (tb[IFLA_VFINFO_LIST]) { + struct nlattr *attr; + int rem; + nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { + if (nla_type(attr) != IFLA_VF_INFO) + goto errout; + err = do_setvfinfo(dev, attr); + if (err < 0) + goto errout; + modified = 1; + } } err = 0; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0f8caf64caa3..296150b2a62f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2839,7 +2839,6 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool) if (p->md5_desc.tfm) crypto_free_hash(p->md5_desc.tfm); kfree(p); - p = NULL; } } free_percpu(pool); @@ -2937,25 +2936,40 @@ retry: EXPORT_SYMBOL(tcp_alloc_md5sig_pool); -struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu) + +/** + * tcp_get_md5sig_pool - get md5sig_pool for this user + * + * We use percpu structure, so if we succeed, we exit with preemption + * and BH disabled, to make sure another thread or softirq handling + * wont try to get same context. + */ +struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) { struct tcp_md5sig_pool * __percpu *p; - spin_lock_bh(&tcp_md5sig_pool_lock); + + local_bh_disable(); + + spin_lock(&tcp_md5sig_pool_lock); p = tcp_md5sig_pool; if (p) tcp_md5sig_users++; - spin_unlock_bh(&tcp_md5sig_pool_lock); - return (p ? *per_cpu_ptr(p, cpu) : NULL); -} + spin_unlock(&tcp_md5sig_pool_lock); + + if (p) + return *per_cpu_ptr(p, smp_processor_id()); -EXPORT_SYMBOL(__tcp_get_md5sig_pool); + local_bh_enable(); + return NULL; +} +EXPORT_SYMBOL(tcp_get_md5sig_pool); -void __tcp_put_md5sig_pool(void) +void tcp_put_md5sig_pool(void) { + local_bh_enable(); tcp_free_md5sig_pool(); } - -EXPORT_SYMBOL(__tcp_put_md5sig_pool); +EXPORT_SYMBOL(tcp_put_md5sig_pool); int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, struct tcphdr *th) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 4a368038d46f..165d54e07fcd 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -173,6 +173,10 @@ void sctp_transport_free(struct sctp_transport *transport) del_timer(&transport->T3_rtx_timer)) sctp_transport_put(transport); + /* Delete the ICMP proto unreachable timer if it's active. */ + if (timer_pending(&transport->proto_unreach_timer) && + del_timer(&transport->proto_unreach_timer)) + sctp_association_put(transport->asoc); sctp_transport_put(transport); } diff --git a/security/min_addr.c b/security/min_addr.c index e86f297522bf..f728728f193b 100644 --- a/security/min_addr.c +++ b/security/min_addr.c @@ -33,7 +33,7 @@ int mmap_min_addr_handler(struct ctl_table *table, int write, { int ret; - if (!capable(CAP_SYS_RAWIO)) + if (write && !capable(CAP_SYS_RAWIO)) return -EPERM; ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 872887624030..20b5982c996b 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -36,6 +36,9 @@ #include <sound/timer.h> #include <sound/minors.h> #include <asm/io.h> +#if defined(CONFIG_MIPS) && defined(CONFIG_DMA_NONCOHERENT) +#include <dma-coherence.h> +#endif /* * Compatibility @@ -3184,6 +3187,10 @@ static int snd_pcm_default_mmap(struct snd_pcm_substream *substream, substream->runtime->dma_area, substream->runtime->dma_addr, area->vm_end - area->vm_start); +#elif defined(CONFIG_MIPS) && defined(CONFIG_DMA_NONCOHERENT) + if (substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_DEV && + !plat_device_is_coherent(substream->dma_buffer.dev.dev)) + area->vm_page_prot = pgprot_noncached(area->vm_page_prot); #endif /* ARCH_HAS_DMA_MMAP_COHERENT */ /* mmap with fault handler */ area->vm_ops = &snd_pcm_vm_ops_data_fault; diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index d8213e2231a6..feabb44c7ca4 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -1197,9 +1197,10 @@ static int patch_cxt5045(struct hda_codec *codec) case 0x103c: case 0x1631: case 0x1734: - /* HP, Packard Bell, & Fujitsu-Siemens laptops have really bad - * sound over 0dB on NID 0x17. Fix max PCM level to 0 dB - * (originally it has 0x2b steps with 0dB offset 0x14) + case 0x17aa: + /* HP, Packard Bell, Fujitsu-Siemens & Lenovo laptops have + * really bad sound over 0dB on NID 0x17. Fix max PCM level to + * 0 dB (originally it has 0x2b steps with 0dB offset 0x14) */ snd_hda_override_amp_caps(codec, 0x17, HDA_INPUT, (0x14 << AC_AMPCAP_OFFSET_SHIFT) | diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 12825aa03106..a0e06d82da1f 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -104,6 +104,7 @@ enum { STAC_DELL_M4_2, STAC_DELL_M4_3, STAC_HP_M4, + STAC_HP_DV4, STAC_HP_DV5, STAC_HP_HDX, STAC_HP_DV4_1222NR, @@ -1691,6 +1692,7 @@ static unsigned int *stac92hd71bxx_brd_tbl[STAC_92HD71BXX_MODELS] = { [STAC_DELL_M4_2] = dell_m4_2_pin_configs, [STAC_DELL_M4_3] = dell_m4_3_pin_configs, [STAC_HP_M4] = NULL, + [STAC_HP_DV4] = NULL, [STAC_HP_DV5] = NULL, [STAC_HP_HDX] = NULL, [STAC_HP_DV4_1222NR] = NULL, @@ -1703,6 +1705,7 @@ static const char *stac92hd71bxx_models[STAC_92HD71BXX_MODELS] = { [STAC_DELL_M4_2] = "dell-m4-2", [STAC_DELL_M4_3] = "dell-m4-3", [STAC_HP_M4] = "hp-m4", + [STAC_HP_DV4] = "hp-dv4", [STAC_HP_DV5] = "hp-dv5", [STAC_HP_HDX] = "hp-hdx", [STAC_HP_DV4_1222NR] = "hp-dv4-1222nr", @@ -1721,7 +1724,7 @@ static struct snd_pci_quirk stac92hd71bxx_cfg_tbl[] = { SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xfff0, 0x3080, "HP", STAC_HP_DV5), SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xfff0, 0x30f0, - "HP dv4-7", STAC_HP_DV5), + "HP dv4-7", STAC_HP_DV4), SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xfff0, 0x3600, "HP dv4-7", STAC_HP_DV5), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3610, @@ -4766,6 +4769,9 @@ static void set_hp_led_gpio(struct hda_codec *codec) struct sigmatel_spec *spec = codec->spec; unsigned int gpio; + if (spec->gpio_led) + return; + gpio = snd_hda_param_read(codec, codec->afg, AC_PAR_GPIO_CAP); gpio &= AC_GPIO_IO_COUNT; if (gpio > 3) @@ -5675,6 +5681,9 @@ again: spec->num_smuxes = 1; spec->num_dmuxes = 1; /* fallthrough */ + case STAC_HP_DV4: + spec->gpio_led = 0x01; + /* fallthrough */ case STAC_HP_DV5: snd_hda_codec_set_pincfg(codec, 0x0d, 0x90170010); stac92xx_auto_set_pinctl(codec, 0x0d, AC_PINCTL_OUT_EN); @@ -5688,6 +5697,7 @@ again: spec->num_dmics = 1; spec->num_dmuxes = 1; spec->num_smuxes = 1; + spec->gpio_led = 0x08; break; } @@ -5744,7 +5754,8 @@ again: } /* enable bass on HP dv7 */ - if (spec->board_config == STAC_HP_DV5) { + if (spec->board_config == STAC_HP_DV4 || + spec->board_config == STAC_HP_DV5) { unsigned int cap; cap = snd_hda_param_read(codec, 0x1, AC_PAR_GPIO_CAP); cap &= AC_GPIO_IO_COUNT; diff --git a/sound/pci/ice1712/maya44.c b/sound/pci/ice1712/maya44.c index 3e1c20ae2f1c..726fd4b92e19 100644 --- a/sound/pci/ice1712/maya44.c +++ b/sound/pci/ice1712/maya44.c @@ -347,7 +347,7 @@ static int maya_gpio_sw_put(struct snd_kcontrol *kcontrol, /* known working input slots (0-4) */ #define MAYA_LINE_IN 1 /* in-2 */ -#define MAYA_MIC_IN 4 /* in-5 */ +#define MAYA_MIC_IN 3 /* in-4 */ static void wm8776_select_input(struct snd_maya44 *chip, int idx, int line) { @@ -393,8 +393,8 @@ static int maya_rec_src_put(struct snd_kcontrol *kcontrol, int changed; mutex_lock(&chip->mutex); - changed = maya_set_gpio_bits(chip->ice, GPIO_MIC_RELAY, - sel ? GPIO_MIC_RELAY : 0); + changed = maya_set_gpio_bits(chip->ice, 1 << GPIO_MIC_RELAY, + sel ? (1 << GPIO_MIC_RELAY) : 0); wm8776_select_input(chip, 0, sel ? MAYA_MIC_IN : MAYA_LINE_IN); mutex_unlock(&chip->mutex); return changed; diff --git a/sound/pci/oxygen/xonar_cs43xx.c b/sound/pci/oxygen/xonar_cs43xx.c index 16c226bfcd2b..7c4986b27f2b 100644 --- a/sound/pci/oxygen/xonar_cs43xx.c +++ b/sound/pci/oxygen/xonar_cs43xx.c @@ -56,6 +56,7 @@ #include <sound/pcm_params.h> #include <sound/tlv.h> #include "xonar.h" +#include "cm9780.h" #include "cs4398.h" #include "cs4362a.h" @@ -172,6 +173,8 @@ static void xonar_d1_init(struct oxygen *chip) oxygen_clear_bits16(chip, OXYGEN_GPIO_DATA, GPIO_D1_FRONT_PANEL | GPIO_D1_INPUT_ROUTE); + oxygen_ac97_set_bits(chip, 0, CM9780_JACK, CM9780_FMIC2MIC); + xonar_init_cs53x1(chip); xonar_enable_output(chip); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3b8b6387c47c..f1411e9cdf47 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -563,6 +563,9 @@ static int __cmd_record(int argc, const char **argv) err = event__synthesize_kernel_mmap(process_synthesized_event, session, "_text"); + if (err < 0) + err = event__synthesize_kernel_mmap(process_synthesized_event, + session, "_stext"); if (err < 0) { pr_err("Couldn't record kernel reference relocation symbol.\n"); return err; diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 03a5eb22da2b..7c79c1d76d0c 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -197,7 +197,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) union kvm_ioapic_redirect_entry entry; int ret = 1; - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); if (irq >= 0 && irq < IOAPIC_NUM_PINS) { entry = ioapic->redirtbl[irq]; level ^= entry.fields.polarity; @@ -214,7 +214,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) } trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); } - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); return ret; } @@ -238,9 +238,9 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, * is dropped it will be put into irr and will be delivered * after ack notifier returns. */ - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i); - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); if (trigger_mode != IOAPIC_LEVEL_TRIG) continue; @@ -259,9 +259,9 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) smp_rmb(); if (!test_bit(vector, ioapic->handled_vectors)) return; - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); } static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) @@ -287,7 +287,7 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, ASSERT(!(addr & 0xf)); /* check alignment */ addr &= 0xff; - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); switch (addr) { case IOAPIC_REG_SELECT: result = ioapic->ioregsel; @@ -301,7 +301,7 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, result = 0; break; } - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); switch (len) { case 8: @@ -338,7 +338,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, } addr &= 0xff; - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); switch (addr) { case IOAPIC_REG_SELECT: ioapic->ioregsel = data; @@ -356,7 +356,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, default: break; } - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); return 0; } @@ -386,7 +386,7 @@ int kvm_ioapic_init(struct kvm *kvm) ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); if (!ioapic) return -ENOMEM; - mutex_init(&ioapic->lock); + spin_lock_init(&ioapic->lock); kvm->arch.vioapic = ioapic; kvm_ioapic_reset(ioapic); kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); @@ -419,9 +419,9 @@ int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) if (!ioapic) return -EINVAL; - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); return 0; } @@ -431,9 +431,9 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) if (!ioapic) return -EINVAL; - mutex_lock(&ioapic->lock); + spin_lock(&ioapic->lock); memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); update_handled_vectors(ioapic); - mutex_unlock(&ioapic->lock); + spin_unlock(&ioapic->lock); return 0; } diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 8a751b78a430..0b190c34ccc3 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -45,7 +45,7 @@ struct kvm_ioapic { struct kvm_io_device dev; struct kvm *kvm; void (*ack_notifier)(void *opaque, int irq); - struct mutex lock; + spinlock_t lock; DECLARE_BITMAP(handled_vectors, 256); }; |