144 files changed, 5097 insertions, 704 deletions
diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt
new file mode 100644
index 000000000000..dfab71848dc8
--- /dev/null
+++ b/Documentation/acpi/apei/einj.txt
@@ -0,0 +1,59 @@
+			APEI Error INJection
+			~~~~~~~~~~~~~~~~~~~~
+
+EINJ provides a hardware error injection mechanism
+It is very useful for debugging and testing of other APEI and RAS features.
+
+To use EINJ, make sure the following are enabled in your kernel
+configuration:
+
+CONFIG_DEBUG_FS
+CONFIG_ACPI_APEI
+CONFIG_ACPI_APEI_EINJ
+
+The user interface of EINJ is debug file system, under the
+directory apei/einj. The following files are provided.
+
+- available_error_type
+  Reading this file returns the error injection capability of the
+  platform, that is, which error types are supported. The error type
+  definition is as follow, the left field is the error type value, the
+  right field is error description.
+
+    0x00000001	Processor Correctable
+    0x00000002	Processor Uncorrectable non-fatal
+    0x00000004	Processor Uncorrectable fatal
+    0x00000008  Memory Correctable
+    0x00000010  Memory Uncorrectable non-fatal
+    0x00000020  Memory Uncorrectable fatal
+    0x00000040	PCI Express Correctable
+    0x00000080	PCI Express Uncorrectable fatal
+    0x00000100	PCI Express Uncorrectable non-fatal
+    0x00000200	Platform Correctable
+    0x00000400	Platform Uncorrectable non-fatal
+    0x00000800	Platform Uncorrectable fatal
+
+  The format of file contents are as above, except there are only the
+  available error type lines.
+
+- error_type
+  This file is used to set the error type value. The error type value
+  is defined in "available_error_type" description.
+
+- error_inject
+  Write any integer to this file to trigger the error
+  injection. Before this, please specify all necessary error
+  parameters.
+
+- param1
+  This file is used to set the first error parameter value. Effect of
+  parameter depends on error_type specified. For memory error, this is
+  physical memory address.
+
+- param2
+  This file is used to set the second error parameter value. Effect of
+  parameter depends on error_type specified. For memory error, this is
+  physical memory address mask.
+
+For more information about EINJ, please refer to ACPI specification
+version 4.0, section 17.5.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 839b21b0699a..42d77735fd45 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -750,6 +750,10 @@ and is between 256 and 4096 characters. It is defined in the file
 			Default value is 0.
 			Value can be changed at runtime via /selinux/enforce.
 
+	erst_disable	[ACPI]
+			Disable Error Record Serialization Table (ERST)
+			support.
+
 	ether=		[HW,NET] Ethernet cards parameters
 			This option is obsoleted by the "netdev=" option, which
 			has equivalent usage. See its documentation for details.
@@ -843,6 +847,11 @@ and is between 256 and 4096 characters. It is defined in the file
 	hd=		[EIDE] (E)IDE hard drive subsystem geometry
 			Format: <cyl>,<head>,<sect>
 
+	hest_disable	[ACPI]
+			Disable Hardware Error Source Table (HEST) support;
+			corresponding firmware-first mode error processing
+			logic will be disabled.
+
 	highmem=nn[KMG]	[KNL,BOOT] forces the highmem zone to have an exact
 			size of <nn>. This works even on boxes that have no
 			highmem otherwise. This also works to reduce highmem
diff --git a/MAINTAINERS b/MAINTAINERS
index d5b0b1b6dc52..d329b053a718 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5492,7 +5492,7 @@ S:	Maintained
 F:	drivers/mmc/host/tmio_mmc.*
 
 TMPFS (SHMEM FILESYSTEM)
-M:	Hugh Dickins <hugh.dickins@tiscali.co.uk>
+M:	Hugh Dickins <hughd@google.com>
 L:	linux-mm@kvack.org
 S:	Maintained
 F:	include/linux/shmem_fs.h
diff --git a/Makefile b/Makefile
index 701bc65b3952..ebc8225f7a96 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 34
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Sheep on Meth
 
 # *DOCUMENTATION*
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 0d08d4170b64..4656a24058d2 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -371,6 +371,10 @@ static inline void __flush_icache_all(void)
 #ifdef CONFIG_ARM_ERRATA_411920
 	extern void v6_icache_inval_all(void);
 	v6_icache_inval_all();
+#elif defined(CONFIG_SMP) && __LINUX_ARM_ARCH__ >= 7
+	asm("mcr	p15, 0, %0, c7, c1, 0	@ invalidate I-cache inner shareable\n"
+	    :
+	    : "r" (0));
 #else
 	asm("mcr	p15, 0, %0, c7, c5, 0	@ invalidate I-cache\n"
 	    :
diff --git a/arch/arm/include/asm/smp_twd.h b/arch/arm/include/asm/smp_twd.h
index 7be0978b2625..634f357be6bb 100644
--- a/arch/arm/include/asm/smp_twd.h
+++ b/arch/arm/include/asm/smp_twd.h
@@ -1,6 +1,23 @@
 #ifndef __ASMARM_SMP_TWD_H
 #define __ASMARM_SMP_TWD_H
 
+#define TWD_TIMER_LOAD			0x00
+#define TWD_TIMER_COUNTER		0x04
+#define TWD_TIMER_CONTROL		0x08
+#define TWD_TIMER_INTSTAT		0x0C
+
+#define TWD_WDOG_LOAD			0x20
+#define TWD_WDOG_COUNTER		0x24
+#define TWD_WDOG_CONTROL		0x28
+#define TWD_WDOG_INTSTAT		0x2C
+#define TWD_WDOG_RESETSTAT		0x30
+#define TWD_WDOG_DISABLE		0x34
+
+#define TWD_TIMER_CONTROL_ENABLE	(1 << 0)
+#define TWD_TIMER_CONTROL_ONESHOT	(0 << 1)
+#define TWD_TIMER_CONTROL_PERIODIC	(1 << 1)
+#define TWD_TIMER_CONTROL_IT_ENABLE	(1 << 2)
+
 struct clock_event_device;
 
 extern void __iomem *twd_base;
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index e085e2c545eb..bd863d8608cd 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -46,6 +46,9 @@
 #define TLB_V7_UIS_FULL (1 << 20)
 #define TLB_V7_UIS_ASID (1 << 21)
 
+/* Inner Shareable BTB operation (ARMv7 MP extensions) */
+#define TLB_V7_IS_BTB	(1 << 22)
+
 #define TLB_L2CLEAN_FR	(1 << 29)		/* Feroceon */
 #define TLB_DCLEAN	(1 << 30)
 #define TLB_WB		(1 << 31)
@@ -183,7 +186,7 @@
 #endif
 
 #ifdef CONFIG_SMP
-#define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \
+#define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_V7_IS_BTB | \
 			 TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID)
 #else
 #define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \
@@ -339,6 +342,12 @@ static inline void local_flush_tlb_all(void)
 		dsb();
 		isb();
 	}
+	if (tlb_flag(TLB_V7_IS_BTB)) {
+		/* flush the branch target cache */
+		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
+		dsb();
+		isb();
+	}
 }
 
 static inline void local_flush_tlb_mm(struct mm_struct *mm)
@@ -376,6 +385,12 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
 		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc");
 		dsb();
 	}
+	if (tlb_flag(TLB_V7_IS_BTB)) {
+		/* flush the branch target cache */
+		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
+		dsb();
+		isb();
+	}
 }
 
 static inline void
@@ -416,6 +431,12 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc");
 		dsb();
 	}
+	if (tlb_flag(TLB_V7_IS_BTB)) {
+		/* flush the branch target cache */
+		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
+		dsb();
+		isb();
+	}
 }
 
 static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
@@ -454,6 +475,12 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
 		dsb();
 		isb();
 	}
+	if (tlb_flag(TLB_V7_IS_BTB)) {
+		/* flush the branch target cache */
+		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc");
+		dsb();
+		isb();
+	}
 }
 
 /*
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index ea02a7b1c244..7c5f0c024db7 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -21,23 +21,6 @@
 #include <asm/smp_twd.h>
 #include <asm/hardware/gic.h>
 
-#define TWD_TIMER_LOAD 			0x00
-#define TWD_TIMER_COUNTER		0x04
-#define TWD_TIMER_CONTROL		0x08
-#define TWD_TIMER_INTSTAT		0x0C
-
-#define TWD_WDOG_LOAD			0x20
-#define TWD_WDOG_COUNTER		0x24
-#define TWD_WDOG_CONTROL		0x28
-#define TWD_WDOG_INTSTAT		0x2C
-#define TWD_WDOG_RESETSTAT		0x30
-#define TWD_WDOG_DISABLE		0x34
-
-#define TWD_TIMER_CONTROL_ENABLE	(1 << 0)
-#define TWD_TIMER_CONTROL_ONESHOT	(0 << 1)
-#define TWD_TIMER_CONTROL_PERIODIC	(1 << 1)
-#define TWD_TIMER_CONTROL_IT_ENABLE	(1 << 2)
-
 /* set up by the platform code */
 void __iomem *twd_base;
 
diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S
index 5e3f99620c04..14a0d988c82c 100644
--- a/arch/arm/lib/clear_user.S
+++ b/arch/arm/lib/clear_user.S
@@ -45,6 +45,7 @@ USER(		strnebt	r2, [r0])
 		mov	r0, #0
 		ldmfd	sp!, {r1, pc}
 ENDPROC(__clear_user)
+ENDPROC(__clear_user_std)
 
 		.pushsection .fixup,"ax"
 		.align	0
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index 027b69bdbad1..d066df686e17 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -93,6 +93,7 @@ WEAK(__copy_to_user)
 #include "copy_template.S"
 
 ENDPROC(__copy_to_user)
+ENDPROC(__copy_to_user_std)
 
 	.pushsection .fixup,"ax"
 	.align 0
diff --git a/arch/arm/mach-davinci/da830.c b/arch/arm/mach-davinci/da830.c
index 122e61a9f505..e8cb982f5e8e 100644
--- a/arch/arm/mach-davinci/da830.c
+++ b/arch/arm/mach-davinci/da830.c
@@ -410,7 +410,7 @@ static struct clk_lookup da830_clks[] = {
 	CLK("davinci-mcasp.0",	NULL,		&mcasp0_clk),
 	CLK("davinci-mcasp.1",	NULL,		&mcasp1_clk),
 	CLK("davinci-mcasp.2",	NULL,		&mcasp2_clk),
-	CLK("musb_hdrc",	NULL,		&usb20_clk),
+	CLK(NULL,		"usb20",	&usb20_clk),
 	CLK(NULL,		"aemif",	&aemif_clk),
 	CLK(NULL,		"aintc",	&aintc_clk),
 	CLK(NULL,		"secu_mgr",	&secu_mgr_clk),
diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
index 9d89c67a1cc3..e46ecd847138 100644
--- a/arch/arm/mm/cache-v6.S
+++ b/arch/arm/mm/cache-v6.S
@@ -211,6 +211,9 @@ v6_dma_inv_range:
 	mcrne	p15, 0, r1, c7, c15, 1		@ clean & invalidate unified line
 #endif
 1:
+#ifdef CONFIG_SMP
+	str	r0, [r0]			@ write for ownership
+#endif
 #ifdef HARVARD_CACHE
 	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D line
 #else
@@ -231,6 +234,9 @@ v6_dma_inv_range:
 v6_dma_clean_range:
 	bic	r0, r0, #D_CACHE_LINE_SIZE - 1
 1:
+#ifdef CONFIG_SMP
+	ldr	r2, [r0]			@ read for ownership
+#endif
 #ifdef HARVARD_CACHE
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D line
 #else
@@ -251,6 +257,10 @@ v6_dma_clean_range:
 ENTRY(v6_dma_flush_range)
 	bic	r0, r0, #D_CACHE_LINE_SIZE - 1
 1:
+#ifdef CONFIG_SMP
+	ldr	r2, [r0]			@ read for ownership
+	str	r2, [r0]			@ write for ownership
+#endif
 #ifdef HARVARD_CACHE
 	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D line
 #else
@@ -273,7 +283,9 @@ ENTRY(v6_dma_map_area)
 	add	r1, r1, r0
 	teq	r2, #DMA_FROM_DEVICE
 	beq	v6_dma_inv_range
-	b	v6_dma_clean_range
+	teq	r2, #DMA_TO_DEVICE
+	beq	v6_dma_clean_range
+	b	v6_dma_flush_range
 ENDPROC(v6_dma_map_area)
 
 /*
@@ -283,9 +295,6 @@ ENDPROC(v6_dma_map_area)
  *	- dir	- DMA direction
  */
 ENTRY(v6_dma_unmap_area)
-	add	r1, r1, r0
-	teq	r2, #DMA_TO_DEVICE
-	bne	v6_dma_inv_range
 	mov	pc, lr
 ENDPROC(v6_dma_unmap_area)
 
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index bcd64f265870..06a90dcfc60a 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -167,7 +167,11 @@ ENTRY(v7_coherent_user_range)
 	cmp	r0, r1
 	blo	1b
 	mov	r0, #0
+#ifdef CONFIG_SMP
+	mcr	p15, 0, r0, c7, c1, 6		@ invalidate BTB Inner Shareable
+#else
 	mcr	p15, 0, r0, c7, c5, 6		@ invalidate BTB
+#endif
 	dsb
 	isb
 	mov	pc, lr
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 9bfeb6b9509a..33b327379f07 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -65,6 +65,15 @@ void flush_dcache_page(struct page *page)
 }
 EXPORT_SYMBOL(flush_dcache_page);
 
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+		       unsigned long uaddr, void *dst, const void *src,
+		       unsigned long len)
+{
+	memcpy(dst, src, len);
+	if (vma->vm_flags & VM_EXEC)
+		__cpuc_coherent_user_range(uaddr, uaddr + len);
+}
+
 void __iomem *__arm_ioremap_pfn(unsigned long pfn, unsigned long offset,
 				size_t size, unsigned int mtype)
 {
@@ -87,8 +96,8 @@ void __iomem *__arm_ioremap(unsigned long phys_addr, size_t size,
 }
 EXPORT_SYMBOL(__arm_ioremap);
 
-void __iomem *__arm_ioremap(unsigned long phys_addr, size_t size,
-			    unsigned int mtype, void *caller)
+void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size,
+				   unsigned int mtype, void *caller)
 {
 	return __arm_ioremap(phys_addr, size, mtype);
 }
diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S
index 0cb1848bd876..f3f288a9546d 100644
--- a/arch/arm/mm/tlb-v7.S
+++ b/arch/arm/mm/tlb-v7.S
@@ -50,7 +50,11 @@ ENTRY(v7wbi_flush_user_tlb_range)
 	cmp	r0, r1
 	blo	1b
 	mov	ip, #0
+#ifdef CONFIG_SMP
+	mcr	p15, 0, ip, c7, c1, 6		@ flush BTAC/BTB Inner Shareable
+#else
 	mcr	p15, 0, ip, c7, c5, 6		@ flush BTAC/BTB
+#endif
 	dsb
 	mov	pc, lr
 ENDPROC(v7wbi_flush_user_tlb_range)
@@ -79,7 +83,11 @@ ENTRY(v7wbi_flush_kern_tlb_range)
 	cmp	r0, r1
 	blo	1b
 	mov	r2, #0
+#ifdef CONFIG_SMP
+	mcr	p15, 0, r2, c7, c1, 6		@ flush BTAC/BTB Inner Shareable
+#else
 	mcr	p15, 0, r2, c7, c5, 6		@ flush BTAC/BTB
+#endif
 	dsb
 	isb
 	mov	pc, lr
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 64aff520b899..aa2533ae7e9e 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -335,8 +335,11 @@ pcibios_setup_root_windows(struct pci_bus *bus, struct pci_controller *ctrl)
 }
 
 struct pci_bus * __devinit
-pci_acpi_scan_root(struct acpi_device *device, int domain, int bus)
+pci_acpi_scan_root(struct acpi_pci_root *root)
 {
+	struct acpi_device *device = root->device;
+	int domain = root->segment;
+	int bus = root->secondary.start;
 	struct pci_controller *controller;
 	unsigned int windows = 0;
 	struct pci_bus *pbus;
diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h
index 446bec29b142..26460d15b338 100644
--- a/arch/microblaze/include/asm/uaccess.h
+++ b/arch/microblaze/include/asm/uaccess.h
@@ -182,6 +182,39 @@ extern long __user_bad(void);
  * Returns zero on success, or -EFAULT on error.
  * On error, the variable @x is set to zero.
  */
+#define get_user(x, ptr)						\
+	__get_user_check((x), (ptr), sizeof(*(ptr)))
+
+#define __get_user_check(x, ptr, size)					\
+({									\
+	unsigned long __gu_val = 0;					\
+	const typeof(*(ptr)) __user *__gu_addr = (ptr);			\
+	int __gu_err = 0;						\
+									\
+	if (access_ok(VERIFY_READ, __gu_addr, size)) {			\
+		switch (size) {						\
+		case 1:							\
+			__get_user_asm("lbu", __gu_addr, __gu_val,	\
+				       __gu_err);			\
+			break;						\
+		case 2:							\
+			__get_user_asm("lhu", __gu_addr, __gu_val,	\
+				       __gu_err);			\
+			break;						\
+		case 4:							\
+			__get_user_asm("lw", __gu_addr, __gu_val,	\
+				       __gu_err);			\
+			break;						\
+		default:						\
+			__gu_err = __user_bad();			\
+			break;						\
+		}							\
+	} else {							\
+		__gu_err = -EFAULT;					\
+	}								\
+	x = (typeof(*(ptr)))__gu_val;					\
+	__gu_err;							\
+})
 
 #define __get_user(x, ptr)						\
 ({									\
@@ -206,12 +239,6 @@ extern long __user_bad(void);
 })
 
 
-#define get_user(x, ptr)						\
-({									\
-	access_ok(VERIFY_READ, (ptr), sizeof(*(ptr)))			\
-		? __get_user((x), (ptr)) : -EFAULT;			\
-})
-
 #define __put_user_asm(insn, __gu_ptr, __gu_val, __gu_err)	\
 ({								\
 	__asm__ __volatile__ (					\
@@ -266,6 +293,42 @@ extern long __user_bad(void);
  *
  * Returns zero on success, or -EFAULT on error.
  */
+#define put_user(x, ptr)						\
+	__put_user_check((x), (ptr), sizeof(*(ptr)))
+
+#define __put_user_check(x, ptr, size)					\
+({									\
+	typeof(*(ptr)) __pu_val;					\
+	typeof(*(ptr)) __user *__pu_addr = (ptr);			\
+	int __pu_err = 0;						\
+									\
+	__pu_val = (x);							\
+	if (access_ok(VERIFY_WRITE, __pu_addr, size)) {			\
+		switch (size) {						\
+		case 1:							\
+			__put_user_asm("sb", __pu_addr, __pu_val,	\
+				       __pu_err);			\
+			break;						\
+		case 2:							\
+			__put_user_asm("sh", __pu_addr, __pu_val,	\
+				       __pu_err);			\
+			break;						\
+		case 4:							\
+			__put_user_asm("sw", __pu_addr, __pu_val,	\
+				       __pu_err);			\
+			break;						\
+		case 8:							\
+			__put_user_asm_8(__pu_addr, __pu_val, __pu_err);\
+			break;						\
+		default:						\
+			__pu_err = __user_bad();			\
+			break;						\
+		}							\
+	} else {							\
+		__pu_err = -EFAULT;					\
+	}								\
+	__pu_err;							\
+})
 
 #define __put_user(x, ptr)						\
 ({									\
@@ -290,18 +353,6 @@ extern long __user_bad(void);
 	__gu_err;							\
 })
 
-#ifndef CONFIG_MMU
-
-#define put_user(x, ptr)	__put_user((x), (ptr))
-
-#else /* CONFIG_MMU */
-
-#define put_user(x, ptr)						\
-({									\
-	access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr)))			\
-		? __put_user((x), (ptr)) : -EFAULT;			\
-})
-#endif /* CONFIG_MMU */
 
 /* copy_to_from_user */
 #define __copy_from_user(to, from, n)	\
diff --git a/arch/microblaze/kernel/cpu/cache.c b/arch/microblaze/kernel/cpu/cache.c
index 21c3a92394de..109876e8d643 100644
--- a/arch/microblaze/kernel/cpu/cache.c
+++ b/arch/microblaze/kernel/cpu/cache.c
@@ -137,8 +137,9 @@ do {									\
 do {									\
 	int step = -line_length;					\
 	int align = ~(line_length - 1);					\
+	int count;							\
 	end = ((end & align) == end) ? end - line_length : end & align;	\
-	int count = end - start;					\
+	count = end - start;						\
 	WARN_ON(count < 0);						\
 									\
 	__asm__ __volatile__ (" 1:	" #op "	%0, %1;			\
diff --git a/arch/microblaze/kernel/entry-nommu.S b/arch/microblaze/kernel/entry-nommu.S
index 391d6197fc3b..8cc18cd2cce6 100644
--- a/arch/microblaze/kernel/entry-nommu.S
+++ b/arch/microblaze/kernel/entry-nommu.S
@@ -476,6 +476,8 @@ ENTRY(ret_from_fork)
 	nop
 
 work_pending:
+	enable_irq
+
 	andi	r11, r19, _TIF_NEED_RESCHED
 	beqi	r11, 1f
 	bralid	r15, schedule
diff --git a/arch/microblaze/kernel/microblaze_ksyms.c b/arch/microblaze/kernel/microblaze_ksyms.c
index bc4dcb7d3861..ff85f7718035 100644
--- a/arch/microblaze/kernel/microblaze_ksyms.c
+++ b/arch/microblaze/kernel/microblaze_ksyms.c
@@ -52,3 +52,14 @@ EXPORT_SYMBOL_GPL(_ebss);
 extern void _mcount(void);
 EXPORT_SYMBOL(_mcount);
 #endif
+
+/*
+ * Assembly functions that may be used (directly or indirectly) by modules
+ */
+EXPORT_SYMBOL(__copy_tofrom_user);
+EXPORT_SYMBOL(__strncpy_user);
+
+#ifdef CONFIG_OPT_LIB_ASM
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memmove);
+#endif
diff --git a/arch/microblaze/kernel/module.c b/arch/microblaze/kernel/module.c
index cbecf110dc30..0e73f6606547 100644
--- a/arch/microblaze/kernel/module.c
+++ b/arch/microblaze/kernel/module.c
@@ -16,6 +16,7 @@
 #include <linux/string.h>
 
 #include <asm/pgtable.h>
+#include <asm/cacheflush.h>
 
 void *module_alloc(unsigned long size)
 {
@@ -151,6 +152,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
 int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs,
 		struct module *module)
 {
+	flush_dcache();
 	return 0;
 }
 
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index f42c2dde8b1c..cca3579d4268 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -47,6 +47,7 @@ unsigned long memory_start;
 EXPORT_SYMBOL(memory_start);
 unsigned long memory_end; /* due to mm/nommu.c */
 unsigned long memory_size;
+EXPORT_SYMBOL(memory_size);
 
 /*
  * paging_init() sets up the page tables - in fact we've already done this.
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index 784557fb28cf..59bf2335a4ce 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -42,6 +42,7 @@
 
 unsigned long ioremap_base;
 unsigned long ioremap_bot;
+EXPORT_SYMBOL(ioremap_bot);
 
 /* The maximum lowmem defaults to 768Mb, but this can be configured to
  * another value.
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index 01c8c97c15b7..9cb782b8e036 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -1507,7 +1507,7 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus)
 	pci_bus_add_devices(bus);
 
 	/* Fixup EEH */
-	eeh_add_device_tree_late(bus);
+	/* eeh_add_device_tree_late(bus); */
 }
 EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus);
 
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 49382d5e891a..c6e3c93ce7c7 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -135,6 +135,12 @@
 #define FPU_CSR_COND7   0x80000000      /* $fcc7 */
 
 /*
+ * Bits 18 - 20 of the FPU Status Register will be read as 0,
+ * and should be written as zero.
+ */
+#define FPU_CSR_RSVD	0x001c0000
+
+/*
  * X the exception cause indicator
  * E the exception enable
  * S the sticky/flag bit
@@ -161,7 +167,8 @@
 #define FPU_CSR_UDF_S   0x00000008
 #define FPU_CSR_INE_S   0x00000004
 
-/* rounding mode */
+/* Bits 0 and 1 of FPU Status Register specify the rounding mode */
+#define FPU_CSR_RM	0x00000003
 #define FPU_CSR_RN      0x0     /* nearest */
 #define FPU_CSR_RZ      0x1     /* towards zero */
 #define FPU_CSR_RU      0x2     /* towards +Infinity */
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 44337ba03717..a5297e2a353a 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -385,7 +385,7 @@ EXPORT(sysn32_call_table)
 	PTR	sys_fchmodat
 	PTR	sys_faccessat
 	PTR	compat_sys_pselect6
-	PTR	sys_ppoll			/* 6265 */
+	PTR	compat_sys_ppoll		/* 6265 */
 	PTR	sys_unshare
 	PTR	sys_splice
 	PTR	sys_sync_file_range
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 8f2f8e9d8b21..f2338d1c0b48 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -78,6 +78,9 @@ DEFINE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats);
 #define FPCREG_RID	0	/* $0  = revision id */
 #define FPCREG_CSR	31	/* $31 = csr */
 
+/* Determine rounding mode from the RM bits of the FCSR */
+#define modeindex(v) ((v) & FPU_CSR_RM)
+
 /* Convert Mips rounding mode (0..3) to IEEE library modes. */
 static const unsigned char ieee_rm[4] = {
 	[FPU_CSR_RN] = IEEE754_RN,
@@ -384,10 +387,14 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx)
 					(void *) (xcp->cp0_epc),
 					MIPSInst_RT(ir), value);
 #endif
-				value &= (FPU_CSR_FLUSH | FPU_CSR_ALL_E | FPU_CSR_ALL_S | 0x03);
-				ctx->fcr31 &= ~(FPU_CSR_FLUSH | FPU_CSR_ALL_E | FPU_CSR_ALL_S | 0x03);
-				/* convert to ieee library modes */
-				ctx->fcr31 |= (value & ~0x3) | ieee_rm[value & 0x3];
+
+				/*
+				 * Don't write reserved bits,
+				 * and convert to ieee library modes
+				 */
+				ctx->fcr31 = (value &
+						~(FPU_CSR_RSVD | FPU_CSR_RM)) |
+						ieee_rm[modeindex(value)];
 			}
 			if ((ctx->fcr31 >> 5) & ctx->fcr31 & FPU_CSR_ALL_E) {
 				return SIGFPE;
diff --git a/arch/mips/oprofile/op_model_loongson2.c b/arch/mips/oprofile/op_model_loongson2.c
index 29e2326b6257..fa3bf661ae29 100644
--- a/arch/mips/oprofile/op_model_loongson2.c
+++ b/arch/mips/oprofile/op_model_loongson2.c
@@ -122,7 +122,7 @@ static irqreturn_t loongson2_perfcount_handler(int irq, void *dev_id)
 	 */
 
 	/* Check whether the irq belongs to me */
-	enabled = read_c0_perfcnt() & LOONGSON2_PERFCNT_INT_EN;
+	enabled = read_c0_perfctrl() & LOONGSON2_PERFCNT_INT_EN;
 	if (!enabled)
 		return IRQ_NONE;
 	enabled = reg.cnt1_enabled | reg.cnt2_enabled;
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 9f4c9d4f5803..bd100fcf40d0 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -130,43 +130,5 @@ static inline int irqs_disabled_flags(unsigned long flags)
  */
 struct irq_chip;
 
-#ifdef CONFIG_PERF_EVENTS
-
-#ifdef CONFIG_PPC64
-static inline unsigned long test_perf_event_pending(void)
-{
-	unsigned long x;
-
-	asm volatile("lbz %0,%1(13)"
-		: "=r" (x)
-		: "i" (offsetof(struct paca_struct, perf_event_pending)));
-	return x;
-}
-
-static inline void set_perf_event_pending(void)
-{
-	asm volatile("stb %0,%1(13)" : :
-		"r" (1),
-		"i" (offsetof(struct paca_struct, perf_event_pending)));
-}
-
-static inline void clear_perf_event_pending(void)
-{
-	asm volatile("stb %0,%1(13)" : :
-		"r" (0),
-		"i" (offsetof(struct paca_struct, perf_event_pending)));
-}
-#endif /* CONFIG_PPC64 */
-
-#else  /* CONFIG_PERF_EVENTS */
-
-static inline unsigned long test_perf_event_pending(void)
-{
-	return 0;
-}
-
-static inline void clear_perf_event_pending(void) {}
-#endif /* CONFIG_PERF_EVENTS */
-
 #endif	/* __KERNEL__ */
 #endif	/* _ASM_POWERPC_HW_IRQ_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 957ceb7059c5..c09138d150d4 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -133,7 +133,6 @@ int main(void)
 	DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
 	DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
 	DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
-	DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending));
 	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
 #ifdef CONFIG_PPC_MM_SLICES
 	DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 59c928564a03..4ff4da2c238b 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -1,7 +1,8 @@
 /*
  * Contains routines needed to support swiotlb for ppc.
  *
- * Copyright (C) 2009 Becky Bruce, Freescale Semiconductor
+ * Copyright (C) 2009-2010 Freescale Semiconductor, Inc.
+ * Author: Becky Bruce
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
@@ -70,7 +71,7 @@ static int ppc_swiotlb_bus_notify(struct notifier_block *nb,
 	sd->max_direct_dma_addr = 0;
 
 	/* May need to bounce if the device can't address all of DRAM */
-	if (dma_get_mask(dev) < lmb_end_of_DRAM())
+	if ((dma_get_mask(dev) + 1) < lmb_end_of_DRAM())
 		set_dma_ops(dev, &swiotlb_dma_ops);
 
 	return NOTIFY_DONE;
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 07109d843787..42e9d908914a 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -556,15 +556,6 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
 2:
 	TRACE_AND_RESTORE_IRQ(r5);
 
-#ifdef CONFIG_PERF_EVENTS
-	/* check paca->perf_event_pending if we're enabling ints */
-	lbz	r3,PACAPERFPEND(r13)
-	and.	r3,r3,r5
-	beq	27f
-	bl	.perf_event_do_pending
-27:
-#endif /* CONFIG_PERF_EVENTS */
-
 	/* extract EE bit and use it to restore paca->hard_enabled */
 	ld	r3,_MSR(r1)
 	rldicl	r4,r3,49,63		/* r0 = (r3 >> 15) & 1 */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 64f6f2031c22..066bd31551d5 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -53,7 +53,6 @@
 #include <linux/bootmem.h>
 #include <linux/pci.h>
 #include <linux/debugfs.h>
-#include <linux/perf_event.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -145,11 +144,6 @@ notrace void raw_local_irq_restore(unsigned long en)
 	}
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
-	if (test_perf_event_pending()) {
-		clear_perf_event_pending();
-		perf_event_do_pending();
-	}
-
 	/*
 	 * if (get_paca()->hard_enabled) return;
 	 * But again we need to take care that gcc gets hard_enabled directly
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 1b16b9a3e49a..0441bbdadbd1 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -532,25 +532,60 @@ void __init iSeries_time_init_early(void)
 }
 #endif /* CONFIG_PPC_ISERIES */
 
-#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32)
-DEFINE_PER_CPU(u8, perf_event_pending);
+#ifdef CONFIG_PERF_EVENTS
 
-void set_perf_event_pending(void)
+/*
+ * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
+ */
+#ifdef CONFIG_PPC64
+static inline unsigned long test_perf_event_pending(void)
 {
-	get_cpu_var(perf_event_pending) = 1;
-	set_dec(1);
-	put_cpu_var(perf_event_pending);
+	unsigned long x;
+
+	asm volatile("lbz %0,%1(13)"
+		: "=r" (x)
+		: "i" (offsetof(struct paca_struct, perf_event_pending)));
+	return x;
 }
 
+static inline void set_perf_event_pending_flag(void)
+{
+	asm volatile("stb %0,%1(13)" : :
+		"r" (1),
+		"i" (offsetof(struct paca_struct, perf_event_pending)));
+}
+
+static inline void clear_perf_event_pending(void)
+{
+	asm volatile("stb %0,%1(13)" : :
+		"r" (0),
+		"i" (offsetof(struct paca_struct, perf_event_pending)));
+}
+
+#else /* 32-bit */
+
+DEFINE_PER_CPU(u8, perf_event_pending);
+
+#define set_perf_event_pending_flag()	__get_cpu_var(perf_event_pending) = 1
 #define test_perf_event_pending()	__get_cpu_var(perf_event_pending)
 #define clear_perf_event_pending()	__get_cpu_var(perf_event_pending) = 0
 
-#else  /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
+#endif /* 32 vs 64 bit */
+
+void set_perf_event_pending(void)
+{
+	preempt_disable();
+	set_perf_event_pending_flag();
+	set_dec(1);
+	preempt_enable();
+}
+
+#else  /* CONFIG_PERF_EVENTS */
 
 #define test_perf_event_pending()	0
 #define clear_perf_event_pending()
 
-#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
+#endif /* CONFIG_PERF_EVENTS */
 
 /*
  * For iSeries shared processors, we have to let the hypervisor
@@ -582,10 +617,6 @@ void timer_interrupt(struct pt_regs * regs)
 	set_dec(DECREMENTER_MAX);
 
 #ifdef CONFIG_PPC32
-	if (test_perf_event_pending()) {
-		clear_perf_event_pending();
-		perf_event_do_pending();
-	}
 	if (atomic_read(&ppc_n_lost_interrupts) != 0)
 		do_IRQ(regs);
 #endif
@@ -604,6 +635,11 @@ void timer_interrupt(struct pt_regs * regs)
 
 	calculate_steal_time();
 
+	if (test_perf_event_pending()) {
+		clear_perf_event_pending();
+		perf_event_do_pending();
+	}
+
 #ifdef CONFIG_PPC_ISERIES
 	if (firmware_has_feature(FW_FEATURE_ISERIES))
 		get_lppaca()->int_dword.fields.decr_int = 0;
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 2570fcc7665d..812312542e50 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -440,7 +440,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 	unsigned int gtlb_index;
 
 	gtlb_index = kvmppc_get_gpr(vcpu, ra);
-	if (gtlb_index > KVM44x_GUEST_TLB_SIZE) {
+	if (gtlb_index >= KVM44x_GUEST_TLB_SIZE) {
 		printk("%s: index %d\n", __func__, gtlb_index);
 		kvmppc_dump_vcpu(vcpu);
 		return EMULATE_FAIL;
diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S
index 1bbcc499d455..b8f8dc126102 100644
--- a/arch/s390/kernel/head31.S
+++ b/arch/s390/kernel/head31.S
@@ -82,7 +82,7 @@ startup_continue:
 _ehead:
 
 #ifdef CONFIG_SHARED_KERNEL
-	.org	0x100000
+	.org	0x100000 - 0x11000	# head.o ends at 0x11000
 #endif
 
 #
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 1f70970de0aa..cdef68717416 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -80,7 +80,7 @@ startup_continue:
 _ehead:
 
 #ifdef CONFIG_SHARED_KERNEL
-	.org	0x100000
+	.org	0x100000 - 0x11000	# head.o ends at 0x11000
 #endif
 
 #
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 33fdc5a79764..9f654da4cecc 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -640,7 +640,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 
 asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 {
-	long ret;
+	long ret = 0;
 
 	/* Do the secure computing check first. */
 	secure_computing(regs->gprs[2]);
@@ -649,7 +649,6 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 	 * The sysc_tracesys code in entry.S stored the system
 	 * call number to gprs[2].
 	 */
-	ret = regs->gprs[2];
 	if (test_thread_flag(TIF_SYSCALL_TRACE) &&
 	    (tracehook_report_syscall_entry(regs) ||
 	     regs->gprs[2] >= NR_syscalls)) {
@@ -671,7 +670,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 				    regs->gprs[2], regs->orig_gpr2,
 				    regs->gprs[3], regs->gprs[4],
 				    regs->gprs[5]);
-	return ret;
+	return ret ?: regs->gprs[2];
 }
 
 asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h
index f70e60071fe8..af00bd1d2089 100644
--- a/arch/x86/include/asm/k8.h
+++ b/arch/x86/include/asm/k8.h
@@ -16,11 +16,16 @@ extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn);
 extern int k8_scan_nodes(void);
 
 #ifdef CONFIG_K8_NB
+extern int num_k8_northbridges;
+
 static inline struct pci_dev *node_to_k8_nb_misc(int node)
 {
 	return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL;
 }
+
 #else
+#define num_k8_northbridges 0
+
 static inline struct pci_dev *node_to_k8_nb_misc(int node)
 {
 	return NULL;
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 6c3fdd631ed3..f32a4301c4d4 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -225,5 +225,13 @@ extern void mcheck_intel_therm_init(void);
 static inline void mcheck_intel_therm_init(void) { }
 #endif
 
+/*
+ * Used by APEI to report memory error via /dev/mcelog
+ */
+
+struct cper_sec_mem_err;
+extern void apei_mce_report_mem_error(int corrected,
+				      struct cper_sec_mem_err *mem_err);
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index b3eeb66c0a51..95962a93f99a 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -340,6 +340,10 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
 	     (boot_cpu_data.x86_mask  < 0x1)))
 		return;
 
+	/* not in virtualized environments */
+	if (num_k8_northbridges == 0)
+		return;
+
 	this_leaf->can_disable = true;
 	this_leaf->l3_indices  = amd_calc_l3_indices();
 }
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 4ac6d48fe11b..bb34b03af252 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -7,3 +7,5 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
 obj-$(CONFIG_X86_MCE_INJECT)	+= mce-inject.o
 
 obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
+
+obj-$(CONFIG_ACPI_APEI)		+= mce-apei.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
new file mode 100644
index 000000000000..745b54f9be89
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -0,0 +1,138 @@
+/*
+ * Bridge between MCE and APEI
+ *
+ * On some machine, corrected memory errors are reported via APEI
+ * generic hardware error source (GHES) instead of corrected Machine
+ * Check. These corrected memory errors can be reported to user space
+ * through /dev/mcelog via faking a corrected Machine Check, so that
+ * the error memory page can be offlined by /sbin/mcelog if the error
+ * count for one page is beyond the threshold.
+ *
+ * For fatal MCE, save MCE record into persistent storage via ERST, so
+ * that the MCE record can be logged after reboot via ERST.
+ *
+ * Copyright 2010 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/acpi.h>
+#include <linux/cper.h>
+#include <acpi/apei.h>
+#include <asm/mce.h>
+
+#include "mce-internal.h"
+
+void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
+{
+	struct mce m;
+
+	/* Only corrected MC is reported */
+	if (!corrected)
+		return;
+
+	mce_setup(&m);
+	m.bank = 1;
+	/* Fake a memory read corrected error with unknown channel */
+	m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;
+	m.addr = mem_err->physical_addr;
+	mce_log(&m);
+	mce_notify_irq();
+}
+EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
+
+#define CPER_CREATOR_MCE						\
+	UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c,	\
+		0x64, 0x90, 0xb8, 0x9d)
+#define CPER_SECTION_TYPE_MCE						\
+	UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96,	\
+		0x04, 0x4a, 0x38, 0xfc)
+
+/*
+ * CPER specification (in UEFI specification 2.3 appendix N) requires
+ * byte-packed.
+ */
+struct cper_mce_record {
+	struct cper_record_header hdr;
+	struct cper_section_descriptor sec_hdr;
+	struct mce mce;
+} __packed;
+
+int apei_write_mce(struct mce *m)
+{
+	struct cper_mce_record rcd;
+
+	memset(&rcd, 0, sizeof(rcd));
+	memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
+	rcd.hdr.revision = CPER_RECORD_REV;
+	rcd.hdr.signature_end = CPER_SIG_END;
+	rcd.hdr.section_count = 1;
+	rcd.hdr.error_severity = CPER_SER_FATAL;
+	/* timestamp, platform_id, partition_id are all invalid */
+	rcd.hdr.validation_bits = 0;
+	rcd.hdr.record_length = sizeof(rcd);
+	rcd.hdr.creator_id = CPER_CREATOR_MCE;
+	rcd.hdr.notification_type = CPER_NOTIFY_MCE;
+	rcd.hdr.record_id = cper_next_record_id();
+	rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
+
+	rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd;
+	rcd.sec_hdr.section_length = sizeof(rcd.mce);
+	rcd.sec_hdr.revision = CPER_SEC_REV;
+	/* fru_id and fru_text is invalid */
+	rcd.sec_hdr.validation_bits = 0;
+	rcd.sec_hdr.flags = CPER_SEC_PRIMARY;
+	rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
+	rcd.sec_hdr.section_severity = CPER_SER_FATAL;
+
+	memcpy(&rcd.mce, m, sizeof(*m));
+
+	return erst_write(&rcd.hdr);
+}
+
+ssize_t apei_read_mce(struct mce *m, u64 *record_id)
+{
+	struct cper_mce_record rcd;
+	ssize_t len;
+
+	len = erst_read_next(&rcd.hdr, sizeof(rcd));
+	if (len <= 0)
+		return len;
+	/* Can not skip other records in storage via ERST unless clear them */
+	else if (len != sizeof(rcd) ||
+		 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) {
+		if (printk_ratelimit())
+			pr_warning(
+			"MCE-APEI: Can not skip the unknown record in ERST");
+		return -EIO;
+	}
+
+	memcpy(m, &rcd.mce, sizeof(*m));
+	*record_id = rcd.hdr.record_id;
+
+	return sizeof(*m);
+}
+
+/* Check whether there is record in ERST */
+int apei_check_mce(void)
+{
+	return erst_get_record_count();
+}
+
+int apei_clear_mce(u64 record_id)
+{
+	return erst_clear(record_id);
+}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 32996f9fab67..fefcc69ee8b5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,3 +28,26 @@ extern int mce_ser;
 
 extern struct mce_bank *mce_banks;
 
+#ifdef CONFIG_ACPI_APEI
+int apei_write_mce(struct mce *m);
+ssize_t apei_read_mce(struct mce *m, u64 *record_id);
+int apei_check_mce(void);
+int apei_clear_mce(u64 record_id);
+#else
+static inline int apei_write_mce(struct mce *m)
+{
+	return -EINVAL;
+}
+static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id)
+{
+	return 0;
+}
+static inline int apei_check_mce(void)
+{
+	return 0;
+}
+static inline int apei_clear_mce(u64 record_id)
+{
+	return -EINVAL;
+}
+#endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8a6f0afa767e..09535ca9b9d7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -264,7 +264,7 @@ static void wait_for_panic(void)
 
 static void mce_panic(char *msg, struct mce *final, char *exp)
 {
-	int i;
+	int i, apei_err = 0;
 
 	if (!fake_panic) {
 		/*
@@ -287,8 +287,11 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 		struct mce *m = &mcelog.entry[i];
 		if (!(m->status & MCI_STATUS_VAL))
 			continue;
-		if (!(m->status & MCI_STATUS_UC))
+		if (!(m->status & MCI_STATUS_UC)) {
 			print_mce(m);
+			if (!apei_err)
+				apei_err = apei_write_mce(m);
+		}
 	}
 	/* Now print uncorrected but with the final one last */
 	for (i = 0; i < MCE_LOG_LEN; i++) {
@@ -297,11 +300,17 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 			continue;
 		if (!(m->status & MCI_STATUS_UC))
 			continue;
-		if (!final || memcmp(m, final, sizeof(struct mce)))
+		if (!final || memcmp(m, final, sizeof(struct mce))) {
 			print_mce(m);
+			if (!apei_err)
+				apei_err = apei_write_mce(m);
+		}
 	}
-	if (final)
+	if (final) {
 		print_mce(final);
+		if (!apei_err)
+			apei_err = apei_write_mce(final);
+	}
 	if (cpu_missing)
 		printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n");
 	print_mce_tail();
@@ -1493,6 +1502,43 @@ static void collect_tscs(void *data)
 	rdtscll(cpu_tsc[smp_processor_id()]);
 }
 
+static int mce_apei_read_done;
+
+/* Collect MCE record of previous boot in persistent storage via APEI ERST. */
+static int __mce_read_apei(char __user **ubuf, size_t usize)
+{
+	int rc;
+	u64 record_id;
+	struct mce m;
+
+	if (usize < sizeof(struct mce))
+		return -EINVAL;
+
+	rc = apei_read_mce(&m, &record_id);
+	/* Error or no more MCE record */
+	if (rc <= 0) {
+		mce_apei_read_done = 1;
+		return rc;
+	}
+	rc = -EFAULT;
+	if (copy_to_user(*ubuf, &m, sizeof(struct mce)))
+		return rc;
+	/*
+	 * In fact, we should have cleared the record after that has
+	 * been flushed to the disk or sent to network in
+	 * /sbin/mcelog, but we have no interface to support that now,
+	 * so just clear it to avoid duplication.
+	 */
+	rc = apei_clear_mce(record_id);
+	if (rc) {
+		mce_apei_read_done = 1;
+		return rc;
+	}
+	*ubuf += sizeof(struct mce);
+
+	return 0;
+}
+
 static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
 			loff_t *off)
 {
@@ -1506,15 +1552,19 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
 		return -ENOMEM;
 
 	mutex_lock(&mce_read_mutex);
+
+	if (!mce_apei_read_done) {
+		err = __mce_read_apei(&buf, usize);
+		if (err || buf != ubuf)
+			goto out;
+	}
+
 	next = rcu_dereference_check_mce(mcelog.next);
 
 	/* Only supports full reads right now */
-	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
-		mutex_unlock(&mce_read_mutex);
-		kfree(cpu_tsc);
-
-		return -EINVAL;
-	}
+	err = -EINVAL;
+	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
+		goto out;
 
 	err = 0;
 	prev = 0;
@@ -1562,10 +1612,15 @@ timeout:
 			memset(&mcelog.entry[i], 0, sizeof(struct mce));
 		}
 	}
+
+	if (err)
+		err = -EFAULT;
+
+out:
 	mutex_unlock(&mce_read_mutex);
 	kfree(cpu_tsc);
 
-	return err ? -EFAULT : buf - ubuf;
+	return err ? err : buf - ubuf;
 }
 
 static unsigned int mce_poll(struct file *file, poll_table *wait)
@@ -1573,6 +1628,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait)
 	poll_wait(file, &mce_wait, wait);
 	if (rcu_dereference_check_mce(mcelog.next))
 		return POLLIN | POLLRDNORM;
+	if (!mce_apei_read_done && apei_check_mce())
+		return POLLIN | POLLRDNORM;
 	return 0;
 }
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 28ad9f4d8b94..0415c3ef91b5 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -546,11 +546,13 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
 		 * check OSVW bit for CPUs that are not affected
 		 * by erratum #400
 		 */
-		rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val);
-		if (val >= 2) {
-			rdmsrl(MSR_AMD64_OSVW_STATUS, val);
-			if (!(val & BIT(1)))
-				goto no_c1e_idle;
+		if (cpu_has(c, X86_FEATURE_OSVW)) {
+			rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val);
+			if (val >= 2) {
+				rdmsrl(MSR_AMD64_OSVW_STATUS, val);
+				if (!(val & BIT(1)))
+					goto no_c1e_idle;
+			}
 		}
 		return 1;
 	}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2ba58206812a..737361fcd503 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2067,7 +2067,7 @@ static int cpuid_interception(struct vcpu_svm *svm)
 static int iret_interception(struct vcpu_svm *svm)
 {
 	++svm->vcpu.stat.nmi_window_exits;
-	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+	svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET);
 	svm->vcpu.arch.hflags |= HF_IRET_MASK;
 	return 1;
 }
@@ -2479,7 +2479,7 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
 
 	svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
 	vcpu->arch.hflags |= HF_NMI_MASK;
-	svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+	svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET);
 	++vcpu->stat.nmi_injections;
 }
 
@@ -2539,10 +2539,10 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 
 	if (masked) {
 		svm->vcpu.arch.hflags |= HF_NMI_MASK;
-		svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+		svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET);
 	} else {
 		svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
-		svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+		svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET);
 	}
 }
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bc933cfb4e66..2f8db0ec8ae4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2703,8 +2703,7 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 		return 0;
 
 	return	!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
-			(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS |
-				GUEST_INTR_STATE_NMI));
+			(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI));
 }
 
 static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3c4ca98ad27f..c4f35b545c1d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1712,6 +1712,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 	if (copy_from_user(cpuid_entries, entries,
 			   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
 		goto out_free;
+	vcpu_load(vcpu);
 	for (i = 0; i < cpuid->nent; i++) {
 		vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
 		vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
@@ -1729,6 +1730,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 	r = 0;
 	kvm_apic_set_version(vcpu);
 	kvm_x86_ops->cpuid_update(vcpu);
+	vcpu_put(vcpu);
 
 out_free:
 	vfree(cpuid_entries);
@@ -1749,9 +1751,11 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
 	if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
 			   cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
 		goto out;
+	vcpu_load(vcpu);
 	vcpu->arch.cpuid_nent = cpuid->nent;
 	kvm_apic_set_version(vcpu);
 	kvm_x86_ops->cpuid_update(vcpu);
+	vcpu_put(vcpu);
 	return 0;
 
 out:
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 28c68762648f..38512d0c4742 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -461,7 +461,8 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 		 * node, it must now point to the fake node ID.
 		 */
 		for (j = 0; j < MAX_LOCAL_APIC; j++)
-			if (apicid_to_node[j] == nid)
+			if (apicid_to_node[j] == nid &&
+			    fake_apicid_to_node[j] == NUMA_NO_NODE)
 				fake_apicid_to_node[j] = i;
 	}
 	for (i = 0; i < num_nodes; i++)
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 31930fd30ea9..9dcf43d7d0c0 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -224,8 +224,11 @@ res_alloc_fail:
 	return;
 }
 
-struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum)
+struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
 {
+	struct acpi_device *device = root->device;
+	int domain = root->segment;
+	int busnum = root->secondary.start;
 	struct pci_bus *bus;
 	struct pci_sysdata *sd;
 	int node;
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c
index 8bf2fcb88d04..1cdc02cf8fa4 100644
--- a/arch/x86/pci/mrst.c
+++ b/arch/x86/pci/mrst.c
@@ -247,6 +247,10 @@ static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev)
 	u32 size;
 	int i;
 
+	/* Must have extended configuration space */
+	if (dev->cfg_size < PCIE_CAP_OFFSET + 4)
+		return;
+
 	/* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */
 	offset = fixed_bar_cap(dev->bus, dev->devfn);
 	if (!offset || PCI_DEVFN(2, 0) == dev->devfn ||
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 93d2c7971df6..746411518802 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -360,4 +360,13 @@ config ACPI_SBS
 	  To compile this driver as a module, choose M here:
 	  the modules will be called sbs and sbshc.
 
+config ACPI_HED
+	tristate "Hardware Error Device"
+	help
+	  This driver supports the Hardware Error Device (PNP0C33),
+	  which is used to report some hardware errors notified via
+	  SCI, mainly the corrected errors.
+
+source "drivers/acpi/apei/Kconfig"
+
 endif	# ACPI
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index a8d8998dd5c5..6ee33169e1dc 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -19,7 +19,7 @@ obj-y				+= acpi.o \
 
 # All the builtin files are in the "acpi." module_param namespace.
 acpi-y				+= osl.o utils.o reboot.o
-acpi-y				+= hest.o
+acpi-y				+= atomicio.o
 
 # sleep related files
 acpi-y				+= wakeup.o
@@ -59,6 +59,7 @@ obj-$(CONFIG_ACPI_BATTERY)	+= battery.o
 obj-$(CONFIG_ACPI_SBS)		+= sbshc.o
 obj-$(CONFIG_ACPI_SBS)		+= sbs.o
 obj-$(CONFIG_ACPI_POWER_METER)	+= power_meter.o
+obj-$(CONFIG_ACPI_HED)		+= hed.o
 
 # processor has its own "processor." module_param namespace
 processor-y			:= processor_driver.o processor_throttling.o
@@ -66,3 +67,5 @@ processor-y			+= processor_idle.o processor_thermal.o
 processor-$(CONFIG_CPU_FREQ)	+= processor_perflib.o
 
 obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o
+
+obj-$(CONFIG_ACPI_APEI)		+= apei/
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
new file mode 100644
index 000000000000..f8c668f27b5a
--- /dev/null
+++ b/drivers/acpi/apei/Kconfig
@@ -0,0 +1,30 @@
+config ACPI_APEI
+	bool "ACPI Platform Error Interface (APEI)"
+	depends on X86
+	help
+	  APEI allows to report errors (for example from the chipset)
+	  to the operating system. This improves NMI handling
+	  especially. In addition it supports error serialization and
+	  error injection.
+
+config ACPI_APEI_GHES
+	tristate "APEI Generic Hardware Error Source"
+	depends on ACPI_APEI && X86
+	select ACPI_HED
+	help
+	  Generic Hardware Error Source provides a way to report
+	  platform hardware errors (such as that from chipset). It
+	  works in so called "Firmware First" mode, that is, hardware
+	  errors are reported to firmware firstly, then reported to
+	  Linux by firmware. This way, some non-standard hardware
+	  error registers or non-standard hardware link can be checked
+	  by firmware to produce more valuable hardware error
+	  information for Linux.
+
+config ACPI_APEI_EINJ
+	tristate "APEI Error INJection (EINJ)"
+	depends on ACPI_APEI && DEBUG_FS
+	help
+	  EINJ provides a hardware error injection mechanism, it is
+	  mainly used for debugging and testing the other parts of
+	  APEI and some other RAS features.
diff --git a/drivers/acpi/apei/Makefile b/drivers/acpi/apei/Makefile
new file mode 100644
index 000000000000..b13b03a17789
--- /dev/null
+++ b/drivers/acpi/apei/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_ACPI_APEI)		+= apei.o
+obj-$(CONFIG_ACPI_APEI_GHES)	+= ghes.o
+obj-$(CONFIG_ACPI_APEI_EINJ)	+= einj.o
+
+apei-y := apei-base.o hest.o cper.o erst.o
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
new file mode 100644
index 000000000000..db3946e9c66b
--- /dev/null
+++ b/drivers/acpi/apei/apei-base.c
@@ -0,0 +1,593 @@
+/*
+ * apei-base.c - ACPI Platform Error Interface (APEI) supporting
+ * infrastructure
+ *
+ * APEI allows to report errors (for example from the chipset) to the
+ * the operating system. This improves NMI handling especially. In
+ * addition it supports error serialization and error injection.
+ *
+ * For more information about APEI, please refer to ACPI Specification
+ * version 4.0, chapter 17.
+ *
+ * This file has Common functions used by more than one APEI table,
+ * including framework of interpreter for ERST and EINJ; resource
+ * management for APEI registers.
+ *
+ * Copyright (C) 2009, Intel Corp.
+ *	Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/kref.h>
+#include <linux/rculist.h>
+#include <linux/interrupt.h>
+#include <linux/debugfs.h>
+#include <acpi/atomicio.h>
+
+#include "apei-internal.h"
+
+#define APEI_PFX "APEI: "
+
+/*
+ * APEI ERST (Error Record Serialization Table) and EINJ (Error
+ * INJection) interpreter framework.
+ */
+
+#define APEI_EXEC_PRESERVE_REGISTER	0x1
+
+void apei_exec_ctx_init(struct apei_exec_context *ctx,
+			struct apei_exec_ins_type *ins_table,
+			u32 instructions,
+			struct acpi_whea_header *action_table,
+			u32 entries)
+{
+	ctx->ins_table = ins_table;
+	ctx->instructions = instructions;
+	ctx->action_table = action_table;
+	ctx->entries = entries;
+}
+EXPORT_SYMBOL_GPL(apei_exec_ctx_init);
+
+int __apei_exec_read_register(struct acpi_whea_header *entry, u64 *val)
+{
+	int rc;
+
+	rc = acpi_atomic_read(val, &entry->register_region);
+	if (rc)
+		return rc;
+	*val >>= entry->register_region.bit_offset;
+	*val &= entry->mask;
+
+	return 0;
+}
+
+int apei_exec_read_register(struct apei_exec_context *ctx,
+			    struct acpi_whea_header *entry)
+{
+	int rc;
+	u64 val = 0;
+
+	rc = __apei_exec_read_register(entry, &val);
+	if (rc)
+		return rc;
+	ctx->value = val;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(apei_exec_read_register);
+
+int apei_exec_read_register_value(struct apei_exec_context *ctx,
+				  struct acpi_whea_header *entry)
+{
+	int rc;
+
+	rc = apei_exec_read_register(ctx, entry);
+	if (rc)
+		return rc;
+	ctx->value = (ctx->value == entry->value);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(apei_exec_read_register_value);
+
+int __apei_exec_write_register(struct acpi_whea_header *entry, u64 val)
+{
+	int rc;
+
+	val &= entry->mask;
+	val <<= entry->register_region.bit_offset;
+	if (entry->flags & APEI_EXEC_PRESERVE_REGISTER) {
+		u64 valr = 0;
+		rc = acpi_atomic_read(&valr, &entry->register_region);
+		if (rc)
+			return rc;
+		valr &= ~(entry->mask << entry->register_region.bit_offset);
+		val |= valr;
+	}
+	rc = acpi_atomic_write(val, &entry->register_region);
+
+	return rc;
+}
+
+int apei_exec_write_register(struct apei_exec_context *ctx,
+			     struct acpi_whea_header *entry)
+{
+	return __apei_exec_write_register(entry, ctx->value);
+}
+EXPORT_SYMBOL_GPL(apei_exec_write_register);
+
+int apei_exec_write_register_value(struct apei_exec_context *ctx,
+				   struct acpi_whea_header *entry)
+{
+	int rc;
+
+	ctx->value = entry->value;
+	rc = apei_exec_write_register(ctx, entry);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(apei_exec_write_register_value);
+
+int apei_exec_noop(struct apei_exec_context *ctx,
+		   struct acpi_whea_header *entry)
+{
+	return 0;
+}
+EXPORT_SYMBOL_GPL(apei_exec_noop);
+
+/*
+ * Interpret the specified action. Go through whole action table,
+ * execute all instructions belong to the action.
+ */
+int apei_exec_run(struct apei_exec_context *ctx, u8 action)
+{
+	int rc;
+	u32 i, ip;
+	struct acpi_whea_header *entry;
+	apei_exec_ins_func_t run;
+
+	ctx->ip = 0;
+
+	/*
+	 * "ip" is the instruction pointer of current instruction,
+	 * "ctx->ip" specifies the next instruction to executed,
+	 * instruction "run" function may change the "ctx->ip" to
+	 * implement "goto" semantics.
+	 */
+rewind:
+	ip = 0;
+	for (i = 0; i < ctx->entries; i++) {
+		entry = &ctx->action_table[i];
+		if (entry->action != action)
+			continue;
+		if (ip == ctx->ip) {
+			if (entry->instruction >= ctx->instructions ||
+			    !ctx->ins_table[entry->instruction].run) {
+				pr_warning(FW_WARN APEI_PFX
+			"Invalid action table, unknown instruction type: %d\n",
+					   entry->instruction);
+				return -EINVAL;
+			}
+			run = ctx->ins_table[entry->instruction].run;
+			rc = run(ctx, entry);
+			if (rc < 0)
+				return rc;
+			else if (rc != APEI_EXEC_SET_IP)
+				ctx->ip++;
+		}
+		ip++;
+		if (ctx->ip < ip)
+			goto rewind;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(apei_exec_run);
+
+typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx,
+				      struct acpi_whea_header *entry,
+				      void *data);
+
+static int apei_exec_for_each_entry(struct apei_exec_context *ctx,
+				    apei_exec_entry_func_t func,
+				    void *data,
+				    int *end)
+{
+	u8 ins;
+	int i, rc;
+	struct acpi_whea_header *entry;
+	struct apei_exec_ins_type *ins_table = ctx->ins_table;
+
+	for (i = 0; i < ctx->entries; i++) {
+		entry = ctx->action_table + i;
+		ins = entry->instruction;
+		if (end)
+			*end = i;
+		if (ins >= ctx->instructions || !ins_table[ins].run) {
+			pr_warning(FW_WARN APEI_PFX
+			"Invalid action table, unknown instruction type: %d\n",
+				   ins);
+			return -EINVAL;
+		}
+		rc = func(ctx, entry, data);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+static int pre_map_gar_callback(struct apei_exec_context *ctx,
+				struct acpi_whea_header *entry,
+				void *data)
+{
+	u8 ins = entry->instruction;
+
+	if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER)
+		return acpi_pre_map_gar(&entry->register_region);
+
+	return 0;
+}
+
+/*
+ * Pre-map all GARs in action table to make it possible to access them
+ * in NMI handler.
+ */
+int apei_exec_pre_map_gars(struct apei_exec_context *ctx)
+{
+	int rc, end;
+
+	rc = apei_exec_for_each_entry(ctx, pre_map_gar_callback,
+				      NULL, &end);
+	if (rc) {
+		struct apei_exec_context ctx_unmap;
+		memcpy(&ctx_unmap, ctx, sizeof(*ctx));
+		ctx_unmap.entries = end;
+		apei_exec_post_unmap_gars(&ctx_unmap);
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(apei_exec_pre_map_gars);
+
+static int post_unmap_gar_callback(struct apei_exec_context *ctx,
+				   struct acpi_whea_header *entry,
+				   void *data)
+{
+	u8 ins = entry->instruction;
+
+	if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER)
+		acpi_post_unmap_gar(&entry->register_region);
+
+	return 0;
+}
+
+/* Post-unmap all GAR in action table. */
+int apei_exec_post_unmap_gars(struct apei_exec_context *ctx)
+{
+	return apei_exec_for_each_entry(ctx, post_unmap_gar_callback,
+					NULL, NULL);
+}
+EXPORT_SYMBOL_GPL(apei_exec_post_unmap_gars);
+
+/*
+ * Resource management for GARs in APEI
+ */
+struct apei_res {
+	struct list_head list;
+	unsigned long start;
+	unsigned long end;
+};
+
+/* Collect all resources requested, to avoid conflict */
+struct apei_resources apei_resources_all = {
+	.iomem = LIST_HEAD_INIT(apei_resources_all.iomem),
+	.ioport = LIST_HEAD_INIT(apei_resources_all.ioport),
+};
+
+static int apei_res_add(struct list_head *res_list,
+			unsigned long start, unsigned long size)
+{
+	struct apei_res *res, *resn, *res_ins = NULL;
+	unsigned long end = start + size;
+
+	if (end <= start)
+		return 0;
+repeat:
+	list_for_each_entry_safe(res, resn, res_list, list) {
+		if (res->start > end || res->end < start)
+			continue;
+		else if (end <= res->end && start >= res->start) {
+			kfree(res_ins);
+			return 0;
+		}
+		list_del(&res->list);
+		res->start = start = min(res->start, start);
+		res->end = end = max(res->end, end);
+		kfree(res_ins);
+		res_ins = res;
+		goto repeat;
+	}
+
+	if (res_ins)
+		list_add(&res_ins->list, res_list);
+	else {
+		res_ins = kmalloc(sizeof(*res), GFP_KERNEL);
+		if (!res_ins)
+			return -ENOMEM;
+		res_ins->start = start;
+		res_ins->end = end;
+		list_add(&res_ins->list, res_list);
+	}
+
+	return 0;
+}
+
+static int apei_res_sub(struct list_head *res_list1,
+			struct list_head *res_list2)
+{
+	struct apei_res *res1, *resn1, *res2, *res;
+	res1 = list_entry(res_list1->next, struct apei_res, list);
+	resn1 = list_entry(res1->list.next, struct apei_res, list);
+	while (&res1->list != res_list1) {
+		list_for_each_entry(res2, res_list2, list) {
+			if (res1->start >= res2->end ||
+			    res1->end <= res2->start)
+				continue;
+			else if (res1->end <= res2->end &&
+				 res1->start >= res2->start) {
+				list_del(&res1->list);
+				kfree(res1);
+				break;
+			} else if (res1->end > res2->end &&
+				   res1->start < res2->start) {
+				res = kmalloc(sizeof(*res), GFP_KERNEL);
+				if (!res)
+					return -ENOMEM;
+				res->start = res2->end;
+				res->end = res1->end;
+				res1->end = res2->start;
+				list_add(&res->list, &res1->list);
+				resn1 = res;
+			} else {
+				if (res1->start < res2->start)
+					res1->end = res2->start;
+				else
+					res1->start = res2->end;
+			}
+		}
+		res1 = resn1;
+		resn1 = list_entry(resn1->list.next, struct apei_res, list);
+	}
+
+	return 0;
+}
+
+static void apei_res_clean(struct list_head *res_list)
+{
+	struct apei_res *res, *resn;
+
+	list_for_each_entry_safe(res, resn, res_list, list) {
+		list_del(&res->list);
+		kfree(res);
+	}
+}
+
+void apei_resources_fini(struct apei_resources *resources)
+{
+	apei_res_clean(&resources->iomem);
+	apei_res_clean(&resources->ioport);
+}
+EXPORT_SYMBOL_GPL(apei_resources_fini);
+
+static int apei_resources_merge(struct apei_resources *resources1,
+				struct apei_resources *resources2)
+{
+	int rc;
+	struct apei_res *res;
+
+	list_for_each_entry(res, &resources2->iomem, list) {
+		rc = apei_res_add(&resources1->iomem, res->start,
+				  res->end - res->start);
+		if (rc)
+			return rc;
+	}
+	list_for_each_entry(res, &resources2->ioport, list) {
+		rc = apei_res_add(&resources1->ioport, res->start,
+				  res->end - res->start);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+/*
+ * EINJ has two groups of GARs (EINJ table entry and trigger table
+ * entry), so common resources are subtracted from the trigger table
+ * resources before the second requesting.
+ */
+int apei_resources_sub(struct apei_resources *resources1,
+		       struct apei_resources *resources2)
+{
+	int rc;
+
+	rc = apei_res_sub(&resources1->iomem, &resources2->iomem);
+	if (rc)
+		return rc;
+	return apei_res_sub(&resources1->ioport, &resources2->ioport);
+}
+EXPORT_SYMBOL_GPL(apei_resources_sub);
+
+/*
+ * IO memory/port rersource management mechanism is used to check
+ * whether memory/port area used by GARs conflicts with normal memory
+ * or IO memory/port of devices.
+ */
+int apei_resources_request(struct apei_resources *resources,
+			   const char *desc)
+{
+	struct apei_res *res, *res_bak;
+	struct resource *r;
+
+	apei_resources_sub(resources, &apei_resources_all);
+
+	list_for_each_entry(res, &resources->iomem, list) {
+		r = request_mem_region(res->start, res->end - res->start,
+				       desc);
+		if (!r) {
+			pr_err(APEI_PFX
+		"Can not request iomem region <%016llx-%016llx> for GARs.\n",
+			       (unsigned long long)res->start,
+			       (unsigned long long)res->end);
+			res_bak = res;
+			goto err_unmap_iomem;
+		}
+	}
+
+	list_for_each_entry(res, &resources->ioport, list) {
+		r = request_region(res->start, res->end - res->start, desc);
+		if (!r) {
+			pr_err(APEI_PFX
+		"Can not request ioport region <%016llx-%016llx> for GARs.\n",
+			       (unsigned long long)res->start,
+			       (unsigned long long)res->end);
+			res_bak = res;
+			goto err_unmap_ioport;
+		}
+	}
+
+	apei_resources_merge(&apei_resources_all, resources);
+
+	return 0;
+err_unmap_ioport:
+	list_for_each_entry(res, &resources->ioport, list) {
+		if (res == res_bak)
+			break;
+		release_mem_region(res->start, res->end - res->start);
+	}
+	res_bak = NULL;
+err_unmap_iomem:
+	list_for_each_entry(res, &resources->iomem, list) {
+		if (res == res_bak)
+			break;
+		release_region(res->start, res->end - res->start);
+	}
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(apei_resources_request);
+
+void apei_resources_release(struct apei_resources *resources)
+{
+	struct apei_res *res;
+
+	list_for_each_entry(res, &resources->iomem, list)
+		release_mem_region(res->start, res->end - res->start);
+	list_for_each_entry(res, &resources->ioport, list)
+		release_region(res->start, res->end - res->start);
+
+	apei_resources_sub(&apei_resources_all, resources);
+}
+EXPORT_SYMBOL_GPL(apei_resources_release);
+
+static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr)
+{
+	u32 width, space_id;
+
+	width = reg->bit_width;
+	space_id = reg->space_id;
+	/* Handle possible alignment issues */
+	memcpy(paddr, &reg->address, sizeof(*paddr));
+	if (!*paddr) {
+		pr_warning(FW_BUG APEI_PFX
+			   "Invalid physical address in GAR [0x%llx/%u/%u]\n",
+			   *paddr, width, space_id);
+		return -EINVAL;
+	}
+
+	if ((width != 8) && (width != 16) && (width != 32) && (width != 64)) {
+		pr_warning(FW_BUG APEI_PFX
+			   "Invalid bit width in GAR [0x%llx/%u/%u]\n",
+			   *paddr, width, space_id);
+		return -EINVAL;
+	}
+
+	if (space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY &&
+	    space_id != ACPI_ADR_SPACE_SYSTEM_IO) {
+		pr_warning(FW_BUG APEI_PFX
+			   "Invalid address space type in GAR [0x%llx/%u/%u]\n",
+			   *paddr, width, space_id);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int collect_res_callback(struct apei_exec_context *ctx,
+				struct acpi_whea_header *entry,
+				void *data)
+{
+	struct apei_resources *resources = data;
+	struct acpi_generic_address *reg = &entry->register_region;
+	u8 ins = entry->instruction;
+	u64 paddr;
+	int rc;
+
+	if (!(ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER))
+		return 0;
+
+	rc = apei_check_gar(reg, &paddr);
+	if (rc)
+		return rc;
+
+	switch (reg->space_id) {
+	case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+		return apei_res_add(&resources->iomem, paddr,
+				    reg->bit_width / 8);
+	case ACPI_ADR_SPACE_SYSTEM_IO:
+		return apei_res_add(&resources->ioport, paddr,
+				    reg->bit_width / 8);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Same register may be used by multiple instructions in GARs, so
+ * resources are collected before requesting.
+ */
+int apei_exec_collect_resources(struct apei_exec_context *ctx,
+				struct apei_resources *resources)
+{
+	return apei_exec_for_each_entry(ctx, collect_res_callback,
+					resources, NULL);
+}
+EXPORT_SYMBOL_GPL(apei_exec_collect_resources);
+
+struct dentry *apei_get_debugfs_dir(void)
+{
+	static struct dentry *dapei;
+
+	if (!dapei)
+		dapei = debugfs_create_dir("apei", NULL);
+
+	return dapei;
+}
+EXPORT_SYMBOL_GPL(apei_get_debugfs_dir);
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
new file mode 100644
index 000000000000..18df1e940276
--- /dev/null
+++ b/drivers/acpi/apei/apei-internal.h
@@ -0,0 +1,114 @@
+/*
+ * apei-internal.h - ACPI Platform Error Interface internal
+ * definations.
+ */
+
+#ifndef APEI_INTERNAL_H
+#define APEI_INTERNAL_H
+
+#include <linux/cper.h>
+
+struct apei_exec_context;
+
+typedef int (*apei_exec_ins_func_t)(struct apei_exec_context *ctx,
+				    struct acpi_whea_header *entry);
+
+#define APEI_EXEC_INS_ACCESS_REGISTER	0x0001
+
+struct apei_exec_ins_type {
+	u32 flags;
+	apei_exec_ins_func_t run;
+};
+
+struct apei_exec_context {
+	u32 ip;
+	u64 value;
+	u64 var1;
+	u64 var2;
+	u64 src_base;
+	u64 dst_base;
+	struct apei_exec_ins_type *ins_table;
+	u32 instructions;
+	struct acpi_whea_header *action_table;
+	u32 entries;
+};
+
+void apei_exec_ctx_init(struct apei_exec_context *ctx,
+			struct apei_exec_ins_type *ins_table,
+			u32 instructions,
+			struct acpi_whea_header *action_table,
+			u32 entries);
+
+static inline void apei_exec_ctx_set_input(struct apei_exec_context *ctx,
+					   u64 input)
+{
+	ctx->value = input;
+}
+
+static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx)
+{
+	return ctx->value;
+}
+
+int apei_exec_run(struct apei_exec_context *ctx, u8 action);
+
+/* Common instruction implementation */
+
+/* IP has been set in instruction function */
+#define APEI_EXEC_SET_IP	1
+
+int __apei_exec_read_register(struct acpi_whea_header *entry, u64 *val);
+int __apei_exec_write_register(struct acpi_whea_header *entry, u64 val);
+int apei_exec_read_register(struct apei_exec_context *ctx,
+			    struct acpi_whea_header *entry);
+int apei_exec_read_register_value(struct apei_exec_context *ctx,
+				  struct acpi_whea_header *entry);
+int apei_exec_write_register(struct apei_exec_context *ctx,
+			     struct acpi_whea_header *entry);
+int apei_exec_write_register_value(struct apei_exec_context *ctx,
+				   struct acpi_whea_header *entry);
+int apei_exec_noop(struct apei_exec_context *ctx,
+		   struct acpi_whea_header *entry);
+int apei_exec_pre_map_gars(struct apei_exec_context *ctx);
+int apei_exec_post_unmap_gars(struct apei_exec_context *ctx);
+
+struct apei_resources {
+	struct list_head iomem;
+	struct list_head ioport;
+};
+
+static inline void apei_resources_init(struct apei_resources *resources)
+{
+	INIT_LIST_HEAD(&resources->iomem);
+	INIT_LIST_HEAD(&resources->ioport);
+}
+
+void apei_resources_fini(struct apei_resources *resources);
+int apei_resources_sub(struct apei_resources *resources1,
+		       struct apei_resources *resources2);
+int apei_resources_request(struct apei_resources *resources,
+			   const char *desc);
+void apei_resources_release(struct apei_resources *resources);
+int apei_exec_collect_resources(struct apei_exec_context *ctx,
+				struct apei_resources *resources);
+
+struct dentry;
+struct dentry *apei_get_debugfs_dir(void);
+
+#define apei_estatus_for_each_section(estatus, section)			\
+	for (section = (struct acpi_hest_generic_data *)(estatus + 1);	\
+	     (void *)section - (void *)estatus < estatus->data_length;	\
+	     section = (void *)(section+1) + section->error_data_length)
+
+static inline u32 apei_estatus_len(struct acpi_hest_generic_status *estatus)
+{
+	if (estatus->raw_data_length)
+		return estatus->raw_data_offset + \
+			estatus->raw_data_length;
+	else
+		return sizeof(*estatus) + estatus->data_length;
+}
+
+int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus);
+int apei_estatus_check(const struct acpi_hest_generic_status *estatus);
+#endif
diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
new file mode 100644
index 000000000000..f4cf2fc4c8c1
--- /dev/null
+++ b/drivers/acpi/apei/cper.c
@@ -0,0 +1,84 @@
+/*
+ * UEFI Common Platform Error Record (CPER) support
+ *
+ * Copyright (C) 2010, Intel Corp.
+ *	Author: Huang Ying <ying.huang@intel.com>
+ *
+ * CPER is the format used to describe platform hardware error by
+ * various APEI tables, such as ERST, BERT and HEST etc.
+ *
+ * For more information about CPER, please refer to Appendix N of UEFI
+ * Specification version 2.3.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/cper.h>
+#include <linux/acpi.h>
+
+/*
+ * CPER record ID need to be unique even after reboot, because record
+ * ID is used as index for ERST storage, while CPER records from
+ * multiple boot may co-exist in ERST.
+ */
+u64 cper_next_record_id(void)
+{
+	static atomic64_t seq;
+
+	if (!atomic64_read(&seq))
+		atomic64_set(&seq, ((u64)get_seconds()) << 32);
+
+	return atomic64_inc_return(&seq);
+}
+EXPORT_SYMBOL_GPL(cper_next_record_id);
+
+int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus)
+{
+	if (estatus->data_length &&
+	    estatus->data_length < sizeof(struct acpi_hest_generic_data))
+		return -EINVAL;
+	if (estatus->raw_data_length &&
+	    estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(apei_estatus_check_header);
+
+int apei_estatus_check(const struct acpi_hest_generic_status *estatus)
+{
+	struct acpi_hest_generic_data *gdata;
+	unsigned int data_len, gedata_len;
+	int rc;
+
+	rc = apei_estatus_check_header(estatus);
+	if (rc)
+		return rc;
+	data_len = estatus->data_length;
+	gdata = (struct acpi_hest_generic_data *)(estatus + 1);
+	while (data_len > sizeof(*gdata)) {
+		gedata_len = gdata->error_data_length;
+		if (gedata_len > data_len - sizeof(*gdata))
+			return -EINVAL;
+		data_len -= gedata_len + sizeof(*gdata);
+	}
+	if (data_len)
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(apei_estatus_check);
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
new file mode 100644
index 000000000000..465c885938ee
--- /dev/null
+++ b/drivers/acpi/apei/einj.c
@@ -0,0 +1,548 @@
+/*
+ * APEI Error INJection support
+ *
+ * EINJ provides a hardware error injection mechanism, this is useful
+ * for debugging and testing of other APEI and RAS features.
+ *
+ * For more information about EINJ, please refer to ACPI Specification
+ * version 4.0, section 17.5.
+ *
+ * Copyright 2009-2010 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/nmi.h>
+#include <linux/delay.h>
+#include <acpi/acpi.h>
+
+#include "apei-internal.h"
+
+#define EINJ_PFX "EINJ: "
+
+#define SPIN_UNIT		100			/* 100ns */
+/* Firmware should respond within 1 miliseconds */
+#define FIRMWARE_TIMEOUT	(1 * NSEC_PER_MSEC)
+
+/*
+ * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
+ * EINJ table through an unpublished extension. Use with caution as
+ * most will ignore the parameter and make their own choice of address
+ * for error injection.
+ */
+struct einj_parameter {
+	u64 type;
+	u64 reserved1;
+	u64 reserved2;
+	u64 param1;
+	u64 param2;
+};
+
+#define EINJ_OP_BUSY			0x1
+#define EINJ_STATUS_SUCCESS		0x0
+#define EINJ_STATUS_FAIL		0x1
+#define EINJ_STATUS_INVAL		0x2
+
+#define EINJ_TAB_ENTRY(tab)						\
+	((struct acpi_whea_header *)((char *)(tab) +			\
+				    sizeof(struct acpi_table_einj)))
+
+static struct acpi_table_einj *einj_tab;
+
+static struct apei_resources einj_resources;
+
+static struct apei_exec_ins_type einj_ins_type[] = {
+	[ACPI_EINJ_READ_REGISTER] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run   = apei_exec_read_register,
+	},
+	[ACPI_EINJ_READ_REGISTER_VALUE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run   = apei_exec_read_register_value,
+	},
+	[ACPI_EINJ_WRITE_REGISTER] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run   = apei_exec_write_register,
+	},
+	[ACPI_EINJ_WRITE_REGISTER_VALUE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run   = apei_exec_write_register_value,
+	},
+	[ACPI_EINJ_NOOP] = {
+		.flags = 0,
+		.run   = apei_exec_noop,
+	},
+};
+
+/*
+ * Prevent EINJ interpreter to run simultaneously, because the
+ * corresponding firmware implementation may not work properly when
+ * invoked simultaneously.
+ */
+static DEFINE_MUTEX(einj_mutex);
+
+static struct einj_parameter *einj_param;
+
+static void einj_exec_ctx_init(struct apei_exec_context *ctx)
+{
+	apei_exec_ctx_init(ctx, einj_ins_type, ARRAY_SIZE(einj_ins_type),
+			   EINJ_TAB_ENTRY(einj_tab), einj_tab->entries);
+}
+
+static int __einj_get_available_error_type(u32 *type)
+{
+	struct apei_exec_context ctx;
+	int rc;
+
+	einj_exec_ctx_init(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_EINJ_GET_ERROR_TYPE);
+	if (rc)
+		return rc;
+	*type = apei_exec_ctx_get_output(&ctx);
+
+	return 0;
+}
+
+/* Get error injection capabilities of the platform */
+static int einj_get_available_error_type(u32 *type)
+{
+	int rc;
+
+	mutex_lock(&einj_mutex);
+	rc = __einj_get_available_error_type(type);
+	mutex_unlock(&einj_mutex);
+
+	return rc;
+}
+
+static int einj_timedout(u64 *t)
+{
+	if ((s64)*t < SPIN_UNIT) {
+		pr_warning(FW_WARN EINJ_PFX
+			   "Firmware does not respond in time\n");
+		return 1;
+	}
+	*t -= SPIN_UNIT;
+	ndelay(SPIN_UNIT);
+	touch_nmi_watchdog();
+	return 0;
+}
+
+static u64 einj_get_parameter_address(void)
+{
+	int i;
+	u64 paddr = 0;
+	struct acpi_whea_header *entry;
+
+	entry = EINJ_TAB_ENTRY(einj_tab);
+	for (i = 0; i < einj_tab->entries; i++) {
+		if (entry->action == ACPI_EINJ_SET_ERROR_TYPE &&
+		    entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
+		    entry->register_region.space_id ==
+		    ACPI_ADR_SPACE_SYSTEM_MEMORY)
+			memcpy(&paddr, &entry->register_region.address,
+			       sizeof(paddr));
+		entry++;
+	}
+
+	return paddr;
+}
+
+/* do sanity check to trigger table */
+static int einj_check_trigger_header(struct acpi_einj_trigger *trigger_tab)
+{
+	if (trigger_tab->header_size != sizeof(struct acpi_einj_trigger))
+		return -EINVAL;
+	if (trigger_tab->table_size > PAGE_SIZE ||
+	    trigger_tab->table_size <= trigger_tab->header_size)
+		return -EINVAL;
+	if (trigger_tab->entry_count !=
+	    (trigger_tab->table_size - trigger_tab->header_size) /
+	    sizeof(struct acpi_einj_entry))
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Execute instructions in trigger error action table */
+static int __einj_error_trigger(u64 trigger_paddr)
+{
+	struct acpi_einj_trigger *trigger_tab = NULL;
+	struct apei_exec_context trigger_ctx;
+	struct apei_resources trigger_resources;
+	struct acpi_whea_header *trigger_entry;
+	struct resource *r;
+	u32 table_size;
+	int rc = -EIO;
+
+	r = request_mem_region(trigger_paddr, sizeof(*trigger_tab),
+			       "APEI EINJ Trigger Table");
+	if (!r) {
+		pr_err(EINJ_PFX
+	"Can not request iomem region <%016llx-%016llx> for Trigger table.\n",
+		       (unsigned long long)trigger_paddr,
+		       (unsigned long long)trigger_paddr+sizeof(*trigger_tab));
+		goto out;
+	}
+	trigger_tab = ioremap_cache(trigger_paddr, sizeof(*trigger_tab));
+	if (!trigger_tab) {
+		pr_err(EINJ_PFX "Failed to map trigger table!\n");
+		goto out_rel_header;
+	}
+	rc = einj_check_trigger_header(trigger_tab);
+	if (rc) {
+		pr_warning(FW_BUG EINJ_PFX
+			   "The trigger error action table is invalid\n");
+		goto out_rel_header;
+	}
+	rc = -EIO;
+	table_size = trigger_tab->table_size;
+	r = request_mem_region(trigger_paddr + sizeof(*trigger_tab),
+			       table_size - sizeof(*trigger_tab),
+			       "APEI EINJ Trigger Table");
+	if (!r) {
+		pr_err(EINJ_PFX
+"Can not request iomem region <%016llx-%016llx> for Trigger Table Entry.\n",
+		       (unsigned long long)trigger_paddr+sizeof(*trigger_tab),
+		       (unsigned long long)trigger_paddr + table_size);
+		goto out_rel_header;
+	}
+	iounmap(trigger_tab);
+	trigger_tab = ioremap_cache(trigger_paddr, table_size);
+	if (!trigger_tab) {
+		pr_err(EINJ_PFX "Failed to map trigger table!\n");
+		goto out_rel_entry;
+	}
+	trigger_entry = (struct acpi_whea_header *)
+		((char *)trigger_tab + sizeof(struct acpi_einj_trigger));
+	apei_resources_init(&trigger_resources);
+	apei_exec_ctx_init(&trigger_ctx, einj_ins_type,
+			   ARRAY_SIZE(einj_ins_type),
+			   trigger_entry, trigger_tab->entry_count);
+	rc = apei_exec_collect_resources(&trigger_ctx, &trigger_resources);
+	if (rc)
+		goto out_fini;
+	rc = apei_resources_sub(&trigger_resources, &einj_resources);
+	if (rc)
+		goto out_fini;
+	rc = apei_resources_request(&trigger_resources, "APEI EINJ Trigger");
+	if (rc)
+		goto out_fini;
+	rc = apei_exec_pre_map_gars(&trigger_ctx);
+	if (rc)
+		goto out_release;
+
+	rc = apei_exec_run(&trigger_ctx, ACPI_EINJ_TRIGGER_ERROR);
+
+	apei_exec_post_unmap_gars(&trigger_ctx);
+out_release:
+	apei_resources_release(&trigger_resources);
+out_fini:
+	apei_resources_fini(&trigger_resources);
+out_rel_entry:
+	release_mem_region(trigger_paddr + sizeof(*trigger_tab),
+			   table_size - sizeof(*trigger_tab));
+out_rel_header:
+	release_mem_region(trigger_paddr, sizeof(*trigger_tab));
+out:
+	if (trigger_tab)
+		iounmap(trigger_tab);
+
+	return rc;
+}
+
+static int __einj_error_inject(u32 type, u64 param1, u64 param2)
+{
+	struct apei_exec_context ctx;
+	u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT;
+	int rc;
+
+	einj_exec_ctx_init(&ctx);
+
+	rc = apei_exec_run(&ctx, ACPI_EINJ_BEGIN_OPERATION);
+	if (rc)
+		return rc;
+	apei_exec_ctx_set_input(&ctx, type);
+	rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE);
+	if (rc)
+		return rc;
+	if (einj_param) {
+		writeq(param1, &einj_param->param1);
+		writeq(param2, &einj_param->param2);
+	}
+	rc = apei_exec_run(&ctx, ACPI_EINJ_EXECUTE_OPERATION);
+	if (rc)
+		return rc;
+	for (;;) {
+		rc = apei_exec_run(&ctx, ACPI_EINJ_CHECK_BUSY_STATUS);
+		if (rc)
+			return rc;
+		val = apei_exec_ctx_get_output(&ctx);
+		if (!(val & EINJ_OP_BUSY))
+			break;
+		if (einj_timedout(&timeout))
+			return -EIO;
+	}
+	rc = apei_exec_run(&ctx, ACPI_EINJ_GET_COMMAND_STATUS);
+	if (rc)
+		return rc;
+	val = apei_exec_ctx_get_output(&ctx);
+	if (val != EINJ_STATUS_SUCCESS)
+		return -EBUSY;
+
+	rc = apei_exec_run(&ctx, ACPI_EINJ_GET_TRIGGER_TABLE);
+	if (rc)
+		return rc;
+	trigger_paddr = apei_exec_ctx_get_output(&ctx);
+	rc = __einj_error_trigger(trigger_paddr);
+	if (rc)
+		return rc;
+	rc = apei_exec_run(&ctx, ACPI_EINJ_END_OPERATION);
+
+	return rc;
+}
+
+/* Inject the specified hardware error */
+static int einj_error_inject(u32 type, u64 param1, u64 param2)
+{
+	int rc;
+
+	mutex_lock(&einj_mutex);
+	rc = __einj_error_inject(type, param1, param2);
+	mutex_unlock(&einj_mutex);
+
+	return rc;
+}
+
+static u32 error_type;
+static u64 error_param1;
+static u64 error_param2;
+static struct dentry *einj_debug_dir;
+
+static int available_error_type_show(struct seq_file *m, void *v)
+{
+	int rc;
+	u32 available_error_type = 0;
+
+	rc = einj_get_available_error_type(&available_error_type);
+	if (rc)
+		return rc;
+	if (available_error_type & 0x0001)
+		seq_printf(m, "0x00000001\tProcessor Correctable\n");
+	if (available_error_type & 0x0002)
+		seq_printf(m, "0x00000002\tProcessor Uncorrectable non-fatal\n");
+	if (available_error_type & 0x0004)
+		seq_printf(m, "0x00000004\tProcessor Uncorrectable fatal\n");
+	if (available_error_type & 0x0008)
+		seq_printf(m, "0x00000008\tMemory Correctable\n");
+	if (available_error_type & 0x0010)
+		seq_printf(m, "0x00000010\tMemory Uncorrectable non-fatal\n");
+	if (available_error_type & 0x0020)
+		seq_printf(m, "0x00000020\tMemory Uncorrectable fatal\n");
+	if (available_error_type & 0x0040)
+		seq_printf(m, "0x00000040\tPCI Express Correctable\n");
+	if (available_error_type & 0x0080)
+		seq_printf(m, "0x00000080\tPCI Express Uncorrectable non-fatal\n");
+	if (available_error_type & 0x0100)
+		seq_printf(m, "0x00000100\tPCI Express Uncorrectable fatal\n");
+	if (available_error_type & 0x0200)
+		seq_printf(m, "0x00000200\tPlatform Correctable\n");
+	if (available_error_type & 0x0400)
+		seq_printf(m, "0x00000400\tPlatform Uncorrectable non-fatal\n");
+	if (available_error_type & 0x0800)
+		seq_printf(m, "0x00000800\tPlatform Uncorrectable fatal\n");
+
+	return 0;
+}
+
+static int available_error_type_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, available_error_type_show, NULL);
+}
+
+static const struct file_operations available_error_type_fops = {
+	.open		= available_error_type_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int error_type_get(void *data, u64 *val)
+{
+	*val = error_type;
+
+	return 0;
+}
+
+static int error_type_set(void *data, u64 val)
+{
+	int rc;
+	u32 available_error_type = 0;
+
+	/* Only one error type can be specified */
+	if (val & (val - 1))
+		return -EINVAL;
+	rc = einj_get_available_error_type(&available_error_type);
+	if (rc)
+		return rc;
+	if (!(val & available_error_type))
+		return -EINVAL;
+	error_type = val;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(error_type_fops, error_type_get,
+			error_type_set, "0x%llx\n");
+
+static int error_inject_set(void *data, u64 val)
+{
+	if (!error_type)
+		return -EINVAL;
+
+	return einj_error_inject(error_type, error_param1, error_param2);
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(error_inject_fops, NULL,
+			error_inject_set, "%llu\n");
+
+static int einj_check_table(struct acpi_table_einj *einj_tab)
+{
+	if (einj_tab->header_length != sizeof(struct acpi_table_einj))
+		return -EINVAL;
+	if (einj_tab->header.length < sizeof(struct acpi_table_einj))
+		return -EINVAL;
+	if (einj_tab->entries !=
+	    (einj_tab->header.length - sizeof(struct acpi_table_einj)) /
+	    sizeof(struct acpi_einj_entry))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int __init einj_init(void)
+{
+	int rc;
+	u64 param_paddr;
+	acpi_status status;
+	struct dentry *fentry;
+	struct apei_exec_context ctx;
+
+	if (acpi_disabled)
+		return -ENODEV;
+
+	status = acpi_get_table(ACPI_SIG_EINJ, 0,
+				(struct acpi_table_header **)&einj_tab);
+	if (status == AE_NOT_FOUND) {
+		pr_info(EINJ_PFX "Table is not found!\n");
+		return -ENODEV;
+	} else if (ACPI_FAILURE(status)) {
+		const char *msg = acpi_format_exception(status);
+		pr_err(EINJ_PFX "Failed to get table, %s\n", msg);
+		return -EINVAL;
+	}
+
+	rc = einj_check_table(einj_tab);
+	if (rc) {
+		pr_warning(FW_BUG EINJ_PFX "EINJ table is invalid\n");
+		return -EINVAL;
+	}
+
+	rc = -ENOMEM;
+	einj_debug_dir = debugfs_create_dir("einj", apei_get_debugfs_dir());
+	if (!einj_debug_dir)
+		goto err_cleanup;
+	fentry = debugfs_create_file("available_error_type", S_IRUSR,
+				     einj_debug_dir, NULL,
+				     &available_error_type_fops);
+	if (!fentry)
+		goto err_cleanup;
+	fentry = debugfs_create_file("error_type", S_IRUSR | S_IWUSR,
+				     einj_debug_dir, NULL, &error_type_fops);
+	if (!fentry)
+		goto err_cleanup;
+	fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
+				    einj_debug_dir, &error_param1);
+	if (!fentry)
+		goto err_cleanup;
+	fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
+				    einj_debug_dir, &error_param2);
+	if (!fentry)
+		goto err_cleanup;
+	fentry = debugfs_create_file("error_inject", S_IWUSR,
+				     einj_debug_dir, NULL, &error_inject_fops);
+	if (!fentry)
+		goto err_cleanup;
+
+	apei_resources_init(&einj_resources);
+	einj_exec_ctx_init(&ctx);
+	rc = apei_exec_collect_resources(&ctx, &einj_resources);
+	if (rc)
+		goto err_fini;
+	rc = apei_resources_request(&einj_resources, "APEI EINJ");
+	if (rc)
+		goto err_fini;
+	rc = apei_exec_pre_map_gars(&ctx);
+	if (rc)
+		goto err_release;
+	param_paddr = einj_get_parameter_address();
+	if (param_paddr) {
+		einj_param = ioremap(param_paddr, sizeof(*einj_param));
+		rc = -ENOMEM;
+		if (!einj_param)
+			goto err_unmap;
+	}
+
+	pr_info(EINJ_PFX "Error INJection is initialized.\n");
+
+	return 0;
+
+err_unmap:
+	apei_exec_post_unmap_gars(&ctx);
+err_release:
+	apei_resources_release(&einj_resources);
+err_fini:
+	apei_resources_fini(&einj_resources);
+err_cleanup:
+	debugfs_remove_recursive(einj_debug_dir);
+
+	return rc;
+}
+
+static void __exit einj_exit(void)
+{
+	struct apei_exec_context ctx;
+
+	if (einj_param)
+		iounmap(einj_param);
+	einj_exec_ctx_init(&ctx);
+	apei_exec_post_unmap_gars(&ctx);
+	apei_resources_release(&einj_resources);
+	apei_resources_fini(&einj_resources);
+	debugfs_remove_recursive(einj_debug_dir);
+}
+
+module_init(einj_init);
+module_exit(einj_exit);
+
+MODULE_AUTHOR("Huang Ying");
+MODULE_DESCRIPTION("APEI Error INJection support");
+MODULE_LICENSE("GPL");
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
new file mode 100644
index 000000000000..2ebc39115507
--- /dev/null
+++ b/drivers/acpi/apei/erst.c
@@ -0,0 +1,855 @@
+/*
+ * APEI Error Record Serialization Table support
+ *
+ * ERST is a way provided by APEI to save and retrieve hardware error
+ * infomation to and from a persistent store.
+ *
+ * For more information about ERST, please refer to ACPI Specification
+ * version 4.0, section 17.4.
+ *
+ * Copyright 2010 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/acpi.h>
+#include <linux/uaccess.h>
+#include <linux/cper.h>
+#include <linux/nmi.h>
+#include <acpi/apei.h>
+
+#include "apei-internal.h"
+
+#define ERST_PFX "ERST: "
+
+/* ERST command status */
+#define ERST_STATUS_SUCCESS			0x0
+#define ERST_STATUS_NOT_ENOUGH_SPACE		0x1
+#define ERST_STATUS_HARDWARE_NOT_AVAILABLE	0x2
+#define ERST_STATUS_FAILED			0x3
+#define ERST_STATUS_RECORD_STORE_EMPTY		0x4
+#define ERST_STATUS_RECORD_NOT_FOUND		0x5
+
+#define ERST_TAB_ENTRY(tab)						\
+	((struct acpi_whea_header *)((char *)(tab) +			\
+				     sizeof(struct acpi_table_erst)))
+
+#define SPIN_UNIT		100			/* 100ns */
+/* Firmware should respond within 1 miliseconds */
+#define FIRMWARE_TIMEOUT	(1 * NSEC_PER_MSEC)
+#define FIRMWARE_MAX_STALL	50			/* 50us */
+
+int erst_disable;
+EXPORT_SYMBOL_GPL(erst_disable);
+
+static struct acpi_table_erst *erst_tab;
+
+/* ERST Error Log Address Range atrributes */
+#define ERST_RANGE_RESERVED	0x0001
+#define ERST_RANGE_NVRAM	0x0002
+#define ERST_RANGE_SLOW		0x0004
+
+/*
+ * ERST Error Log Address Range, used as buffer for reading/writing
+ * error records.
+ */
+static struct erst_erange {
+	u64 base;
+	u64 size;
+	void __iomem *vaddr;
+	u32 attr;
+} erst_erange;
+
+/*
+ * Prevent ERST interpreter to run simultaneously, because the
+ * corresponding firmware implementation may not work properly when
+ * invoked simultaneously.
+ *
+ * It is used to provide exclusive accessing for ERST Error Log
+ * Address Range too.
+ */
+static DEFINE_SPINLOCK(erst_lock);
+
+static inline int erst_errno(int command_status)
+{
+	switch (command_status) {
+	case ERST_STATUS_SUCCESS:
+		return 0;
+	case ERST_STATUS_HARDWARE_NOT_AVAILABLE:
+		return -ENODEV;
+	case ERST_STATUS_NOT_ENOUGH_SPACE:
+		return -ENOSPC;
+	case ERST_STATUS_RECORD_STORE_EMPTY:
+	case ERST_STATUS_RECORD_NOT_FOUND:
+		return -ENOENT;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int erst_timedout(u64 *t, u64 spin_unit)
+{
+	if ((s64)*t < spin_unit) {
+		pr_warning(FW_WARN ERST_PFX
+			   "Firmware does not respond in time\n");
+		return 1;
+	}
+	*t -= spin_unit;
+	ndelay(spin_unit);
+	touch_nmi_watchdog();
+	return 0;
+}
+
+static int erst_exec_load_var1(struct apei_exec_context *ctx,
+			       struct acpi_whea_header *entry)
+{
+	return __apei_exec_read_register(entry, &ctx->var1);
+}
+
+static int erst_exec_load_var2(struct apei_exec_context *ctx,
+			       struct acpi_whea_header *entry)
+{
+	return __apei_exec_read_register(entry, &ctx->var2);
+}
+
+static int erst_exec_store_var1(struct apei_exec_context *ctx,
+				struct acpi_whea_header *entry)
+{
+	return __apei_exec_write_register(entry, ctx->var1);
+}
+
+static int erst_exec_add(struct apei_exec_context *ctx,
+			 struct acpi_whea_header *entry)
+{
+	ctx->var1 += ctx->var2;
+	return 0;
+}
+
+static int erst_exec_subtract(struct apei_exec_context *ctx,
+			      struct acpi_whea_header *entry)
+{
+	ctx->var1 -= ctx->var2;
+	return 0;
+}
+
+static int erst_exec_add_value(struct apei_exec_context *ctx,
+			       struct acpi_whea_header *entry)
+{
+	int rc;
+	u64 val;
+
+	rc = __apei_exec_read_register(entry, &val);
+	if (rc)
+		return rc;
+	val += ctx->value;
+	rc = __apei_exec_write_register(entry, val);
+	return rc;
+}
+
+static int erst_exec_subtract_value(struct apei_exec_context *ctx,
+				    struct acpi_whea_header *entry)
+{
+	int rc;
+	u64 val;
+
+	rc = __apei_exec_read_register(entry, &val);
+	if (rc)
+		return rc;
+	val -= ctx->value;
+	rc = __apei_exec_write_register(entry, val);
+	return rc;
+}
+
+static int erst_exec_stall(struct apei_exec_context *ctx,
+			   struct acpi_whea_header *entry)
+{
+	u64 stall_time;
+
+	if (ctx->value > FIRMWARE_MAX_STALL) {
+		if (!in_nmi())
+			pr_warning(FW_WARN ERST_PFX
+			"Too long stall time for stall instruction: %llx.\n",
+				   ctx->value);
+		stall_time = FIRMWARE_MAX_STALL;
+	} else
+		stall_time = ctx->value;
+	udelay(stall_time);
+	return 0;
+}
+
+static int erst_exec_stall_while_true(struct apei_exec_context *ctx,
+				      struct acpi_whea_header *entry)
+{
+	int rc;
+	u64 val;
+	u64 timeout = FIRMWARE_TIMEOUT;
+	u64 stall_time;
+
+	if (ctx->var1 > FIRMWARE_MAX_STALL) {
+		if (!in_nmi())
+			pr_warning(FW_WARN ERST_PFX
+		"Too long stall time for stall while true instruction: %llx.\n",
+				   ctx->var1);
+		stall_time = FIRMWARE_MAX_STALL;
+	} else
+		stall_time = ctx->var1;
+
+	for (;;) {
+		rc = __apei_exec_read_register(entry, &val);
+		if (rc)
+			return rc;
+		if (val != ctx->value)
+			break;
+		if (erst_timedout(&timeout, stall_time * NSEC_PER_USEC))
+			return -EIO;
+	}
+	return 0;
+}
+
+static int erst_exec_skip_next_instruction_if_true(
+	struct apei_exec_context *ctx,
+	struct acpi_whea_header *entry)
+{
+	int rc;
+	u64 val;
+
+	rc = __apei_exec_read_register(entry, &val);
+	if (rc)
+		return rc;
+	if (val == ctx->value) {
+		ctx->ip += 2;
+		return APEI_EXEC_SET_IP;
+	}
+
+	return 0;
+}
+
+static int erst_exec_goto(struct apei_exec_context *ctx,
+			  struct acpi_whea_header *entry)
+{
+	ctx->ip = ctx->value;
+	return APEI_EXEC_SET_IP;
+}
+
+static int erst_exec_set_src_address_base(struct apei_exec_context *ctx,
+					  struct acpi_whea_header *entry)
+{
+	return __apei_exec_read_register(entry, &ctx->src_base);
+}
+
+static int erst_exec_set_dst_address_base(struct apei_exec_context *ctx,
+					  struct acpi_whea_header *entry)
+{
+	return __apei_exec_read_register(entry, &ctx->dst_base);
+}
+
+static int erst_exec_move_data(struct apei_exec_context *ctx,
+			       struct acpi_whea_header *entry)
+{
+	int rc;
+	u64 offset;
+
+	rc = __apei_exec_read_register(entry, &offset);
+	if (rc)
+		return rc;
+	memmove((void *)ctx->dst_base + offset,
+		(void *)ctx->src_base + offset,
+		ctx->var2);
+
+	return 0;
+}
+
+static struct apei_exec_ins_type erst_ins_type[] = {
+	[ACPI_ERST_READ_REGISTER] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = apei_exec_read_register,
+	},
+	[ACPI_ERST_READ_REGISTER_VALUE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = apei_exec_read_register_value,
+	},
+	[ACPI_ERST_WRITE_REGISTER] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = apei_exec_write_register,
+	},
+	[ACPI_ERST_WRITE_REGISTER_VALUE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = apei_exec_write_register_value,
+	},
+	[ACPI_ERST_NOOP] = {
+		.flags = 0,
+		.run = apei_exec_noop,
+	},
+	[ACPI_ERST_LOAD_VAR1] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_load_var1,
+	},
+	[ACPI_ERST_LOAD_VAR2] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_load_var2,
+	},
+	[ACPI_ERST_STORE_VAR1] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_store_var1,
+	},
+	[ACPI_ERST_ADD] = {
+		.flags = 0,
+		.run = erst_exec_add,
+	},
+	[ACPI_ERST_SUBTRACT] = {
+		.flags = 0,
+		.run = erst_exec_subtract,
+	},
+	[ACPI_ERST_ADD_VALUE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_add_value,
+	},
+	[ACPI_ERST_SUBTRACT_VALUE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_subtract_value,
+	},
+	[ACPI_ERST_STALL] = {
+		.flags = 0,
+		.run = erst_exec_stall,
+	},
+	[ACPI_ERST_STALL_WHILE_TRUE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_stall_while_true,
+	},
+	[ACPI_ERST_SKIP_NEXT_IF_TRUE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_skip_next_instruction_if_true,
+	},
+	[ACPI_ERST_GOTO] = {
+		.flags = 0,
+		.run = erst_exec_goto,
+	},
+	[ACPI_ERST_SET_SRC_ADDRESS_BASE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_set_src_address_base,
+	},
+	[ACPI_ERST_SET_DST_ADDRESS_BASE] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_set_dst_address_base,
+	},
+	[ACPI_ERST_MOVE_DATA] = {
+		.flags = APEI_EXEC_INS_ACCESS_REGISTER,
+		.run = erst_exec_move_data,
+	},
+};
+
+static inline void erst_exec_ctx_init(struct apei_exec_context *ctx)
+{
+	apei_exec_ctx_init(ctx, erst_ins_type, ARRAY_SIZE(erst_ins_type),
+			   ERST_TAB_ENTRY(erst_tab), erst_tab->entries);
+}
+
+static int erst_get_erange(struct erst_erange *range)
+{
+	struct apei_exec_context ctx;
+	int rc;
+
+	erst_exec_ctx_init(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_RANGE);
+	if (rc)
+		return rc;
+	range->base = apei_exec_ctx_get_output(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_LENGTH);
+	if (rc)
+		return rc;
+	range->size = apei_exec_ctx_get_output(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_ERST_GET_ERROR_ATTRIBUTES);
+	if (rc)
+		return rc;
+	range->attr = apei_exec_ctx_get_output(&ctx);
+
+	return 0;
+}
+
+static ssize_t __erst_get_record_count(void)
+{
+	struct apei_exec_context ctx;
+	int rc;
+
+	erst_exec_ctx_init(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_COUNT);
+	if (rc)
+		return rc;
+	return apei_exec_ctx_get_output(&ctx);
+}
+
+ssize_t erst_get_record_count(void)
+{
+	ssize_t count;
+	unsigned long flags;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	spin_lock_irqsave(&erst_lock, flags);
+	count = __erst_get_record_count();
+	spin_unlock_irqrestore(&erst_lock, flags);
+
+	return count;
+}
+EXPORT_SYMBOL_GPL(erst_get_record_count);
+
+static int __erst_get_next_record_id(u64 *record_id)
+{
+	struct apei_exec_context ctx;
+	int rc;
+
+	erst_exec_ctx_init(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_ERST_GET_RECORD_ID);
+	if (rc)
+		return rc;
+	*record_id = apei_exec_ctx_get_output(&ctx);
+
+	return 0;
+}
+
+/*
+ * Get the record ID of an existing error record on the persistent
+ * storage. If there is no error record on the persistent storage, the
+ * returned record_id is APEI_ERST_INVALID_RECORD_ID.
+ */
+int erst_get_next_record_id(u64 *record_id)
+{
+	int rc;
+	unsigned long flags;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	spin_lock_irqsave(&erst_lock, flags);
+	rc = __erst_get_next_record_id(record_id);
+	spin_unlock_irqrestore(&erst_lock, flags);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(erst_get_next_record_id);
+
+static int __erst_write_to_storage(u64 offset)
+{
+	struct apei_exec_context ctx;
+	u64 timeout = FIRMWARE_TIMEOUT;
+	u64 val;
+	int rc;
+
+	erst_exec_ctx_init(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_WRITE);
+	if (rc)
+		return rc;
+	apei_exec_ctx_set_input(&ctx, offset);
+	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET);
+	if (rc)
+		return rc;
+	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
+	if (rc)
+		return rc;
+	for (;;) {
+		rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
+		if (rc)
+			return rc;
+		val = apei_exec_ctx_get_output(&ctx);
+		if (!val)
+			break;
+		if (erst_timedout(&timeout, SPIN_UNIT))
+			return -EIO;
+	}
+	rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
+	if (rc)
+		return rc;
+	val = apei_exec_ctx_get_output(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_ERST_END);
+	if (rc)
+		return rc;
+
+	return erst_errno(val);
+}
+
+static int __erst_read_from_storage(u64 record_id, u64 offset)
+{
+	struct apei_exec_context ctx;
+	u64 timeout = FIRMWARE_TIMEOUT;
+	u64 val;
+	int rc;
+
+	erst_exec_ctx_init(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_READ);
+	if (rc)
+		return rc;
+	apei_exec_ctx_set_input(&ctx, offset);
+	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_OFFSET);
+	if (rc)
+		return rc;
+	apei_exec_ctx_set_input(&ctx, record_id);
+	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID);
+	if (rc)
+		return rc;
+	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
+	if (rc)
+		return rc;
+	for (;;) {
+		rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
+		if (rc)
+			return rc;
+		val = apei_exec_ctx_get_output(&ctx);
+		if (!val)
+			break;
+		if (erst_timedout(&timeout, SPIN_UNIT))
+			return -EIO;
+	};
+	rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
+	if (rc)
+		return rc;
+	val = apei_exec_ctx_get_output(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_ERST_END);
+	if (rc)
+		return rc;
+
+	return erst_errno(val);
+}
+
+static int __erst_clear_from_storage(u64 record_id)
+{
+	struct apei_exec_context ctx;
+	u64 timeout = FIRMWARE_TIMEOUT;
+	u64 val;
+	int rc;
+
+	erst_exec_ctx_init(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_CLEAR);
+	if (rc)
+		return rc;
+	apei_exec_ctx_set_input(&ctx, record_id);
+	rc = apei_exec_run(&ctx, ACPI_ERST_SET_RECORD_ID);
+	if (rc)
+		return rc;
+	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_OPERATION);
+	if (rc)
+		return rc;
+	for (;;) {
+		rc = apei_exec_run(&ctx, ACPI_ERST_CHECK_BUSY_STATUS);
+		if (rc)
+			return rc;
+		val = apei_exec_ctx_get_output(&ctx);
+		if (!val)
+			break;
+		if (erst_timedout(&timeout, SPIN_UNIT))
+			return -EIO;
+	}
+	rc = apei_exec_run(&ctx, ACPI_ERST_GET_COMMAND_STATUS);
+	if (rc)
+		return rc;
+	val = apei_exec_ctx_get_output(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_ERST_END);
+	if (rc)
+		return rc;
+
+	return erst_errno(val);
+}
+
+/* NVRAM ERST Error Log Address Range is not supported yet */
+static void pr_unimpl_nvram(void)
+{
+	if (printk_ratelimit())
+		pr_warning(ERST_PFX
+		"NVRAM ERST Log Address Range is not implemented yet\n");
+}
+
+static int __erst_write_to_nvram(const struct cper_record_header *record)
+{
+	/* do not print message, because printk is not safe for NMI */
+	return -ENOSYS;
+}
+
+static int __erst_read_to_erange_from_nvram(u64 record_id, u64 *offset)
+{
+	pr_unimpl_nvram();
+	return -ENOSYS;
+}
+
+static int __erst_clear_from_nvram(u64 record_id)
+{
+	pr_unimpl_nvram();
+	return -ENOSYS;
+}
+
+int erst_write(const struct cper_record_header *record)
+{
+	int rc;
+	unsigned long flags;
+	struct cper_record_header *rcd_erange;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	if (memcmp(record->signature, CPER_SIG_RECORD, CPER_SIG_SIZE))
+		return -EINVAL;
+
+	if (erst_erange.attr & ERST_RANGE_NVRAM) {
+		if (!spin_trylock_irqsave(&erst_lock, flags))
+			return -EBUSY;
+		rc = __erst_write_to_nvram(record);
+		spin_unlock_irqrestore(&erst_lock, flags);
+		return rc;
+	}
+
+	if (record->record_length > erst_erange.size)
+		return -EINVAL;
+
+	if (!spin_trylock_irqsave(&erst_lock, flags))
+		return -EBUSY;
+	memcpy(erst_erange.vaddr, record, record->record_length);
+	rcd_erange = erst_erange.vaddr;
+	/* signature for serialization system */
+	memcpy(&rcd_erange->persistence_information, "ER", 2);
+
+	rc = __erst_write_to_storage(0);
+	spin_unlock_irqrestore(&erst_lock, flags);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(erst_write);
+
+static int __erst_read_to_erange(u64 record_id, u64 *offset)
+{
+	int rc;
+
+	if (erst_erange.attr & ERST_RANGE_NVRAM)
+		return __erst_read_to_erange_from_nvram(
+			record_id, offset);
+
+	rc = __erst_read_from_storage(record_id, 0);
+	if (rc)
+		return rc;
+	*offset = 0;
+
+	return 0;
+}
+
+static ssize_t __erst_read(u64 record_id, struct cper_record_header *record,
+			   size_t buflen)
+{
+	int rc;
+	u64 offset, len = 0;
+	struct cper_record_header *rcd_tmp;
+
+	rc = __erst_read_to_erange(record_id, &offset);
+	if (rc)
+		return rc;
+	rcd_tmp = erst_erange.vaddr + offset;
+	len = rcd_tmp->record_length;
+	if (len <= buflen)
+		memcpy(record, rcd_tmp, len);
+
+	return len;
+}
+
+/*
+ * If return value > buflen, the buffer size is not big enough,
+ * else if return value < 0, something goes wrong,
+ * else everything is OK, and return value is record length
+ */
+ssize_t erst_read(u64 record_id, struct cper_record_header *record,
+		  size_t buflen)
+{
+	ssize_t len;
+	unsigned long flags;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	spin_lock_irqsave(&erst_lock, flags);
+	len = __erst_read(record_id, record, buflen);
+	spin_unlock_irqrestore(&erst_lock, flags);
+	return len;
+}
+EXPORT_SYMBOL_GPL(erst_read);
+
+/*
+ * If return value > buflen, the buffer size is not big enough,
+ * else if return value = 0, there is no more record to read,
+ * else if return value < 0, something goes wrong,
+ * else everything is OK, and return value is record length
+ */
+ssize_t erst_read_next(struct cper_record_header *record, size_t buflen)
+{
+	int rc;
+	ssize_t len;
+	unsigned long flags;
+	u64 record_id;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	spin_lock_irqsave(&erst_lock, flags);
+	rc = __erst_get_next_record_id(&record_id);
+	if (rc) {
+		spin_unlock_irqrestore(&erst_lock, flags);
+		return rc;
+	}
+	/* no more record */
+	if (record_id == APEI_ERST_INVALID_RECORD_ID) {
+		spin_unlock_irqrestore(&erst_lock, flags);
+		return 0;
+	}
+
+	len = __erst_read(record_id, record, buflen);
+	spin_unlock_irqrestore(&erst_lock, flags);
+
+	return len;
+}
+EXPORT_SYMBOL_GPL(erst_read_next);
+
+int erst_clear(u64 record_id)
+{
+	int rc;
+	unsigned long flags;
+
+	if (erst_disable)
+		return -ENODEV;
+
+	spin_lock_irqsave(&erst_lock, flags);
+	if (erst_erange.attr & ERST_RANGE_NVRAM)
+		rc = __erst_clear_from_nvram(record_id);
+	else
+		rc = __erst_clear_from_storage(record_id);
+	spin_unlock_irqrestore(&erst_lock, flags);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(erst_clear);
+
+static int __init setup_erst_disable(char *str)
+{
+	erst_disable = 1;
+	return 0;
+}
+
+__setup("erst_disable", setup_erst_disable);
+
+static int erst_check_table(struct acpi_table_erst *erst_tab)
+{
+	if (erst_tab->header_length != sizeof(struct acpi_table_erst))
+		return -EINVAL;
+	if (erst_tab->header.length < sizeof(struct acpi_table_erst))
+		return -EINVAL;
+	if (erst_tab->entries !=
+	    (erst_tab->header.length - sizeof(struct acpi_table_erst)) /
+	    sizeof(struct acpi_erst_entry))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int __init erst_init(void)
+{
+	int rc = 0;
+	acpi_status status;
+	struct apei_exec_context ctx;
+	struct apei_resources erst_resources;
+	struct resource *r;
+
+	if (acpi_disabled)
+		goto err;
+
+	if (erst_disable) {
+		pr_info(ERST_PFX
+	"Error Record Serialization Table (ERST) support is disabled.\n");
+		goto err;
+	}
+
+	status = acpi_get_table(ACPI_SIG_ERST, 0,
+				(struct acpi_table_header **)&erst_tab);
+	if (status == AE_NOT_FOUND) {
+		pr_err(ERST_PFX "Table is not found!\n");
+		goto err;
+	} else if (ACPI_FAILURE(status)) {
+		const char *msg = acpi_format_exception(status);
+		pr_err(ERST_PFX "Failed to get table, %s\n", msg);
+		rc = -EINVAL;
+		goto err;
+	}
+
+	rc = erst_check_table(erst_tab);
+	if (rc) {
+		pr_err(FW_BUG ERST_PFX "ERST table is invalid\n");
+		goto err;
+	}
+
+	apei_resources_init(&erst_resources);
+	erst_exec_ctx_init(&ctx);
+	rc = apei_exec_collect_resources(&ctx, &erst_resources);
+	if (rc)
+		goto err_fini;
+	rc = apei_resources_request(&erst_resources, "APEI ERST");
+	if (rc)
+		goto err_fini;
+	rc = apei_exec_pre_map_gars(&ctx);
+	if (rc)
+		goto err_release;
+	rc = erst_get_erange(&erst_erange);
+	if (rc) {
+		if (rc == -ENODEV)
+			pr_info(ERST_PFX
+	"The corresponding hardware device or firmware implementation "
+	"is not available.\n");
+		else
+			pr_err(ERST_PFX
+			       "Failed to get Error Log Address Range.\n");
+		goto err_unmap_reg;
+	}
+
+	r = request_mem_region(erst_erange.base, erst_erange.size, "APEI ERST");
+	if (!r) {
+		pr_err(ERST_PFX
+		"Can not request iomem region <0x%16llx-0x%16llx> for ERST.\n",
+		(unsigned long long)erst_erange.base,
+		(unsigned long long)erst_erange.base + erst_erange.size);
+		rc = -EIO;
+		goto err_unmap_reg;
+	}
+	rc = -ENOMEM;
+	erst_erange.vaddr = ioremap_cache(erst_erange.base,
+					  erst_erange.size);
+	if (!erst_erange.vaddr)
+		goto err_release_erange;
+
+	pr_info(ERST_PFX
+	"Error Record Serialization Table (ERST) support is initialized.\n");
+
+	return 0;
+
+err_release_erange:
+	release_mem_region(erst_erange.base, erst_erange.size);
+err_unmap_reg:
+	apei_exec_post_unmap_gars(&ctx);
+err_release:
+	apei_resources_release(&erst_resources);
+err_fini:
+	apei_resources_fini(&erst_resources);
+err:
+	erst_disable = 1;
+	return rc;
+}
+
+device_initcall(erst_init);
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
new file mode 100644
index 000000000000..fd0cc016a099
--- /dev/null
+++ b/drivers/acpi/apei/ghes.c
@@ -0,0 +1,427 @@
+/*
+ * APEI Generic Hardware Error Source support
+ *
+ * Generic Hardware Error Source provides a way to report platform
+ * hardware errors (such as that from chipset). It works in so called
+ * "Firmware First" mode, that is, hardware errors are reported to
+ * firmware firstly, then reported to Linux by firmware. This way,
+ * some non-standard hardware error registers or non-standard hardware
+ * link can be checked by firmware to produce more hardware error
+ * information for Linux.
+ *
+ * For more information about Generic Hardware Error Source, please
+ * refer to ACPI Specification version 4.0, section 17.3.2.6
+ *
+ * Now, only SCI notification type and memory errors are
+ * supported. More notification type and hardware error type will be
+ * added later.
+ *
+ * Copyright 2010 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/cper.h>
+#include <linux/kdebug.h>
+#include <acpi/apei.h>
+#include <acpi/atomicio.h>
+#include <acpi/hed.h>
+#include <asm/mce.h>
+
+#include "apei-internal.h"
+
+#define GHES_PFX	"GHES: "
+
+#define GHES_ESTATUS_MAX_SIZE		65536
+
+/*
+ * One struct ghes is created for each generic hardware error
+ * source.
+ *
+ * It provides the context for APEI hardware error timer/IRQ/SCI/NMI
+ * handler. Handler for one generic hardware error source is only
+ * triggered after the previous one is done. So handler can uses
+ * struct ghes without locking.
+ *
+ * estatus: memory buffer for error status block, allocated during
+ * HEST parsing.
+ */
+#define GHES_TO_CLEAR		0x0001
+
+struct ghes {
+	struct acpi_hest_generic *generic;
+	struct acpi_hest_generic_status *estatus;
+	struct list_head list;
+	u64 buffer_paddr;
+	unsigned long flags;
+};
+
+/*
+ * Error source lists, one list for each notification method. The
+ * members in lists are struct ghes.
+ *
+ * The list members are only added in HEST parsing and deleted during
+ * module_exit, that is, single-threaded. So no lock is needed for
+ * that.
+ *
+ * But the mutual exclusion is needed between members adding/deleting
+ * and timer/IRQ/SCI/NMI handler, which may traverse the list. RCU is
+ * used for that.
+ */
+static LIST_HEAD(ghes_sci);
+
+static struct ghes *ghes_new(struct acpi_hest_generic *generic)
+{
+	struct ghes *ghes;
+	unsigned int error_block_length;
+	int rc;
+
+	ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
+	if (!ghes)
+		return ERR_PTR(-ENOMEM);
+	ghes->generic = generic;
+	INIT_LIST_HEAD(&ghes->list);
+	rc = acpi_pre_map_gar(&generic->error_status_address);
+	if (rc)
+		goto err_free;
+	error_block_length = generic->error_block_length;
+	if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
+		pr_warning(FW_WARN GHES_PFX
+			   "Error status block length is too long: %u for "
+			   "generic hardware error source: %d.\n",
+			   error_block_length, generic->header.source_id);
+		error_block_length = GHES_ESTATUS_MAX_SIZE;
+	}
+	ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
+	if (!ghes->estatus) {
+		rc = -ENOMEM;
+		goto err_unmap;
+	}
+
+	return ghes;
+
+err_unmap:
+	acpi_post_unmap_gar(&generic->error_status_address);
+err_free:
+	kfree(ghes);
+	return ERR_PTR(rc);
+}
+
+static void ghes_fini(struct ghes *ghes)
+{
+	kfree(ghes->estatus);
+	acpi_post_unmap_gar(&ghes->generic->error_status_address);
+}
+
+enum {
+	GHES_SER_NO = 0x0,
+	GHES_SER_CORRECTED = 0x1,
+	GHES_SER_RECOVERABLE = 0x2,
+	GHES_SER_PANIC = 0x3,
+};
+
+static inline int ghes_severity(int severity)
+{
+	switch (severity) {
+	case CPER_SER_INFORMATIONAL:
+		return GHES_SER_NO;
+	case CPER_SER_CORRECTED:
+		return GHES_SER_CORRECTED;
+	case CPER_SER_RECOVERABLE:
+		return GHES_SER_RECOVERABLE;
+	case CPER_SER_FATAL:
+		return GHES_SER_PANIC;
+	default:
+		/* Unkown, go panic */
+		return GHES_SER_PANIC;
+	}
+}
+
+/* SCI handler run in work queue, so ioremap can be used here */
+static int ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
+				 int from_phys)
+{
+	void *vaddr;
+
+	vaddr = ioremap_cache(paddr, len);
+	if (!vaddr)
+		return -ENOMEM;
+	if (from_phys)
+		memcpy(buffer, vaddr, len);
+	else
+		memcpy(vaddr, buffer, len);
+	iounmap(vaddr);
+
+	return 0;
+}
+
+static int ghes_read_estatus(struct ghes *ghes, int silent)
+{
+	struct acpi_hest_generic *g = ghes->generic;
+	u64 buf_paddr;
+	u32 len;
+	int rc;
+
+	rc = acpi_atomic_read(&buf_paddr, &g->error_status_address);
+	if (rc) {
+		if (!silent && printk_ratelimit())
+			pr_warning(FW_WARN GHES_PFX
+"Failed to read error status block address for hardware error source: %d.\n",
+				   g->header.source_id);
+		return -EIO;
+	}
+	if (!buf_paddr)
+		return -ENOENT;
+
+	rc = ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
+				   sizeof(*ghes->estatus), 1);
+	if (rc)
+		return rc;
+	if (!ghes->estatus->block_status)
+		return -ENOENT;
+
+	ghes->buffer_paddr = buf_paddr;
+	ghes->flags |= GHES_TO_CLEAR;
+
+	rc = -EIO;
+	len = apei_estatus_len(ghes->estatus);
+	if (len < sizeof(*ghes->estatus))
+		goto err_read_block;
+	if (len > ghes->generic->error_block_length)
+		goto err_read_block;
+	if (apei_estatus_check_header(ghes->estatus))
+		goto err_read_block;
+	rc = ghes_copy_tofrom_phys(ghes->estatus + 1,
+				   buf_paddr + sizeof(*ghes->estatus),
+				   len - sizeof(*ghes->estatus), 1);
+	if (rc)
+		return rc;
+	if (apei_estatus_check(ghes->estatus))
+		goto err_read_block;
+	rc = 0;
+
+err_read_block:
+	if (rc && !silent)
+		pr_warning(FW_WARN GHES_PFX
+			   "Failed to read error status block!\n");
+	return rc;
+}
+
+static void ghes_clear_estatus(struct ghes *ghes)
+{
+	ghes->estatus->block_status = 0;
+	if (!(ghes->flags & GHES_TO_CLEAR))
+		return;
+	ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr,
+			      sizeof(ghes->estatus->block_status), 0);
+	ghes->flags &= ~GHES_TO_CLEAR;
+}
+
+static void ghes_do_proc(struct ghes *ghes)
+{
+	int ser, processed = 0;
+	struct acpi_hest_generic_data *gdata;
+
+	ser = ghes_severity(ghes->estatus->error_severity);
+	apei_estatus_for_each_section(ghes->estatus, gdata) {
+#ifdef CONFIG_X86_MCE
+		if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
+				 CPER_SEC_PLATFORM_MEM)) {
+			apei_mce_report_mem_error(
+				ser == GHES_SER_CORRECTED,
+				(struct cper_sec_mem_err *)(gdata+1));
+			processed = 1;
+		}
+#endif
+	}
+
+	if (!processed && printk_ratelimit())
+		pr_warning(GHES_PFX
+		"Unknown error record from generic hardware error source: %d\n",
+			   ghes->generic->header.source_id);
+}
+
+static int ghes_proc(struct ghes *ghes)
+{
+	int rc;
+
+	rc = ghes_read_estatus(ghes, 0);
+	if (rc)
+		goto out;
+	ghes_do_proc(ghes);
+
+out:
+	ghes_clear_estatus(ghes);
+	return 0;
+}
+
+static int ghes_notify_sci(struct notifier_block *this,
+				  unsigned long event, void *data)
+{
+	struct ghes *ghes;
+	int ret = NOTIFY_DONE;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ghes, &ghes_sci, list) {
+		if (!ghes_proc(ghes))
+			ret = NOTIFY_OK;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static struct notifier_block ghes_notifier_sci = {
+	.notifier_call = ghes_notify_sci,
+};
+
+static int hest_ghes_parse(struct acpi_hest_header *hest_hdr, void *data)
+{
+	struct acpi_hest_generic *generic;
+	struct ghes *ghes = NULL;
+	int rc = 0;
+
+	if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR)
+		return 0;
+
+	generic = (struct acpi_hest_generic *)hest_hdr;
+	if (!generic->enabled)
+		return 0;
+
+	if (generic->error_block_length <
+	    sizeof(struct acpi_hest_generic_status)) {
+		pr_warning(FW_BUG GHES_PFX
+"Invalid error block length: %u for generic hardware error source: %d\n",
+			   generic->error_block_length,
+			   generic->header.source_id);
+		goto err;
+	}
+	if (generic->records_to_preallocate == 0) {
+		pr_warning(FW_BUG GHES_PFX
+"Invalid records to preallocate: %u for generic hardware error source: %d\n",
+			   generic->records_to_preallocate,
+			   generic->header.source_id);
+		goto err;
+	}
+	ghes = ghes_new(generic);
+	if (IS_ERR(ghes)) {
+		rc = PTR_ERR(ghes);
+		ghes = NULL;
+		goto err;
+	}
+	switch (generic->notify.type) {
+	case ACPI_HEST_NOTIFY_POLLED:
+		pr_warning(GHES_PFX
+"Generic hardware error source: %d notified via POLL is not supported!\n",
+			   generic->header.source_id);
+		break;
+	case ACPI_HEST_NOTIFY_EXTERNAL:
+	case ACPI_HEST_NOTIFY_LOCAL:
+		pr_warning(GHES_PFX
+"Generic hardware error source: %d notified via IRQ is not supported!\n",
+			   generic->header.source_id);
+		break;
+	case ACPI_HEST_NOTIFY_SCI:
+		if (list_empty(&ghes_sci))
+			register_acpi_hed_notifier(&ghes_notifier_sci);
+		list_add_rcu(&ghes->list, &ghes_sci);
+		break;
+	case ACPI_HEST_NOTIFY_NMI:
+		pr_warning(GHES_PFX
+"Generic hardware error source: %d notified via NMI is not supported!\n",
+			   generic->header.source_id);
+		break;
+	default:
+		pr_warning(FW_WARN GHES_PFX
+	"Unknown notification type: %u for generic hardware error source: %d\n",
+			   generic->notify.type, generic->header.source_id);
+		break;
+	}
+
+	return 0;
+err:
+	if (ghes)
+		ghes_fini(ghes);
+	return rc;
+}
+
+static void ghes_cleanup(void)
+{
+	struct ghes *ghes, *nghes;
+
+	if (!list_empty(&ghes_sci))
+		unregister_acpi_hed_notifier(&ghes_notifier_sci);
+
+	synchronize_rcu();
+
+	list_for_each_entry_safe(ghes, nghes, &ghes_sci, list) {
+		list_del(&ghes->list);
+		ghes_fini(ghes);
+		kfree(ghes);
+	}
+}
+
+static int __init ghes_init(void)
+{
+	int rc;
+
+	if (acpi_disabled)
+		return -ENODEV;
+
+	if (hest_disable) {
+		pr_info(GHES_PFX "HEST is not enabled!\n");
+		return -EINVAL;
+	}
+
+	rc = apei_hest_parse(hest_ghes_parse, NULL);
+	if (rc) {
+		pr_err(GHES_PFX
+		"Error during parsing HEST generic hardware error sources.\n");
+		goto err_cleanup;
+	}
+
+	if (list_empty(&ghes_sci)) {
+		pr_info(GHES_PFX
+			"No functional generic hardware error sources.\n");
+		rc = -ENODEV;
+		goto err_cleanup;
+	}
+
+	pr_info(GHES_PFX
+		"Generic Hardware Error Source support is initialized.\n");
+
+	return 0;
+err_cleanup:
+	ghes_cleanup();
+	return rc;
+}
+
+static void __exit ghes_exit(void)
+{
+	ghes_cleanup();
+}
+
+module_init(ghes_init);
+module_exit(ghes_exit);
+
+MODULE_AUTHOR("Huang Ying");
+MODULE_DESCRIPTION("APEI Generic Hardware Error Source support");
+MODULE_LICENSE("GPL");
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
new file mode 100644
index 000000000000..e7f40d362cb3
--- /dev/null
+++ b/drivers/acpi/apei/hest.c
@@ -0,0 +1,173 @@
+/*
+ * APEI Hardware Error Souce Table support
+ *
+ * HEST describes error sources in detail; communicates operational
+ * parameters (i.e. severity levels, masking bits, and threshold
+ * values) to Linux as necessary. It also allows the BIOS to report
+ * non-standard error sources to Linux (for example, chipset-specific
+ * error registers).
+ *
+ * For more information about HEST, please refer to ACPI Specification
+ * version 4.0, section 17.3.2.
+ *
+ * Copyright 2009 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/kdebug.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <acpi/apei.h>
+
+#include "apei-internal.h"
+
+#define HEST_PFX "HEST: "
+
+int hest_disable;
+EXPORT_SYMBOL_GPL(hest_disable);
+
+/* HEST table parsing */
+
+static struct acpi_table_hest *hest_tab;
+
+static int hest_void_parse(struct acpi_hest_header *hest_hdr, void *data)
+{
+	return 0;
+}
+
+static int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
+	[ACPI_HEST_TYPE_IA32_CHECK] = -1,	/* need further calculation */
+	[ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1,
+	[ACPI_HEST_TYPE_IA32_NMI] = sizeof(struct acpi_hest_ia_nmi),
+	[ACPI_HEST_TYPE_AER_ROOT_PORT] = sizeof(struct acpi_hest_aer_root),
+	[ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer),
+	[ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge),
+	[ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic),
+};
+
+static int hest_esrc_len(struct acpi_hest_header *hest_hdr)
+{
+	u16 hest_type = hest_hdr->type;
+	int len;
+
+	if (hest_type >= ACPI_HEST_TYPE_RESERVED)
+		return 0;
+
+	len = hest_esrc_len_tab[hest_type];
+
+	if (hest_type == ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) {
+		struct acpi_hest_ia_corrected *cmc;
+		cmc = (struct acpi_hest_ia_corrected *)hest_hdr;
+		len = sizeof(*cmc) + cmc->num_hardware_banks *
+			sizeof(struct acpi_hest_ia_error_bank);
+	} else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) {
+		struct acpi_hest_ia_machine_check *mc;
+		mc = (struct acpi_hest_ia_machine_check *)hest_hdr;
+		len = sizeof(*mc) + mc->num_hardware_banks *
+			sizeof(struct acpi_hest_ia_error_bank);
+	}
+	BUG_ON(len == -1);
+
+	return len;
+};
+
+int apei_hest_parse(apei_hest_func_t func, void *data)
+{
+	struct acpi_hest_header *hest_hdr;
+	int i, rc, len;
+
+	if (hest_disable)
+		return -EINVAL;
+
+	hest_hdr = (struct acpi_hest_header *)(hest_tab + 1);
+	for (i = 0; i < hest_tab->error_source_count; i++) {
+		len = hest_esrc_len(hest_hdr);
+		if (!len) {
+			pr_warning(FW_WARN HEST_PFX
+				   "Unknown or unused hardware error source "
+				   "type: %d for hardware error source: %d.\n",
+				   hest_hdr->type, hest_hdr->source_id);
+			return -EINVAL;
+		}
+		if ((void *)hest_hdr + len >
+		    (void *)hest_tab + hest_tab->header.length) {
+			pr_warning(FW_BUG HEST_PFX
+		"Table contents overflow for hardware error source: %d.\n",
+				hest_hdr->source_id);
+			return -EINVAL;
+		}
+
+		rc = func(hest_hdr, data);
+		if (rc)
+			return rc;
+
+		hest_hdr = (void *)hest_hdr + len;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(apei_hest_parse);
+
+static int __init setup_hest_disable(char *str)
+{
+	hest_disable = 1;
+	return 0;
+}
+
+__setup("hest_disable", setup_hest_disable);
+
+static int __init hest_init(void)
+{
+	acpi_status status;
+	int rc = -ENODEV;
+
+	if (acpi_disabled)
+		goto err;
+
+	if (hest_disable) {
+		pr_info(HEST_PFX "HEST tabling parsing is disabled.\n");
+		goto err;
+	}
+
+	status = acpi_get_table(ACPI_SIG_HEST, 0,
+				(struct acpi_table_header **)&hest_tab);
+	if (status == AE_NOT_FOUND) {
+		pr_info(HEST_PFX "Table is not found!\n");
+		goto err;
+	} else if (ACPI_FAILURE(status)) {
+		const char *msg = acpi_format_exception(status);
+		pr_err(HEST_PFX "Failed to get table, %s\n", msg);
+		rc = -EINVAL;
+		goto err;
+	}
+
+	rc = apei_hest_parse(hest_void_parse, NULL);
+	if (rc)
+		goto err;
+
+	pr_info(HEST_PFX "HEST table parsing is initialized.\n");
+
+	return 0;
+err:
+	hest_disable = 1;
+	return rc;
+}
+
+subsys_initcall(hest_init);
diff --git a/drivers/acpi/atomicio.c b/drivers/acpi/atomicio.c
new file mode 100644
index 000000000000..814b19249616
--- /dev/null
+++ b/drivers/acpi/atomicio.c
@@ -0,0 +1,360 @@
+/*
+ * atomicio.c - ACPI IO memory pre-mapping/post-unmapping, then
+ * accessing in atomic context.
+ *
+ * This is used for NMI handler to access IO memory area, because
+ * ioremap/iounmap can not be used in NMI handler. The IO memory area
+ * is pre-mapped in process context and accessed in NMI handler.
+ *
+ * Copyright (C) 2009-2010, Intel Corp.
+ *	Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/kref.h>
+#include <linux/rculist.h>
+#include <linux/interrupt.h>
+#include <acpi/atomicio.h>
+
+#define ACPI_PFX "ACPI: "
+
+static LIST_HEAD(acpi_iomaps);
+/*
+ * Used for mutual exclusion between writers of acpi_iomaps list, for
+ * synchronization between readers and writer, RCU is used.
+ */
+static DEFINE_SPINLOCK(acpi_iomaps_lock);
+
+struct acpi_iomap {
+	struct list_head list;
+	void __iomem *vaddr;
+	unsigned long size;
+	phys_addr_t paddr;
+	struct kref ref;
+};
+
+/* acpi_iomaps_lock or RCU read lock must be held before calling */
+static struct acpi_iomap *__acpi_find_iomap(phys_addr_t paddr,
+					    unsigned long size)
+{
+	struct acpi_iomap *map;
+
+	list_for_each_entry_rcu(map, &acpi_iomaps, list) {
+		if (map->paddr + map->size >= paddr + size &&
+		    map->paddr <= paddr)
+			return map;
+	}
+	return NULL;
+}
+
+/*
+ * Atomic "ioremap" used by NMI handler, if the specified IO memory
+ * area is not pre-mapped, NULL will be returned.
+ *
+ * acpi_iomaps_lock or RCU read lock must be held before calling
+ */
+static void __iomem *__acpi_ioremap_fast(phys_addr_t paddr,
+					 unsigned long size)
+{
+	struct acpi_iomap *map;
+
+	map = __acpi_find_iomap(paddr, size);
+	if (map)
+		return map->vaddr + (paddr - map->paddr);
+	else
+		return NULL;
+}
+
+/* acpi_iomaps_lock must be held before calling */
+static void __iomem *__acpi_try_ioremap(phys_addr_t paddr,
+					unsigned long size)
+{
+	struct acpi_iomap *map;
+
+	map = __acpi_find_iomap(paddr, size);
+	if (map) {
+		kref_get(&map->ref);
+		return map->vaddr + (paddr - map->paddr);
+	} else
+		return NULL;
+}
+
+/*
+ * Used to pre-map the specified IO memory area. First try to find
+ * whether the area is already pre-mapped, if it is, increase the
+ * reference count (in __acpi_try_ioremap) and return; otherwise, do
+ * the real ioremap, and add the mapping into acpi_iomaps list.
+ */
+static void __iomem *acpi_pre_map(phys_addr_t paddr,
+				  unsigned long size)
+{
+	void __iomem *vaddr;
+	struct acpi_iomap *map;
+	unsigned long pg_sz, flags;
+	phys_addr_t pg_off;
+
+	spin_lock_irqsave(&acpi_iomaps_lock, flags);
+	vaddr = __acpi_try_ioremap(paddr, size);
+	spin_unlock_irqrestore(&acpi_iomaps_lock, flags);
+	if (vaddr)
+		return vaddr;
+
+	pg_off = paddr & PAGE_MASK;
+	pg_sz = ((paddr + size + PAGE_SIZE - 1) & PAGE_MASK) - pg_off;
+	vaddr = ioremap(pg_off, pg_sz);
+	if (!vaddr)
+		return NULL;
+	map = kmalloc(sizeof(*map), GFP_KERNEL);
+	if (!map)
+		goto err_unmap;
+	INIT_LIST_HEAD(&map->list);
+	map->paddr = pg_off;
+	map->size = pg_sz;
+	map->vaddr = vaddr;
+	kref_init(&map->ref);
+
+	spin_lock_irqsave(&acpi_iomaps_lock, flags);
+	vaddr = __acpi_try_ioremap(paddr, size);
+	if (vaddr) {
+		spin_unlock_irqrestore(&acpi_iomaps_lock, flags);
+		iounmap(map->vaddr);
+		kfree(map);
+		return vaddr;
+	}
+	list_add_tail_rcu(&map->list, &acpi_iomaps);
+	spin_unlock_irqrestore(&acpi_iomaps_lock, flags);
+
+	return vaddr + (paddr - pg_off);
+err_unmap:
+	iounmap(vaddr);
+	return NULL;
+}
+
+/* acpi_iomaps_lock must be held before calling */
+static void __acpi_kref_del_iomap(struct kref *ref)
+{
+	struct acpi_iomap *map;
+
+	map = container_of(ref, struct acpi_iomap, ref);
+	list_del_rcu(&map->list);
+}
+
+/*
+ * Used to post-unmap the specified IO memory area. The iounmap is
+ * done only if the reference count goes zero.
+ */
+static void acpi_post_unmap(phys_addr_t paddr, unsigned long size)
+{
+	struct acpi_iomap *map;
+	unsigned long flags;
+	int del;
+
+	spin_lock_irqsave(&acpi_iomaps_lock, flags);
+	map = __acpi_find_iomap(paddr, size);
+	BUG_ON(!map);
+	del = kref_put(&map->ref, __acpi_kref_del_iomap);
+	spin_unlock_irqrestore(&acpi_iomaps_lock, flags);
+
+	if (!del)
+		return;
+
+	synchronize_rcu();
+	iounmap(map->vaddr);
+	kfree(map);
+}
+
+/* In NMI handler, should set silent = 1 */
+static int acpi_check_gar(struct acpi_generic_address *reg,
+			  u64 *paddr, int silent)
+{
+	u32 width, space_id;
+
+	width = reg->bit_width;
+	space_id = reg->space_id;
+	/* Handle possible alignment issues */
+	memcpy(paddr, &reg->address, sizeof(*paddr));
+	if (!*paddr) {
+		if (!silent)
+			pr_warning(FW_BUG ACPI_PFX
+			"Invalid physical address in GAR [0x%llx/%u/%u]\n",
+				   *paddr, width, space_id);
+		return -EINVAL;
+	}
+
+	if ((width != 8) && (width != 16) && (width != 32) && (width != 64)) {
+		if (!silent)
+			pr_warning(FW_BUG ACPI_PFX
+				   "Invalid bit width in GAR [0x%llx/%u/%u]\n",
+				   *paddr, width, space_id);
+		return -EINVAL;
+	}
+
+	if (space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY &&
+	    space_id != ACPI_ADR_SPACE_SYSTEM_IO) {
+		if (!silent)
+			pr_warning(FW_BUG ACPI_PFX
+			"Invalid address space type in GAR [0x%llx/%u/%u]\n",
+				   *paddr, width, space_id);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Pre-map, working on GAR */
+int acpi_pre_map_gar(struct acpi_generic_address *reg)
+{
+	u64 paddr;
+	void __iomem *vaddr;
+	int rc;
+
+	if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY)
+		return 0;
+
+	rc = acpi_check_gar(reg, &paddr, 0);
+	if (rc)
+		return rc;
+
+	vaddr = acpi_pre_map(paddr, reg->bit_width / 8);
+	if (!vaddr)
+		return -EIO;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(acpi_pre_map_gar);
+
+/* Post-unmap, working on GAR */
+int acpi_post_unmap_gar(struct acpi_generic_address *reg)
+{
+	u64 paddr;
+	int rc;
+
+	if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY)
+		return 0;
+
+	rc = acpi_check_gar(reg, &paddr, 0);
+	if (rc)
+		return rc;
+
+	acpi_post_unmap(paddr, reg->bit_width / 8);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(acpi_post_unmap_gar);
+
+/*
+ * Can be used in atomic (including NMI) or process context. RCU read
+ * lock can only be released after the IO memory area accessing.
+ */
+static int acpi_atomic_read_mem(u64 paddr, u64 *val, u32 width)
+{
+	void __iomem *addr;
+
+	rcu_read_lock();
+	addr = __acpi_ioremap_fast(paddr, width);
+	switch (width) {
+	case 8:
+		*val = readb(addr);
+		break;
+	case 16:
+		*val = readw(addr);
+		break;
+	case 32:
+		*val = readl(addr);
+		break;
+	case 64:
+		*val = readq(addr);
+		break;
+	default:
+		return -EINVAL;
+	}
+	rcu_read_unlock();
+
+	return 0;
+}
+
+static int acpi_atomic_write_mem(u64 paddr, u64 val, u32 width)
+{
+	void __iomem *addr;
+
+	rcu_read_lock();
+	addr = __acpi_ioremap_fast(paddr, width);
+	switch (width) {
+	case 8:
+		writeb(val, addr);
+		break;
+	case 16:
+		writew(val, addr);
+		break;
+	case 32:
+		writel(val, addr);
+		break;
+	case 64:
+		writeq(val, addr);
+		break;
+	default:
+		return -EINVAL;
+	}
+	rcu_read_unlock();
+
+	return 0;
+}
+
+/* GAR accessing in atomic (including NMI) or process context */
+int acpi_atomic_read(u64 *val, struct acpi_generic_address *reg)
+{
+	u64 paddr;
+	int rc;
+
+	rc = acpi_check_gar(reg, &paddr, 1);
+	if (rc)
+		return rc;
+
+	*val = 0;
+	switch (reg->space_id) {
+	case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+		return acpi_atomic_read_mem(paddr, val, reg->bit_width);
+	case ACPI_ADR_SPACE_SYSTEM_IO:
+		return acpi_os_read_port(paddr, (u32 *)val, reg->bit_width);
+	default:
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL_GPL(acpi_atomic_read);
+
+int acpi_atomic_write(u64 val, struct acpi_generic_address *reg)
+{
+	u64 paddr;
+	int rc;
+
+	rc = acpi_check_gar(reg, &paddr, 1);
+	if (rc)
+		return rc;
+
+	switch (reg->space_id) {
+	case ACPI_ADR_SPACE_SYSTEM_MEMORY:
+		return acpi_atomic_write_mem(paddr, val, reg->bit_width);
+	case ACPI_ADR_SPACE_SYSTEM_IO:
+		return acpi_os_write_port(paddr, val, reg->bit_width);
+	default:
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL_GPL(acpi_atomic_write);
diff --git a/drivers/acpi/hed.c b/drivers/acpi/hed.c
new file mode 100644
index 000000000000..d0c1967f7597
--- /dev/null
+++ b/drivers/acpi/hed.c
@@ -0,0 +1,112 @@
+/*
+ * ACPI Hardware Error Device (PNP0C33) Driver
+ *
+ * Copyright (C) 2010, Intel Corp.
+ *	Author: Huang Ying <ying.huang@intel.com>
+ *
+ * ACPI Hardware Error Device is used to report some hardware errors
+ * notified via SCI, mainly the corrected errors.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <acpi/acpi_bus.h>
+#include <acpi/acpi_drivers.h>
+#include <acpi/hed.h>
+
+static struct acpi_device_id acpi_hed_ids[] = {
+	{"PNP0C33", 0},
+	{"", 0},
+};
+MODULE_DEVICE_TABLE(acpi, acpi_hed_ids);
+
+static acpi_handle hed_handle;
+
+static BLOCKING_NOTIFIER_HEAD(acpi_hed_notify_list);
+
+int register_acpi_hed_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&acpi_hed_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(register_acpi_hed_notifier);
+
+void unregister_acpi_hed_notifier(struct notifier_block *nb)
+{
+	blocking_notifier_chain_unregister(&acpi_hed_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_acpi_hed_notifier);
+
+/*
+ * SCI to report hardware error is forwarded to the listeners of HED,
+ * it is used by HEST Generic Hardware Error Source with notify type
+ * SCI.
+ */
+static void acpi_hed_notify(struct acpi_device *device, u32 event)
+{
+	blocking_notifier_call_chain(&acpi_hed_notify_list, 0, NULL);
+}
+
+static int __devinit acpi_hed_add(struct acpi_device *device)
+{
+	/* Only one hardware error device */
+	if (hed_handle)
+		return -EINVAL;
+	hed_handle = device->handle;
+	return 0;
+}
+
+static int __devexit acpi_hed_remove(struct acpi_device *device, int type)
+{
+	hed_handle = NULL;
+	return 0;
+}
+
+static struct acpi_driver acpi_hed_driver = {
+	.name = "hardware_error_device",
+	.class = "hardware_error",
+	.ids = acpi_hed_ids,
+	.ops = {
+		.add = acpi_hed_add,
+		.remove = acpi_hed_remove,
+		.notify = acpi_hed_notify,
+	},
+};
+
+static int __init acpi_hed_init(void)
+{
+	if (acpi_disabled)
+		return -ENODEV;
+
+	if (acpi_bus_register_driver(&acpi_hed_driver) < 0)
+		return -ENODEV;
+
+	return 0;
+}
+
+static void __exit acpi_hed_exit(void)
+{
+	acpi_bus_unregister_driver(&acpi_hed_driver);
+}
+
+module_init(acpi_hed_init);
+module_exit(acpi_hed_exit);
+
+ACPI_MODULE_NAME("hed");
+MODULE_AUTHOR("Huang Ying");
+MODULE_DESCRIPTION("ACPI Hardware Error Device Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/acpi/hest.c b/drivers/acpi/hest.c
deleted file mode 100644
index 1c527a192872..000000000000
--- a/drivers/acpi/hest.c
+++ /dev/null
@@ -1,139 +0,0 @@
-#include <linux/acpi.h>
-#include <linux/pci.h>
-
-#define PREFIX "ACPI: "
-
-static inline unsigned long parse_acpi_hest_ia_machine_check(struct acpi_hest_ia_machine_check *p)
-{
-	return sizeof(*p) +
-		(sizeof(struct acpi_hest_ia_error_bank) * p->num_hardware_banks);
-}
-
-static inline unsigned long parse_acpi_hest_ia_corrected(struct acpi_hest_ia_corrected *p)
-{
-	return sizeof(*p) +
-		(sizeof(struct acpi_hest_ia_error_bank) * p->num_hardware_banks);
-}
-
-static inline unsigned long parse_acpi_hest_ia_nmi(struct acpi_hest_ia_nmi *p)
-{
-	return sizeof(*p);
-}
-
-static inline unsigned long parse_acpi_hest_generic(struct acpi_hest_generic *p)
-{
-	return sizeof(*p);
-}
-
-static inline unsigned int hest_match_pci(struct acpi_hest_aer_common *p, struct pci_dev *pci)
-{
-	return	(0           == pci_domain_nr(pci->bus) &&
-		 p->bus      == pci->bus->number &&
-		 p->device   == PCI_SLOT(pci->devfn) &&
-		 p->function == PCI_FUNC(pci->devfn));
-}
-
-static unsigned long parse_acpi_hest_aer(void *hdr, int type, struct pci_dev *pci, int *firmware_first)
-{
-	struct acpi_hest_aer_common *p = hdr + sizeof(struct acpi_hest_header);
-	unsigned long rc=0;
-	u8 pcie_type = 0;
-	u8 bridge = 0;
-	switch (type) {
-	case ACPI_HEST_TYPE_AER_ROOT_PORT:
-		rc = sizeof(struct acpi_hest_aer_root);
-		pcie_type = PCI_EXP_TYPE_ROOT_PORT;
-		break;
-	case ACPI_HEST_TYPE_AER_ENDPOINT:
-		rc = sizeof(struct acpi_hest_aer);
-		pcie_type = PCI_EXP_TYPE_ENDPOINT;
-		break;
-	case ACPI_HEST_TYPE_AER_BRIDGE:
-		rc = sizeof(struct acpi_hest_aer_bridge);
-		if ((pci->class >> 16) == PCI_BASE_CLASS_BRIDGE)
-			bridge = 1;
-		break;
-	}
-
-	if (p->flags & ACPI_HEST_GLOBAL) {
-		if ((pci->is_pcie && (pci->pcie_type == pcie_type)) || bridge)
-			*firmware_first = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);
-	}
-	else
-		if (hest_match_pci(p, pci))
-			*firmware_first = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);
-	return rc;
-}
-
-static int acpi_hest_firmware_first(struct acpi_table_header *stdheader, struct pci_dev *pci)
-{
-	struct acpi_table_hest *hest = (struct acpi_table_hest *)stdheader;
-	void *p = (void *)hest + sizeof(*hest); /* defined by the ACPI 4.0 spec */
-	struct acpi_hest_header *hdr = p;
-
-	int i;
-	int firmware_first = 0;
-	static unsigned char printed_unused = 0;
-	static unsigned char printed_reserved = 0;
-
-	for (i=0, hdr=p; p < (((void *)hest) + hest->header.length) && i < hest->error_source_count; i++) {
-		switch (hdr->type) {
-		case ACPI_HEST_TYPE_IA32_CHECK:
-			p += parse_acpi_hest_ia_machine_check(p);
-			break;
-		case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK:
-			p += parse_acpi_hest_ia_corrected(p);
-			break;
-		case ACPI_HEST_TYPE_IA32_NMI:
-			p += parse_acpi_hest_ia_nmi(p);
-			break;
-		/* These three should never appear */
-		case ACPI_HEST_TYPE_NOT_USED3:
-		case ACPI_HEST_TYPE_NOT_USED4:
-		case ACPI_HEST_TYPE_NOT_USED5:
-			if (!printed_unused) {
-				printk(KERN_DEBUG PREFIX
-				       "HEST Error Source list contains an obsolete type (%d).\n", hdr->type);
-				printed_unused = 1;
-			}
-			break;
-		case ACPI_HEST_TYPE_AER_ROOT_PORT:
-		case ACPI_HEST_TYPE_AER_ENDPOINT:
-		case ACPI_HEST_TYPE_AER_BRIDGE:
-			p += parse_acpi_hest_aer(p, hdr->type, pci, &firmware_first);
-			break;
-		case ACPI_HEST_TYPE_GENERIC_ERROR:
-			p += parse_acpi_hest_generic(p);
-			break;
-		/* These should never appear either */
-		case ACPI_HEST_TYPE_RESERVED:
-		default:
-			if (!printed_reserved) {
-				printk(KERN_DEBUG PREFIX
-				       "HEST Error Source list contains a reserved type (%d).\n", hdr->type);
-				printed_reserved = 1;
-			}
-			break;
-		}
-	}
-	return firmware_first;
-}
-
-int acpi_hest_firmware_first_pci(struct pci_dev *pci)
-{
-	acpi_status status = AE_NOT_FOUND;
-	struct acpi_table_header *hest = NULL;
-
-	if (acpi_disabled)
-		return 0;
-
-	status = acpi_get_table(ACPI_SIG_HEST, 1, &hest);
-
-	if (ACPI_SUCCESS(status)) {
-		if (acpi_hest_firmware_first(hest, pci)) {
-			return 1;
-		}
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(acpi_hest_firmware_first_pci);
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index aefce33f2a09..4eac59393edc 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -120,7 +120,8 @@ acpi_handle acpi_get_pci_rootbridge_handle(unsigned int seg, unsigned int bus)
 	struct acpi_pci_root *root;
 	
 	list_for_each_entry(root, &acpi_pci_roots, node)
-		if ((root->segment == (u16) seg) && (root->bus_nr == (u16) bus))
+		if ((root->segment == (u16) seg) &&
+		    (root->secondary.start == (u16) bus))
 			return root->device->handle;
 	return NULL;		
 }
@@ -154,7 +155,7 @@ EXPORT_SYMBOL_GPL(acpi_is_root_bridge);
 static acpi_status
 get_root_bridge_busnr_callback(struct acpi_resource *resource, void *data)
 {
-	int *busnr = data;
+	struct resource *res = data;
 	struct acpi_resource_address64 address;
 
 	if (resource->type != ACPI_RESOURCE_TYPE_ADDRESS16 &&
@@ -164,28 +165,27 @@ get_root_bridge_busnr_callback(struct acpi_resource *resource, void *data)
 
 	acpi_resource_to_address64(resource, &address);
 	if ((address.address_length > 0) &&
-	    (address.resource_type == ACPI_BUS_NUMBER_RANGE))
-		*busnr = address.minimum;
+	    (address.resource_type == ACPI_BUS_NUMBER_RANGE)) {
+		res->start = address.minimum;
+		res->end = address.minimum + address.address_length - 1;
+	}
 
 	return AE_OK;
 }
 
 static acpi_status try_get_root_bridge_busnr(acpi_handle handle,
-					     unsigned long long *bus)
+					     struct resource *res)
 {
 	acpi_status status;
-	int busnum;
 
-	busnum = -1;
+	res->start = -1;
 	status =
 	    acpi_walk_resources(handle, METHOD_NAME__CRS,
-				get_root_bridge_busnr_callback, &busnum);
+				get_root_bridge_busnr_callback, res);
 	if (ACPI_FAILURE(status))
 		return status;
-	/* Check if we really get a bus number from _CRS */
-	if (busnum == -1)
+	if (res->start == -1)
 		return AE_ERROR;
-	*bus = busnum;
 	return AE_OK;
 }
 
@@ -429,34 +429,47 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	struct acpi_device *child;
 	u32 flags, base_flags;
 
+	root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL);
+	if (!root)
+		return -ENOMEM;
+
 	segment = 0;
 	status = acpi_evaluate_integer(device->handle, METHOD_NAME__SEG, NULL,
 				       &segment);
 	if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
 		printk(KERN_ERR PREFIX "can't evaluate _SEG\n");
-		return -ENODEV;
+		result = -ENODEV;
+		goto end;
 	}
 
 	/* Check _CRS first, then _BBN.  If no _BBN, default to zero. */
-	bus = 0;
-	status = try_get_root_bridge_busnr(device->handle, &bus);
+	root->secondary.flags = IORESOURCE_BUS;
+	status = try_get_root_bridge_busnr(device->handle, &root->secondary);
 	if (ACPI_FAILURE(status)) {
+		/*
+		 * We need both the start and end of the downstream bus range
+		 * to interpret _CBA (MMCONFIG base address), so it really is
+		 * supposed to be in _CRS.  If we don't find it there, all we
+		 * can do is assume [_BBN-0xFF] or [0-0xFF].
+		 */
+		root->secondary.end = 0xFF;
+		printk(KERN_WARNING FW_BUG PREFIX
+		       "no secondary bus range in _CRS\n");
 		status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN,					       NULL, &bus);
-		if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
-			printk(KERN_ERR PREFIX
-			     "no bus number in _CRS and can't evaluate _BBN\n");
-			return -ENODEV;
+		if (ACPI_SUCCESS(status))
+			root->secondary.start = bus;
+		else if (status == AE_NOT_FOUND)
+			root->secondary.start = 0;
+		else {
+			printk(KERN_ERR PREFIX "can't evaluate _BBN\n");
+			result = -ENODEV;
+			goto end;
 		}
 	}
 
-	root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL);
-	if (!root)
-		return -ENOMEM;
-
 	INIT_LIST_HEAD(&root->node);
 	root->device = device;
 	root->segment = segment & 0xFFFF;
-	root->bus_nr = bus & 0xFF;
 	strcpy(acpi_device_name(device), ACPI_PCI_ROOT_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_PCI_ROOT_CLASS);
 	device->driver_data = root;
@@ -475,9 +488,9 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	/* TBD: Locking */
 	list_add_tail(&root->node, &acpi_pci_roots);
 
-	printk(KERN_INFO PREFIX "%s [%s] (%04x:%02x)\n",
+	printk(KERN_INFO PREFIX "%s [%s] (domain %04x %pR)\n",
 	       acpi_device_name(device), acpi_device_bid(device),
-	       root->segment, root->bus_nr);
+	       root->segment, &root->secondary);
 
 	/*
 	 * Scan the Root Bridge
@@ -486,11 +499,11 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	 * PCI namespace does not get created until this call is made (and 
 	 * thus the root bridge's pci_dev does not exist).
 	 */
-	root->bus = pci_acpi_scan_root(device, segment, bus);
+	root->bus = pci_acpi_scan_root(root);
 	if (!root->bus) {
 		printk(KERN_ERR PREFIX
 			    "Bus %04x:%02x not present in PCI namespace\n",
-			    root->segment, root->bus_nr);
+			    root->segment, (unsigned int)root->secondary.start);
 		result = -ENODEV;
 		goto end;
 	}
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 6da962c9b21c..d71f0fc34b46 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -1875,6 +1875,7 @@ got_driver:
 		 */
 		if (filp->f_op == &hung_up_tty_fops)
 			filp->f_op = &tty_fops;
+		unlock_kernel();
 		goto retry_open;
 	}
 	unlock_kernel();
diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c
index b1edd778639c..405febd94f24 100644
--- a/drivers/input/joystick/iforce/iforce-main.c
+++ b/drivers/input/joystick/iforce/iforce-main.c
@@ -54,6 +54,9 @@ static signed short btn_avb_wheel[] =
 static signed short abs_joystick[] =
 { ABS_X, ABS_Y, ABS_THROTTLE, ABS_HAT0X, ABS_HAT0Y, -1 };
 
+static signed short abs_joystick_rudder[] =
+{ ABS_X, ABS_Y, ABS_THROTTLE, ABS_RUDDER, ABS_HAT0X, ABS_HAT0Y, -1 };
+
 static signed short abs_avb_pegasus[] =
 { ABS_X, ABS_Y, ABS_THROTTLE, ABS_RUDDER, ABS_HAT0X, ABS_HAT0Y,
   ABS_HAT1X, ABS_HAT1Y, -1 };
@@ -76,8 +79,9 @@ static struct iforce_device iforce_device[] = {
 	{ 0x061c, 0xc0a4, "ACT LABS Force RS",                          btn_wheel, abs_wheel, ff_iforce }, //?
 	{ 0x061c, 0xc084, "ACT LABS Force RS",				btn_wheel, abs_wheel, ff_iforce },
 	{ 0x06f8, 0x0001, "Guillemot Race Leader Force Feedback",	btn_wheel, abs_wheel, ff_iforce }, //?
+	{ 0x06f8, 0x0001, "Guillemot Jet Leader Force Feedback",	btn_joystick, abs_joystick_rudder, ff_iforce },
 	{ 0x06f8, 0x0004, "Guillemot Force Feedback Racing Wheel",	btn_wheel, abs_wheel, ff_iforce }, //?
-	{ 0x06f8, 0x0004, "Gullemot Jet Leader 3D",			btn_joystick, abs_joystick, ff_iforce }, //?
+	{ 0x06f8, 0xa302, "Guillemot Jet Leader 3D",			btn_joystick, abs_joystick, ff_iforce }, //?
 	{ 0x06d6, 0x29bc, "Trust Force Feedback Race Master",		btn_wheel, abs_wheel, ff_iforce },
 	{ 0x0000, 0x0000, "Unknown I-Force Device [%04x:%04x]",		btn_joystick, abs_joystick, ff_iforce }
 };
diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c
index b41303d3ec54..6c96631ae5d9 100644
--- a/drivers/input/joystick/iforce/iforce-usb.c
+++ b/drivers/input/joystick/iforce/iforce-usb.c
@@ -212,6 +212,7 @@ static struct usb_device_id iforce_usb_ids [] = {
 	{ USB_DEVICE(0x061c, 0xc0a4) },         /* ACT LABS Force RS */
 	{ USB_DEVICE(0x061c, 0xc084) },         /* ACT LABS Force RS */
 	{ USB_DEVICE(0x06f8, 0x0001) },		/* Guillemot Race Leader Force Feedback */
+	{ USB_DEVICE(0x06f8, 0x0003) },		/* Guillemot Jet Leader Force Feedback */
 	{ USB_DEVICE(0x06f8, 0x0004) },		/* Guillemot Force Feedback Racing Wheel */
 	{ USB_DEVICE(0x06f8, 0xa302) },		/* Guillemot Jet Leader 3D */
 	{ }					/* Terminating entry */
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index 0520c2e19927..112b4ee52ff2 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -185,7 +185,7 @@ static void elantech_report_absolute_v1(struct psmouse *psmouse)
 	int fingers;
 	static int old_fingers;
 
-	if (etd->fw_version_maj == 0x01) {
+	if (etd->fw_version < 0x020000) {
 		/*
 		 * byte 0:  D   U  p1  p2   1  p3   R   L
 		 * byte 1:  f   0  th  tw  x9  x8  y9  y8
@@ -227,7 +227,7 @@ static void elantech_report_absolute_v1(struct psmouse *psmouse)
 	input_report_key(dev, BTN_LEFT, packet[0] & 0x01);
 	input_report_key(dev, BTN_RIGHT, packet[0] & 0x02);
 
-	if ((etd->fw_version_maj == 0x01) &&
+	if (etd->fw_version < 0x020000 &&
 	    (etd->capabilities & ETP_CAP_HAS_ROCKER)) {
 		/* rocker up */
 		input_report_key(dev, BTN_FORWARD, packet[0] & 0x40);
@@ -321,7 +321,7 @@ static int elantech_check_parity_v1(struct psmouse *psmouse)
 	unsigned char p1, p2, p3;
 
 	/* Parity bits are placed differently */
-	if (etd->fw_version_maj == 0x01) {
+	if (etd->fw_version < 0x020000) {
 		/* byte 0:  D   U  p1  p2   1  p3   R   L */
 		p1 = (packet[0] & 0x20) >> 5;
 		p2 = (packet[0] & 0x10) >> 4;
@@ -457,7 +457,7 @@ static void elantech_set_input_params(struct psmouse *psmouse)
 	switch (etd->hw_version) {
 	case 1:
 		/* Rocker button */
-		if ((etd->fw_version_maj == 0x01) &&
+		if (etd->fw_version < 0x020000 &&
 		    (etd->capabilities & ETP_CAP_HAS_ROCKER)) {
 			__set_bit(BTN_FORWARD, dev->keybit);
 			__set_bit(BTN_BACK, dev->keybit);
@@ -686,15 +686,14 @@ int elantech_init(struct psmouse *psmouse)
 		pr_err("elantech.c: failed to query firmware version.\n");
 		goto init_fail;
 	}
-	etd->fw_version_maj = param[0];
-	etd->fw_version_min = param[2];
+
+	etd->fw_version = (param[0] << 16) | (param[1] << 8) | param[2];
 
 	/*
 	 * Assume every version greater than this is new EeePC style
 	 * hardware with 6 byte packets
 	 */
-	if ((etd->fw_version_maj == 0x02 && etd->fw_version_min >= 0x30) ||
-	    etd->fw_version_maj > 0x02) {
+	if (etd->fw_version >= 0x020030) {
 		etd->hw_version = 2;
 		/* For now show extra debug information */
 		etd->debug = 1;
@@ -704,8 +703,9 @@ int elantech_init(struct psmouse *psmouse)
 		etd->hw_version = 1;
 		etd->paritycheck = 1;
 	}
-	pr_info("elantech.c: assuming hardware version %d, firmware version %d.%d\n",
-		etd->hw_version, etd->fw_version_maj, etd->fw_version_min);
+
+	pr_info("elantech.c: assuming hardware version %d, firmware version %d.%d.%d\n",
+		etd->hw_version, param[0], param[1], param[2]);
 
 	if (synaptics_send_cmd(psmouse, ETP_CAPABILITIES_QUERY, param)) {
 		pr_err("elantech.c: failed to query capabilities.\n");
@@ -720,8 +720,8 @@ int elantech_init(struct psmouse *psmouse)
 	 * a touch action starts causing the mouse cursor or scrolled page
 	 * to jump. Enable a workaround.
 	 */
-	if (etd->fw_version_maj == 0x02 && etd->fw_version_min == 0x22) {
-		pr_info("elantech.c: firmware version 2.34 detected, "
+	if (etd->fw_version == 0x020022) {
+		pr_info("elantech.c: firmware version 2.0.34 detected, "
 			"enabling jumpy cursor workaround\n");
 		etd->jumpy_cursor = 1;
 	}
diff --git a/drivers/input/mouse/elantech.h b/drivers/input/mouse/elantech.h
index feac5f7af966..ac57bde1bb9f 100644
--- a/drivers/input/mouse/elantech.h
+++ b/drivers/input/mouse/elantech.h
@@ -100,11 +100,10 @@ struct elantech_data {
 	unsigned char reg_26;
 	unsigned char debug;
 	unsigned char capabilities;
-	unsigned char fw_version_maj;
-	unsigned char fw_version_min;
-	unsigned char hw_version;
 	unsigned char paritycheck;
 	unsigned char jumpy_cursor;
+	unsigned char hw_version;
+	unsigned int  fw_version;
 	unsigned char parity[256];
 };
 
diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index cbc807264940..a3c97315a473 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -1394,6 +1394,7 @@ static int psmouse_reconnect(struct serio *serio)
 	struct psmouse *psmouse = serio_get_drvdata(serio);
 	struct psmouse *parent = NULL;
 	struct serio_driver *drv = serio->drv;
+	unsigned char type;
 	int rc = -1;
 
 	if (!drv || !psmouse) {
@@ -1413,10 +1414,15 @@ static int psmouse_reconnect(struct serio *serio)
 	if (psmouse->reconnect) {
 		if (psmouse->reconnect(psmouse))
 			goto out;
-	} else if (psmouse_probe(psmouse) < 0 ||
-		   psmouse->type != psmouse_extensions(psmouse,
-						psmouse_max_proto, false)) {
-		goto out;
+	} else {
+		psmouse_reset(psmouse);
+
+		if (psmouse_probe(psmouse) < 0)
+			goto out;
+
+		type = psmouse_extensions(psmouse, psmouse_max_proto, false);
+		if (psmouse->type != type)
+			goto out;
 	}
 
 	/* ok, the device type (and capabilities) match the old one,
diff --git a/drivers/input/touchscreen/ad7877.c b/drivers/input/touchscreen/ad7877.c
index e019d53d1ab4..0d2d7e54b465 100644
--- a/drivers/input/touchscreen/ad7877.c
+++ b/drivers/input/touchscreen/ad7877.c
@@ -156,9 +156,14 @@ struct ser_req {
 	u16			reset;
 	u16			ref_on;
 	u16			command;
-	u16			sample;
 	struct spi_message	msg;
 	struct spi_transfer	xfer[6];
+
+	/*
+	 * DMA (thus cache coherency maintenance) requires the
+	 * transfer buffers to live in their own cache lines.
+	 */
+	u16 sample ____cacheline_aligned;
 };
 
 struct ad7877 {
@@ -182,8 +187,6 @@ struct ad7877 {
 	u8			averaging;
 	u8			pen_down_acc_interval;
 
-	u16			conversion_data[AD7877_NR_SENSE];
-
 	struct spi_transfer	xfer[AD7877_NR_SENSE + 2];
 	struct spi_message	msg;
 
@@ -195,6 +198,12 @@ struct ad7877 {
 	spinlock_t		lock;
 	struct timer_list	timer;		/* P: lock */
 	unsigned		pending:1;	/* P: lock */
+
+	/*
+	 * DMA (thus cache coherency maintenance) requires the
+	 * transfer buffers to live in their own cache lines.
+	 */
+	u16 conversion_data[AD7877_NR_SENSE] ____cacheline_aligned;
 };
 
 static int gpio3;
diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c
index a3d5728b6449..f2ab025ad97a 100644
--- a/drivers/mfd/wm831x-core.c
+++ b/drivers/mfd/wm831x-core.c
@@ -349,6 +349,9 @@ int wm831x_auxadc_read(struct wm831x *wm831x, enum wm831x_auxadc input)
 		goto disable;
 	}
 
+	/* If an interrupt arrived late clean up after it */
+	try_wait_for_completion(&wm831x->auxadc_done);
+
 	/* Ignore the result to allow us to soldier on without IRQ hookup */
 	wait_for_completion_timeout(&wm831x->auxadc_done, msecs_to_jiffies(5));
 
diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index e400a3bed063..b5807484b4c9 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -363,6 +363,10 @@ int wm8350_read_auxadc(struct wm8350 *wm8350, int channel, int scale, int vref)
 	reg |= 1 << channel | WM8350_AUXADC_POLL;
 	wm8350_reg_write(wm8350, WM8350_DIGITISER_CONTROL_1, reg);
 
+	/* If a late IRQ left the completion signalled then consume
+	 * the completion. */
+	try_wait_for_completion(&wm8350->auxadc_done);
+
 	/* We ignore the result of the completion and just check for a
 	 * conversion result, allowing us to soldier on if the IRQ
 	 * infrastructure is not set up for the chip. */
diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c
index a6dd7da37357..336d9f553f3e 100644
--- a/drivers/mmc/host/at91_mci.c
+++ b/drivers/mmc/host/at91_mci.c
@@ -314,8 +314,8 @@ static void at91_mci_post_dma_read(struct at91mci_host *host)
 			dmabuf = (unsigned *)tmpv;
 		}
 
+		flush_kernel_dcache_page(sg_page(sg));
 		kunmap_atomic(sgbuffer, KM_BIO_SRC_IRQ);
-		dmac_flush_range((void *)sgbuffer, ((void *)sgbuffer) + amount);
 		data->bytes_xfered += amount;
 		if (size == 0)
 			break;
diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h
index bd833ea3ba49..7182c337aef1 100644
--- a/drivers/pci/pcie/aer/aerdrv.h
+++ b/drivers/pci/pcie/aer/aerdrv.h
@@ -134,4 +134,21 @@ static inline int aer_osc_setup(struct pcie_device *pciedev)
 }
 #endif
 
+#ifdef CONFIG_ACPI_APEI
+extern int pcie_aer_get_firmware_first(struct pci_dev *pci_dev);
+#else
+static inline int pcie_aer_get_firmware_first(struct pci_dev *pci_dev)
+{
+	if (pci_dev->__aer_firmware_first_valid)
+		return pci_dev->__aer_firmware_first;
+	return 0;
+}
+#endif
+
+static inline void pcie_aer_force_firmware_first(struct pci_dev *pci_dev,
+						 int enable)
+{
+	pci_dev->__aer_firmware_first = !!enable;
+	pci_dev->__aer_firmware_first_valid = 1;
+}
 #endif /* _AERDRV_H_ */
diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c
index 04814087658d..f278d7b0d95d 100644
--- a/drivers/pci/pcie/aer/aerdrv_acpi.c
+++ b/drivers/pci/pcie/aer/aerdrv_acpi.c
@@ -16,6 +16,7 @@
 #include <linux/acpi.h>
 #include <linux/pci-acpi.h>
 #include <linux/delay.h>
+#include <acpi/apei.h>
 #include "aerdrv.h"
 
 /**
@@ -53,3 +54,79 @@ int aer_osc_setup(struct pcie_device *pciedev)
 
 	return 0;
 }
+
+#ifdef CONFIG_ACPI_APEI
+static inline int hest_match_pci(struct acpi_hest_aer_common *p,
+				 struct pci_dev *pci)
+{
+	return	(0           == pci_domain_nr(pci->bus) &&
+		 p->bus      == pci->bus->number &&
+		 p->device   == PCI_SLOT(pci->devfn) &&
+		 p->function == PCI_FUNC(pci->devfn));
+}
+
+struct aer_hest_parse_info {
+	struct pci_dev *pci_dev;
+	int firmware_first;
+};
+
+static int aer_hest_parse(struct acpi_hest_header *hest_hdr, void *data)
+{
+	struct aer_hest_parse_info *info = data;
+	struct acpi_hest_aer_common *p;
+	u8 pcie_type = 0;
+	u8 bridge = 0;
+	int ff = 0;
+
+	switch (hest_hdr->type) {
+	case ACPI_HEST_TYPE_AER_ROOT_PORT:
+		pcie_type = PCI_EXP_TYPE_ROOT_PORT;
+		break;
+	case ACPI_HEST_TYPE_AER_ENDPOINT:
+		pcie_type = PCI_EXP_TYPE_ENDPOINT;
+		break;
+	case ACPI_HEST_TYPE_AER_BRIDGE:
+		if ((info->pci_dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)
+			bridge = 1;
+		break;
+	default:
+		return 0;
+	}
+
+	p = (struct acpi_hest_aer_common *)(hest_hdr + 1);
+	if (p->flags & ACPI_HEST_GLOBAL) {
+		if ((info->pci_dev->is_pcie &&
+		     info->pci_dev->pcie_type == pcie_type) || bridge)
+			ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);
+	} else
+		if (hest_match_pci(p, info->pci_dev))
+			ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);
+	info->firmware_first = ff;
+
+	return 0;
+}
+
+static void aer_set_firmware_first(struct pci_dev *pci_dev)
+{
+	int rc;
+	struct aer_hest_parse_info info = {
+		.pci_dev	= pci_dev,
+		.firmware_first	= 0,
+	};
+
+	rc = apei_hest_parse(aer_hest_parse, &info);
+
+	if (rc)
+		pci_dev->__aer_firmware_first = 0;
+	else
+		pci_dev->__aer_firmware_first = info.firmware_first;
+	pci_dev->__aer_firmware_first_valid = 1;
+}
+
+int pcie_aer_get_firmware_first(struct pci_dev *dev)
+{
+	if (!dev->__aer_firmware_first_valid)
+		aer_set_firmware_first(dev);
+	return dev->__aer_firmware_first;
+}
+#endif
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index aceb04b67b60..586b6713e417 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -36,7 +36,7 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 	u16 reg16 = 0;
 	int pos;
 
-	if (dev->aer_firmware_first)
+	if (pcie_aer_get_firmware_first(dev))
 		return -EIO;
 
 	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
@@ -64,7 +64,7 @@ int pci_disable_pcie_error_reporting(struct pci_dev *dev)
 	u16 reg16 = 0;
 	int pos;
 
-	if (dev->aer_firmware_first)
+	if (pcie_aer_get_firmware_first(dev))
 		return -EIO;
 
 	pos = pci_pcie_cap(dev);
@@ -859,7 +859,7 @@ void aer_delete_rootport(struct aer_rpc *rpc)
  */
 int aer_init(struct pcie_device *dev)
 {
-	if (dev->port->aer_firmware_first) {
+	if (pcie_aer_get_firmware_first(dev->port)) {
 		dev_printk(KERN_DEBUG, &dev->device,
 			   "PCIe errors handled by platform firmware.\n");
 		goto out;
@@ -873,7 +873,7 @@ out:
 	if (forceload) {
 		dev_printk(KERN_DEBUG, &dev->device,
 			   "aerdrv forceload requested.\n");
-		dev->port->aer_firmware_first = 0;
+		pcie_aer_force_firmware_first(dev->port, 0);
 		return 0;
 	}
 	return -ENXIO;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index c82548afcd5c..f4adba2d1dd3 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -10,7 +10,6 @@
 #include <linux/module.h>
 #include <linux/cpumask.h>
 #include <linux/pci-aspm.h>
-#include <acpi/acpi_hest.h>
 #include "pci.h"
 
 #define CARDBUS_LATENCY_TIMER	176	/* secondary latency timer */
@@ -904,12 +903,6 @@ void set_pcie_hotplug_bridge(struct pci_dev *pdev)
 		pdev->is_hotplug_bridge = 1;
 }
 
-static void set_pci_aer_firmware_first(struct pci_dev *pdev)
-{
-	if (acpi_hest_firmware_first_pci(pdev))
-		pdev->aer_firmware_first = 1;
-}
-
 #define LEGACY_IO_RESOURCE	(IORESOURCE_IO | IORESOURCE_PCI_FIXED)
 
 /**
@@ -939,7 +932,6 @@ int pci_setup_device(struct pci_dev *dev)
 	dev->multifunction = !!(hdr_type & 0x80);
 	dev->error_state = pci_channel_io_normal;
 	set_pcie_port_type(dev);
-	set_pci_aer_firmware_first(dev);
 
 	list_for_each_entry(slot, &dev->bus->slots, list)
 		if (PCI_SLOT(dev->devfn) == slot->number)
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 4fe36d2e1049..19b111383f62 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -838,65 +838,11 @@ static void pci_bus_dump_resources(struct pci_bus *bus)
 	}
 }
 
-static int __init pci_bus_get_depth(struct pci_bus *bus)
-{
-	int depth = 0;
-	struct pci_dev *dev;
-
-	list_for_each_entry(dev, &bus->devices, bus_list) {
-		int ret;
-		struct pci_bus *b = dev->subordinate;
-		if (!b)
-			continue;
-
-		ret = pci_bus_get_depth(b);
-		if (ret + 1 > depth)
-			depth = ret + 1;
-	}
-
-	return depth;
-}
-static int __init pci_get_max_depth(void)
-{
-	int depth = 0;
-	struct pci_bus *bus;
-
-	list_for_each_entry(bus, &pci_root_buses, node) {
-		int ret;
-
-		ret = pci_bus_get_depth(bus);
-		if (ret > depth)
-			depth = ret;
-	}
-
-	return depth;
-}
-
-/*
- * first try will not touch pci bridge res
- * second  and later try will clear small leaf bridge res
- * will stop till to the max  deepth if can not find good one
- */
 void __init
 pci_assign_unassigned_resources(void)
 {
 	struct pci_bus *bus;
-	int tried_times = 0;
-	enum release_type rel_type = leaf_only;
-	struct resource_list_x head, *list;
-	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
-				  IORESOURCE_PREFETCH;
-	unsigned long failed_type;
-	int max_depth = pci_get_max_depth();
-	int pci_try_num;
 
-	head.next = NULL;
-
-	pci_try_num = max_depth + 1;
-	printk(KERN_DEBUG "PCI: max bus depth: %d pci_try_num: %d\n",
-		 max_depth, pci_try_num);
-
-again:
 	/* Depth first, calculate sizes and alignments of all
 	   subordinate buses. */
 	list_for_each_entry(bus, &pci_root_buses, node) {
@@ -904,65 +850,9 @@ again:
 	}
 	/* Depth last, allocate resources and update the hardware. */
 	list_for_each_entry(bus, &pci_root_buses, node) {
-		__pci_bus_assign_resources(bus, &head);
-	}
-	tried_times++;
-
-	/* any device complain? */
-	if (!head.next)
-		goto enable_and_dump;
-	failed_type = 0;
-	for (list = head.next; list;) {
-		failed_type |= list->flags;
-		list = list->next;
-	}
-	/*
-	 * io port are tight, don't try extra
-	 * or if reach the limit, don't want to try more
-	 */
-	failed_type &= type_mask;
-	if ((failed_type == IORESOURCE_IO) || (tried_times >= pci_try_num)) {
-		free_failed_list(&head);
-		goto enable_and_dump;
-	}
-
-	printk(KERN_DEBUG "PCI: No. %d try to assign unassigned res\n",
-			 tried_times + 1);
-
-	/* third times and later will not check if it is leaf */
-	if ((tried_times + 1) > 2)
-		rel_type = whole_subtree;
-
-	/*
-	 * Try to release leaf bridge's resources that doesn't fit resource of
-	 * child device under that bridge
-	 */
-	for (list = head.next; list;) {
-		bus = list->dev->bus;
-		pci_bus_release_bridge_resources(bus, list->flags & type_mask,
-						  rel_type);
-		list = list->next;
-	}
-	/* restore size and flags */
-	for (list = head.next; list;) {
-		struct resource *res = list->res;
-
-		res->start = list->start;
-		res->end = list->end;
-		res->flags = list->flags;
-		if (list->dev->subordinate)
-			res->flags = 0;
-
-		list = list->next;
-	}
-	free_failed_list(&head);
-
-	goto again;
-
-enable_and_dump:
-	/* Depth last, update the hardware. */
-	list_for_each_entry(bus, &pci_root_buses, node)
+		pci_bus_assign_resources(bus);
 		pci_enable_bridges(bus);
+	}
 
 	/* dump the resource on buses */
 	list_for_each_entry(bus, &pci_root_buses, node) {
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index acf222f91f5a..fa2339cb1681 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -37,6 +37,9 @@
  */
 #define DASD_CHANQ_MAX_SIZE 4
 
+#define DASD_SLEEPON_START_TAG	(void *) 1
+#define DASD_SLEEPON_END_TAG	(void *) 2
+
 /*
  * SECTION: exported variables of dasd.c
  */
@@ -1472,7 +1475,10 @@ void dasd_add_request_tail(struct dasd_ccw_req *cqr)
  */
 static void dasd_wakeup_cb(struct dasd_ccw_req *cqr, void *data)
 {
-	wake_up((wait_queue_head_t *) data);
+	spin_lock_irq(get_ccwdev_lock(cqr->startdev->cdev));
+	cqr->callback_data = DASD_SLEEPON_END_TAG;
+	spin_unlock_irq(get_ccwdev_lock(cqr->startdev->cdev));
+	wake_up(&generic_waitq);
 }
 
 static inline int _wait_for_wakeup(struct dasd_ccw_req *cqr)
@@ -1482,10 +1488,7 @@ static inline int _wait_for_wakeup(struct dasd_ccw_req *cqr)
 
 	device = cqr->startdev;
 	spin_lock_irq(get_ccwdev_lock(device->cdev));
-	rc = ((cqr->status == DASD_CQR_DONE ||
-	       cqr->status == DASD_CQR_NEED_ERP ||
-	       cqr->status == DASD_CQR_TERMINATED) &&
-	      list_empty(&cqr->devlist));
+	rc = (cqr->callback_data == DASD_SLEEPON_END_TAG);
 	spin_unlock_irq(get_ccwdev_lock(device->cdev));
 	return rc;
 }
@@ -1573,7 +1576,7 @@ static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible)
 			wait_event(generic_waitq, !(device->stopped));
 
 		cqr->callback = dasd_wakeup_cb;
-		cqr->callback_data = (void *) &generic_waitq;
+		cqr->callback_data = DASD_SLEEPON_START_TAG;
 		dasd_add_request_tail(cqr);
 		if (interruptible) {
 			rc = wait_event_interruptible(
@@ -1652,7 +1655,7 @@ int dasd_sleep_on_immediatly(struct dasd_ccw_req *cqr)
 	}
 
 	cqr->callback = dasd_wakeup_cb;
-	cqr->callback_data = (void *) &generic_waitq;
+	cqr->callback_data = DASD_SLEEPON_START_TAG;
 	cqr->status = DASD_CQR_QUEUED;
 	list_add(&cqr->devlist, &device->ccw_queue);
 
diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index 4315b23590bd..eacb588a9345 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -120,7 +120,8 @@
 #define  MX2_UCR3_RXDMUXSEL	 (1<<2)  /* RXD Muxed Input Select, on mx2/mx3 */
 #define  UCR3_INVT  	 (1<<1)  /* Inverted Infrared transmission */
 #define  UCR3_BPEN  	 (1<<0)  /* Preset registers enable */
-#define  UCR4_CTSTL_32   (32<<10) /* CTS trigger level (32 chars) */
+#define  UCR4_CTSTL_SHF  10      /* CTS trigger level shift */
+#define  UCR4_CTSTL_MASK 0x3F    /* CTS trigger is 6 bits wide */
 #define  UCR4_INVR  	 (1<<9)  /* Inverted infrared reception */
 #define  UCR4_ENIRI 	 (1<<8)  /* Serial infrared interrupt enable */
 #define  UCR4_WKEN  	 (1<<7)  /* Wake interrupt enable */
@@ -591,6 +592,9 @@ static int imx_setup_ufcr(struct imx_port *sport, unsigned int mode)
 	return 0;
 }
 
+/* half the RX buffer size */
+#define CTSTL 16
+
 static int imx_startup(struct uart_port *port)
 {
 	struct imx_port *sport = (struct imx_port *)port;
@@ -607,6 +611,10 @@ static int imx_startup(struct uart_port *port)
 	if (USE_IRDA(sport))
 		temp |= UCR4_IRSC;
 
+	/* set the trigger level for CTS */
+	temp &= ~(UCR4_CTSTL_MASK<<  UCR4_CTSTL_SHF);
+	temp |= CTSTL<<  UCR4_CTSTL_SHF;
+
 	writel(temp & ~UCR4_DREN, sport->port.membase + UCR4);
 
 	if (USE_IRDA(sport)) {
diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c
index a176ab4bd65b..02469c31bf0b 100644
--- a/drivers/serial/mpc52xx_uart.c
+++ b/drivers/serial/mpc52xx_uart.c
@@ -1467,7 +1467,7 @@ mpc52xx_uart_init(void)
 	/*
 	 * Map the PSC FIFO Controller and init if on MPC512x.
 	 */
-	if (psc_ops->fifoc_init) {
+	if (psc_ops && psc_ops->fifoc_init) {
 		ret = psc_ops->fifoc_init();
 		if (ret)
 			return ret;
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index 4a6366a42129..111a01a747fc 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -380,6 +380,7 @@ static int usbfs_rmdir(struct inode *dir, struct dentry *dentry)
 	mutex_lock(&inode->i_mutex);
 	dentry_unhash(dentry);
 	if (usbfs_empty(dentry)) {
+		dont_mount(dentry);
 		drop_nlink(dentry->d_inode);
 		drop_nlink(dentry->d_inode);
 		dput(dentry);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index e69d238c5af0..49fa953aaf6e 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1035,7 +1035,12 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
 /* This actually signals the guest, using eventfd. */
 void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
-	__u16 flags = 0;
+	__u16 flags;
+	/* Flush out used index updates. This is paired
+	 * with the barrier that the Guest executes when enabling
+	 * interrupts. */
+	smp_mb();
+
 	if (get_user(flags, &vq->avail->flags)) {
 		vq_err(vq, "Failed to get flags");
 		return;
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 0bf5020d0d32..b87ba23442d2 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -175,7 +175,7 @@ config SA1100_WATCHDOG
 
 config MPCORE_WATCHDOG
 	tristate "MPcore watchdog"
-	depends on ARM_MPCORE_PLATFORM && LOCAL_TIMERS
+	depends on HAVE_ARM_TWD
 	help
 	  Watchdog timer embedded into the MPcore system.
 
diff --git a/drivers/watchdog/mpcore_wdt.c b/drivers/watchdog/mpcore_wdt.c
index 016c6a791cab..b8ec7aca3c8e 100644
--- a/drivers/watchdog/mpcore_wdt.c
+++ b/drivers/watchdog/mpcore_wdt.c
@@ -31,8 +31,9 @@
 #include <linux/platform_device.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
+#include <linux/io.h>
 
-#include <asm/hardware/arm_twd.h>
+#include <asm/smp_twd.h>
 
 struct mpcore_wdt {
 	unsigned long	timer_alive;
@@ -44,7 +45,7 @@ struct mpcore_wdt {
 };
 
 static struct platform_device *mpcore_wdt_dev;
-extern unsigned int mpcore_timer_rate;
+static DEFINE_SPINLOCK(wdt_lock);
 
 #define TIMER_MARGIN	60
 static int mpcore_margin = TIMER_MARGIN;
@@ -94,13 +95,15 @@ static irqreturn_t mpcore_wdt_fire(int irq, void *arg)
  */
 static void mpcore_wdt_keepalive(struct mpcore_wdt *wdt)
 {
-	unsigned int count;
+	unsigned long count;
 
+	spin_lock(&wdt_lock);
 	/* Assume prescale is set to 256 */
-	count = (mpcore_timer_rate / 256) * mpcore_margin;
+	count =  __raw_readl(wdt->base + TWD_WDOG_COUNTER);
+	count = (0xFFFFFFFFU - count) * (HZ / 5);
+	count = (count / 256) * mpcore_margin;
 
 	/* Reload the counter */
-	spin_lock(&wdt_lock);
 	writel(count + wdt->perturb, wdt->base + TWD_WDOG_LOAD);
 	wdt->perturb = wdt->perturb ? 0 : 1;
 	spin_unlock(&wdt_lock);
@@ -119,7 +122,6 @@ static void mpcore_wdt_start(struct mpcore_wdt *wdt)
 {
 	dev_printk(KERN_INFO, wdt->dev, "enabling watchdog.\n");
 
-	spin_lock(&wdt_lock);
 	/* This loads the count register but does NOT start the count yet */
 	mpcore_wdt_keepalive(wdt);
 
@@ -130,7 +132,6 @@ static void mpcore_wdt_start(struct mpcore_wdt *wdt)
 		/* Enable watchdog - prescale=256, watchdog mode=1, enable=1 */
 		writel(0x0000FF09, wdt->base + TWD_WDOG_CONTROL);
 	}
-	spin_unlock(&wdt_lock);
 }
 
 static int mpcore_wdt_set_heartbeat(int t)
@@ -360,7 +361,7 @@ static int __devinit mpcore_wdt_probe(struct platform_device *dev)
 	mpcore_wdt_miscdev.parent = &dev->dev;
 	ret = misc_register(&mpcore_wdt_miscdev);
 	if (ret) {
-		dev_printk(KERN_ERR, _dev,
+		dev_printk(KERN_ERR, wdt->dev,
 			"cannot register miscdev on minor=%d (err=%d)\n",
 							WATCHDOG_MINOR, ret);
 		goto err_misc;
@@ -369,13 +370,13 @@ static int __devinit mpcore_wdt_probe(struct platform_device *dev)
 	ret = request_irq(wdt->irq, mpcore_wdt_fire, IRQF_DISABLED,
 							"mpcore_wdt", wdt);
 	if (ret) {
-		dev_printk(KERN_ERR, _dev,
+		dev_printk(KERN_ERR, wdt->dev,
 			"cannot register IRQ%d for watchdog\n", wdt->irq);
 		goto err_irq;
 	}
 
 	mpcore_wdt_stop(wdt);
-	platform_set_drvdata(&dev->dev, wdt);
+	platform_set_drvdata(dev, wdt);
 	mpcore_wdt_dev = dev;
 
 	return 0;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e84ef60ffe35..97a97839a867 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1481,12 +1481,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 		ret = -EBADF;
 		goto out_drop_write;
 	}
+
 	src = src_file->f_dentry->d_inode;
 
 	ret = -EINVAL;
 	if (src == inode)
 		goto out_fput;
 
+	/* the src must be open for reading */
+	if (!(src_file->f_mode & FMODE_READ))
+		goto out_fput;
+
 	ret = -EISDIR;
 	if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
 		goto out_fput;
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c
index b5808cdb2232..039b5011d83b 100644
--- a/fs/cachefiles/security.c
+++ b/fs/cachefiles/security.c
@@ -77,6 +77,8 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache,
 /*
  * check the security details of the on-disk cache
  * - must be called with security override in force
+ * - must return with a security override in force - even in the case of an
+ *   error
  */
 int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
 					struct dentry *root,
@@ -99,6 +101,8 @@ int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
 	 * which create files */
 	ret = set_create_files_as(new, root->d_inode);
 	if (ret < 0) {
+		abort_creds(new);
+		cachefiles_begin_secure(cache, _saved_cred);
 		_leave(" = %d [cfa]", ret);
 		return ret;
 	}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4b42c2bb603f..a9005d862ed4 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -504,7 +504,6 @@ static void writepages_finish(struct ceph_osd_request *req,
 	int i;
 	struct ceph_snap_context *snapc = req->r_snapc;
 	struct address_space *mapping = inode->i_mapping;
-	struct writeback_control *wbc = req->r_wbc;
 	__s32 rc = -EIO;
 	u64 bytes = 0;
 	struct ceph_client *client = ceph_inode_to_client(inode);
@@ -546,10 +545,6 @@ static void writepages_finish(struct ceph_osd_request *req,
 			clear_bdi_congested(&client->backing_dev_info,
 					    BLK_RW_ASYNC);
 
-		if (i >= wrote) {
-			dout("inode %p skipping page %p\n", inode, page);
-			wbc->pages_skipped++;
-		}
 		ceph_put_snap_context((void *)page->private);
 		page->private = 0;
 		ClearPagePrivate(page);
@@ -799,7 +794,6 @@ get_more_pages:
 				alloc_page_vec(client, req);
 				req->r_callback = writepages_finish;
 				req->r_inode = inode;
-				req->r_wbc = wbc;
 			}
 
 			/* note position of first page in pvec */
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 0c1681806867..d9400534b279 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -858,6 +858,8 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
 }
 
 /*
+ * Remove a cap.  Take steps to deal with a racing iterate_session_caps.
+ *
  * caller should hold i_lock.
  * caller will not hold session s_mutex if called from destroy_inode.
  */
@@ -866,15 +868,10 @@ void __ceph_remove_cap(struct ceph_cap *cap)
 	struct ceph_mds_session *session = cap->session;
 	struct ceph_inode_info *ci = cap->ci;
 	struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
+	int removed = 0;
 
 	dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
 
-	/* remove from inode list */
-	rb_erase(&cap->ci_node, &ci->i_caps);
-	cap->ci = NULL;
-	if (ci->i_auth_cap == cap)
-		ci->i_auth_cap = NULL;
-
 	/* remove from session list */
 	spin_lock(&session->s_cap_lock);
 	if (session->s_cap_iterator == cap) {
@@ -885,10 +882,18 @@ void __ceph_remove_cap(struct ceph_cap *cap)
 		list_del_init(&cap->session_caps);
 		session->s_nr_caps--;
 		cap->session = NULL;
+		removed = 1;
 	}
+	/* protect backpointer with s_cap_lock: see iterate_session_caps */
+	cap->ci = NULL;
 	spin_unlock(&session->s_cap_lock);
 
-	if (cap->session == NULL)
+	/* remove from inode list */
+	rb_erase(&cap->ci_node, &ci->i_caps);
+	if (ci->i_auth_cap == cap)
+		ci->i_auth_cap = NULL;
+
+	if (removed)
 		ceph_put_cap(cap);
 
 	if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 261f3e6c0bcf..85b4d2ffdeba 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -733,6 +733,10 @@ no_change:
 				__ceph_get_fmode(ci, cap_fmode);
 			spin_unlock(&inode->i_lock);
 		}
+	} else if (cap_fmode >= 0) {
+		pr_warning("mds issued no caps on %llx.%llx\n",
+			   ceph_vinop(inode));
+		__ceph_get_fmode(ci, cap_fmode);
 	}
 
 	/* update delegation info? */
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 60a9a4ae47be..24561a557e01 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -736,9 +736,10 @@ static void cleanup_cap_releases(struct ceph_mds_session *session)
 }
 
 /*
- * Helper to safely iterate over all caps associated with a session.
+ * Helper to safely iterate over all caps associated with a session, with
+ * special care taken to handle a racing __ceph_remove_cap().
  *
- * caller must hold session s_mutex
+ * Caller must hold session s_mutex.
  */
 static int iterate_session_caps(struct ceph_mds_session *session,
 				 int (*cb)(struct inode *, struct ceph_cap *,
@@ -2136,7 +2137,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
 	struct ceph_mds_session *session = NULL;
 	struct ceph_msg *reply;
 	struct rb_node *p;
-	int err;
+	int err = -ENOMEM;
 	struct ceph_pagelist *pagelist;
 
 	pr_info("reconnect to recovering mds%d\n", mds);
@@ -2185,7 +2186,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
 		goto fail;
 	err = iterate_session_caps(session, encode_caps_cb, pagelist);
 	if (err < 0)
-		goto out;
+		goto fail;
 
 	/*
 	 * snaprealms.  we provide mds with the ino, seq (version), and
@@ -2213,28 +2214,31 @@ send:
 	reply->nr_pages = calc_pages_for(0, pagelist->length);
 	ceph_con_send(&session->s_con, reply);
 
-	if (session) {
-		session->s_state = CEPH_MDS_SESSION_OPEN;
-		__wake_requests(mdsc, &session->s_waiting);
-	}
+	session->s_state = CEPH_MDS_SESSION_OPEN;
+	mutex_unlock(&session->s_mutex);
+
+	mutex_lock(&mdsc->mutex);
+	__wake_requests(mdsc, &session->s_waiting);
+	mutex_unlock(&mdsc->mutex);
+
+	ceph_put_mds_session(session);
 
-out:
 	up_read(&mdsc->snap_rwsem);
-	if (session) {
-		mutex_unlock(&session->s_mutex);
-		ceph_put_mds_session(session);
-	}
 	mutex_lock(&mdsc->mutex);
 	return;
 
 fail:
 	ceph_msg_put(reply);
+	up_read(&mdsc->snap_rwsem);
+	mutex_unlock(&session->s_mutex);
+	ceph_put_mds_session(session);
 fail_nomsg:
 	ceph_pagelist_release(pagelist);
 	kfree(pagelist);
 fail_nopagelist:
-	pr_err("ENOMEM preparing reconnect for mds%d\n", mds);
-	goto out;
+	pr_err("error %d preparing reconnect for mds%d\n", err, mds);
+	mutex_lock(&mdsc->mutex);
+	return;
 }
 
 
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 509f57d9ccb3..cd4fadb6491a 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -492,7 +492,14 @@ static void prepare_write_message(struct ceph_connection *con)
 		list_move_tail(&m->list_head, &con->out_sent);
 	}
 
-	m->hdr.seq = cpu_to_le64(++con->out_seq);
+	/*
+	 * only assign outgoing seq # if we haven't sent this message
+	 * yet.  if it is requeued, resend with it's original seq.
+	 */
+	if (m->needs_out_seq) {
+		m->hdr.seq = cpu_to_le64(++con->out_seq);
+		m->needs_out_seq = false;
+	}
 
 	dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n",
 	     m, con->out_seq, le16_to_cpu(m->hdr.type),
@@ -1986,6 +1993,8 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
 
 	BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
 
+	msg->needs_out_seq = true;
+
 	/* queue */
 	mutex_lock(&con->mutex);
 	BUG_ON(!list_empty(&msg->list_head));
@@ -2085,15 +2094,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len,
 	kref_init(&m->kref);
 	INIT_LIST_HEAD(&m->list_head);
 
+	m->hdr.tid = 0;
 	m->hdr.type = cpu_to_le16(type);
+	m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
+	m->hdr.version = 0;
 	m->hdr.front_len = cpu_to_le32(front_len);
 	m->hdr.middle_len = 0;
 	m->hdr.data_len = cpu_to_le32(page_len);
 	m->hdr.data_off = cpu_to_le16(page_off);
-	m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
+	m->hdr.reserved = 0;
 	m->footer.front_crc = 0;
 	m->footer.middle_crc = 0;
 	m->footer.data_crc = 0;
+	m->footer.flags = 0;
 	m->front_max = front_len;
 	m->front_is_vmalloc = false;
 	m->more_to_follow = false;
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h
index a343dae73cdc..a5caf91cc971 100644
--- a/fs/ceph/messenger.h
+++ b/fs/ceph/messenger.h
@@ -86,6 +86,7 @@ struct ceph_msg {
 	struct kref kref;
 	bool front_is_vmalloc;
 	bool more_to_follow;
+	bool needs_out_seq;
 	int front_max;
 
 	struct ceph_msgpool *pool;
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index c7b4dedaace6..3514f71ff85f 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -565,7 +565,8 @@ static int __map_osds(struct ceph_osd_client *osdc,
 {
 	struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
 	struct ceph_pg pgid;
-	int o = -1;
+	int acting[CEPH_PG_MAX_SIZE];
+	int o = -1, num = 0;
 	int err;
 
 	dout("map_osds %p tid %lld\n", req, req->r_tid);
@@ -576,10 +577,16 @@ static int __map_osds(struct ceph_osd_client *osdc,
 	pgid = reqhead->layout.ol_pgid;
 	req->r_pgid = pgid;
 
-	o = ceph_calc_pg_primary(osdc->osdmap, pgid);
+	err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting);
+	if (err > 0) {
+		o = acting[0];
+		num = err;
+	}
 
 	if ((req->r_osd && req->r_osd->o_osd == o &&
-	     req->r_sent >= req->r_osd->o_incarnation) ||
+	     req->r_sent >= req->r_osd->o_incarnation &&
+	     req->r_num_pg_osds == num &&
+	     memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) ||
 	    (req->r_osd == NULL && o == -1))
 		return 0;  /* no change */
 
@@ -587,6 +594,10 @@ static int __map_osds(struct ceph_osd_client *osdc,
 	     req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
 	     req->r_osd ? req->r_osd->o_osd : -1);
 
+	/* record full pg acting set */
+	memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num);
+	req->r_num_pg_osds = num;
+
 	if (req->r_osd) {
 		__cancel_request(req);
 		list_del_init(&req->r_osd_item);
@@ -612,7 +623,7 @@ static int __map_osds(struct ceph_osd_client *osdc,
 		__remove_osd_from_lru(req->r_osd);
 		list_add(&req->r_osd_item, &req->r_osd->o_requests);
 	}
-	err = 1;   /* osd changed */
+	err = 1;   /* osd or pg changed */
 
 out:
 	return err;
@@ -779,16 +790,18 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 	struct ceph_osd_request *req;
 	u64 tid;
 	int numops, object_len, flags;
+	s32 result;
 
 	tid = le64_to_cpu(msg->hdr.tid);
 	if (msg->front.iov_len < sizeof(*rhead))
 		goto bad;
 	numops = le32_to_cpu(rhead->num_ops);
 	object_len = le32_to_cpu(rhead->object_len);
+	result = le32_to_cpu(rhead->result);
 	if (msg->front.iov_len != sizeof(*rhead) + object_len +
 	    numops * sizeof(struct ceph_osd_op))
 		goto bad;
-	dout("handle_reply %p tid %llu\n", msg, tid);
+	dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result);
 
 	/* lookup */
 	mutex_lock(&osdc->request_mutex);
@@ -834,7 +847,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 	dout("handle_reply tid %llu flags %d\n", tid, flags);
 
 	/* either this is a read, or we got the safe response */
-	if ((flags & CEPH_OSD_FLAG_ONDISK) ||
+	if (result < 0 ||
+	    (flags & CEPH_OSD_FLAG_ONDISK) ||
 	    ((flags & CEPH_OSD_FLAG_WRITE) == 0))
 		__unregister_request(osdc, req);
 
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h
index b0759911e7c3..ce776989ef6a 100644
--- a/fs/ceph/osd_client.h
+++ b/fs/ceph/osd_client.h
@@ -48,6 +48,8 @@ struct ceph_osd_request {
 	struct list_head r_osd_item;
 	struct ceph_osd *r_osd;
 	struct ceph_pg   r_pgid;
+	int              r_pg_osds[CEPH_PG_MAX_SIZE];
+	int              r_num_pg_osds;
 
 	struct ceph_connection *r_con_filling_msg;
 
@@ -66,7 +68,6 @@ struct ceph_osd_request {
 	struct list_head  r_unsafe_item;
 
 	struct inode *r_inode;         	      /* for use by callbacks */
-	struct writeback_control *r_wbc;      /* ditto */
 
 	char              r_oid[40];          /* object name */
 	int               r_oid_len;
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index 2e2c15eed82a..cfdd8f4388b7 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -1041,12 +1041,33 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 }
 
 /*
+ * Return acting set for given pgid.
+ */
+int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
+			int *acting)
+{
+	int rawosds[CEPH_PG_MAX_SIZE], *osds;
+	int i, o, num = CEPH_PG_MAX_SIZE;
+
+	osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
+	if (!osds)
+		return -1;
+
+	/* primary is first up osd */
+	o = 0;
+	for (i = 0; i < num; i++)
+		if (ceph_osd_is_up(osdmap, osds[i]))
+			acting[o++] = osds[i];
+	return o;
+}
+
+/*
  * Return primary osd for given pgid, or -1 if none.
  */
 int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
 {
-	int rawosds[10], *osds;
-	int i, num = ARRAY_SIZE(rawosds);
+	int rawosds[CEPH_PG_MAX_SIZE], *osds;
+	int i, num = CEPH_PG_MAX_SIZE;
 
 	osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
 	if (!osds)
@@ -1054,9 +1075,7 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
 
 	/* primary is first up osd */
 	for (i = 0; i < num; i++)
-		if (ceph_osd_is_up(osdmap, osds[i])) {
+		if (ceph_osd_is_up(osdmap, osds[i]))
 			return osds[i];
-			break;
-		}
 	return -1;
 }
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h
index 8bc9f1e4f562..970b547e510d 100644
--- a/fs/ceph/osdmap.h
+++ b/fs/ceph/osdmap.h
@@ -120,6 +120,8 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
 				   const char *oid,
 				   struct ceph_file_layout *fl,
 				   struct ceph_osdmap *osdmap);
+extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
+			       int *acting);
 extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
 				struct ceph_pg pgid);
 
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h
index a1fc1d017b58..fd56451a871f 100644
--- a/fs/ceph/rados.h
+++ b/fs/ceph/rados.h
@@ -58,6 +58,7 @@ struct ceph_timespec {
 #define CEPH_PG_LAYOUT_LINEAR 2
 #define CEPH_PG_LAYOUT_HYBRID 3
 
+#define CEPH_PG_MAX_SIZE      16  /* max # osds in a single pg */
 
 /*
  * placement group.
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index f888cf487b7c..110857ba9269 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -47,10 +47,20 @@ const char *ceph_file_part(const char *s, int len)
  */
 static void ceph_put_super(struct super_block *s)
 {
-	struct ceph_client *cl = ceph_client(s);
+	struct ceph_client *client = ceph_sb_to_client(s);
 
 	dout("put_super\n");
-	ceph_mdsc_close_sessions(&cl->mdsc);
+	ceph_mdsc_close_sessions(&client->mdsc);
+
+	/*
+	 * ensure we release the bdi before put_anon_super releases
+	 * the device name.
+	 */
+	if (s->s_bdi == &client->backing_dev_info) {
+		bdi_unregister(&client->backing_dev_info);
+		s->s_bdi = NULL;
+	}
+
 	return;
 }
 
@@ -636,6 +646,8 @@ static void ceph_destroy_client(struct ceph_client *client)
 	destroy_workqueue(client->pg_inv_wq);
 	destroy_workqueue(client->trunc_wq);
 
+	bdi_destroy(&client->backing_dev_info);
+
 	if (client->msgr)
 		ceph_messenger_destroy(client->msgr);
 	mempool_destroy(client->wb_pagevec_pool);
@@ -876,14 +888,14 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
 {
 	int err;
 
-	sb->s_bdi = &client->backing_dev_info;
-
 	/* set ra_pages based on rsize mount option? */
 	if (client->mount_args->rsize >= PAGE_CACHE_SIZE)
 		client->backing_dev_info.ra_pages =
 			(client->mount_args->rsize + PAGE_CACHE_SIZE - 1)
 			>> PAGE_SHIFT;
 	err = bdi_register_dev(&client->backing_dev_info, sb->s_dev);
+	if (!err)
+		sb->s_bdi = &client->backing_dev_info;
 	return err;
 }
 
@@ -957,9 +969,6 @@ static void ceph_kill_sb(struct super_block *s)
 	dout("kill_sb %p\n", s);
 	ceph_mdsc_pre_umount(&client->mdsc);
 	kill_anon_super(s);    /* will call put_super after sb is r/o */
-	if (s->s_bdi == &client->backing_dev_info)
-		bdi_unregister(&client->backing_dev_info);
-	bdi_destroy(&client->backing_dev_info);
 	ceph_destroy_client(client);
 }
 
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index ecf0ffbe2b64..0c2fd17439c8 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -502,6 +502,7 @@ struct dfs_info3_param {
 #define CIFS_FATTR_DFS_REFERRAL		0x1
 #define CIFS_FATTR_DELETE_PENDING	0x2
 #define CIFS_FATTR_NEED_REVAL		0x4
+#define CIFS_FATTR_INO_COLLISION	0x8
 
 struct cifs_fattr {
 	u32		cf_flags;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 35ec11716213..29b9ea244c81 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -715,6 +715,16 @@ cifs_find_inode(struct inode *inode, void *opaque)
 	if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
 		return 0;
 
+	/*
+	 * uh oh -- it's a directory. We can't use it since hardlinked dirs are
+	 * verboten. Disable serverino and return it as if it were found, the
+	 * caller can discard it, generate a uniqueid and retry the find
+	 */
+	if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) {
+		fattr->cf_flags |= CIFS_FATTR_INO_COLLISION;
+		cifs_autodisable_serverino(CIFS_SB(inode->i_sb));
+	}
+
 	return 1;
 }
 
@@ -734,15 +744,22 @@ cifs_iget(struct super_block *sb, struct cifs_fattr *fattr)
 	unsigned long hash;
 	struct inode *inode;
 
+retry_iget5_locked:
 	cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid));
 
 	/* hash down to 32-bits on 32-bit arch */
 	hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid);
 
 	inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr);
-
-	/* we have fattrs in hand, update the inode */
 	if (inode) {
+		/* was there a problematic inode number collision? */
+		if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) {
+			iput(inode);
+			fattr->cf_uniqueid = iunique(sb, ROOT_I);
+			fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION;
+			goto retry_iget5_locked;
+		}
+
 		cifs_fattr_to_inode(inode, fattr);
 		if (sb->s_flags & MS_NOATIME)
 			inode->i_flags |= S_NOATIME | S_NOCMTIME;
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 8e48b52205aa..0b502f80c691 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -645,6 +645,7 @@ static void detach_groups(struct config_group *group)
 
 		configfs_detach_group(sd->s_element);
 		child->d_inode->i_flags |= S_DEAD;
+		dont_mount(child);
 
 		mutex_unlock(&child->d_inode->i_mutex);
 
@@ -840,6 +841,7 @@ static int configfs_attach_item(struct config_item *parent_item,
 			mutex_lock(&dentry->d_inode->i_mutex);
 			configfs_remove_dir(item);
 			dentry->d_inode->i_flags |= S_DEAD;
+			dont_mount(dentry);
 			mutex_unlock(&dentry->d_inode->i_mutex);
 			d_delete(dentry);
 		}
@@ -882,6 +884,7 @@ static int configfs_attach_group(struct config_item *parent_item,
 		if (ret) {
 			configfs_detach_item(item);
 			dentry->d_inode->i_flags |= S_DEAD;
+			dont_mount(dentry);
 		}
 		configfs_adjust_dir_dirent_depth_after_populate(sd);
 		mutex_unlock(&dentry->d_inode->i_mutex);
@@ -1725,6 +1728,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
 	mutex_unlock(&configfs_symlink_mutex);
 	configfs_detach_group(&group->cg_item);
 	dentry->d_inode->i_flags |= S_DEAD;
+	dont_mount(dentry);
 	mutex_unlock(&dentry->d_inode->i_mutex);
 
 	d_delete(dentry);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 4d74fc72c195..0210898458b2 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -277,8 +277,10 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n"
 DEFINE_SIMPLE_ATTRIBUTE(fops_x32_ro, debugfs_u32_get, NULL, "0x%08llx\n");
 DEFINE_SIMPLE_ATTRIBUTE(fops_x32_wo, NULL, debugfs_u32_set, "0x%08llx\n");
 
+DEFINE_SIMPLE_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set, "0x%016llx\n");
+
 /*
- * debugfs_create_x{8,16,32} - create a debugfs file that is used to read and write an unsigned {8,16,32}-bit value
+ * debugfs_create_x{8,16,32,64} - create a debugfs file that is used to read and write an unsigned {8,16,32,64}-bit value
  *
  * These functions are exactly the same as the above functions (but use a hex
  * output for the decimal challenged). For details look at the above unsigned
@@ -357,6 +359,23 @@ struct dentry *debugfs_create_x32(const char *name, mode_t mode,
 }
 EXPORT_SYMBOL_GPL(debugfs_create_x32);
 
+/**
+ * debugfs_create_x64 - create a debugfs file that is used to read and write an unsigned 64-bit value
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have
+ * @parent: a pointer to the parent dentry for this file.  This should be a
+ *          directory dentry if set.  If this parameter is %NULL, then the
+ *          file will be created in the root of the debugfs filesystem.
+ * @value: a pointer to the variable that the file should read to and write
+ *         from.
+ */
+struct dentry *debugfs_create_x64(const char *name, mode_t mode,
+				 struct dentry *parent, u64 *value)
+{
+	return debugfs_create_file(name, mode, parent, value, &fops_x64);
+}
+EXPORT_SYMBOL_GPL(debugfs_create_x64);
+
 
 static int debugfs_size_t_set(void *data, u64 val)
 {
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 157382fa6256..b66832ac33ac 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -446,10 +446,8 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 	/* initialize the mount flag and determine the default error handler */
 	flag = JFS_ERR_REMOUNT_RO;
 
-	if (!parse_options((char *) data, sb, &newLVSize, &flag)) {
-		kfree(sbi);
-		return -EINVAL;
-	}
+	if (!parse_options((char *) data, sb, &newLVSize, &flag))
+		goto out_kfree;
 	sbi->flag = flag;
 
 #ifdef CONFIG_JFS_POSIX_ACL
@@ -458,7 +456,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 
 	if (newLVSize) {
 		printk(KERN_ERR "resize option for remount only\n");
-		return -EINVAL;
+		goto out_kfree;
 	}
 
 	/*
@@ -478,7 +476,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 	inode = new_inode(sb);
 	if (inode == NULL) {
 		ret = -ENOMEM;
-		goto out_kfree;
+		goto out_unload;
 	}
 	inode->i_ino = 0;
 	inode->i_nlink = 1;
@@ -550,9 +548,10 @@ out_mount_failed:
 	make_bad_inode(sbi->direct_inode);
 	iput(sbi->direct_inode);
 	sbi->direct_inode = NULL;
-out_kfree:
+out_unload:
 	if (sbi->nls_tab)
 		unload_nls(sbi->nls_tab);
+out_kfree:
 	kfree(sbi);
 	return ret;
 }
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 5866ee6e1327..d7c23ed8349a 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -333,27 +333,27 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
 		goto fail;
 
 	sb->s_root = d_alloc_root(rootdir);
-	if (!sb->s_root)
-		goto fail2;
+	if (!sb->s_root) {
+		iput(rootdir);
+		goto fail;
+	}
 
 	super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
 	if (!super->s_erase_page)
-		goto fail2;
+		goto fail;
 	memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE);
 
 	/* FIXME: check for read-only mounts */
 	err = logfs_make_writeable(sb);
 	if (err)
-		goto fail3;
+		goto fail1;
 
 	log_super("LogFS: Finished mounting\n");
 	simple_set_mnt(mnt, sb);
 	return 0;
 
-fail3:
+fail1:
 	__free_page(super->s_erase_page);
-fail2:
-	iput(rootdir);
 fail:
 	iput(logfs_super(sb)->s_master_inode);
 	return -EIO;
diff --git a/fs/namei.c b/fs/namei.c
index a7dce91a7e42..b86b96fe1dc3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1641,7 +1641,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	if (nd->last.name[nd->last.len]) {
 		if (open_flag & O_CREAT)
 			goto exit;
-		nd->flags |= LOOKUP_DIRECTORY;
+		nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
 	}
 
 	/* just plain open? */
@@ -1830,6 +1830,8 @@ reval:
 	}
 	if (open_flag & O_DIRECTORY)
 		nd.flags |= LOOKUP_DIRECTORY;
+	if (!(open_flag & O_NOFOLLOW))
+		nd.flags |= LOOKUP_FOLLOW;
 	filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
 	while (unlikely(!filp)) { /* trailing symlink */
 		struct path holder;
@@ -1837,7 +1839,7 @@ reval:
 		void *cookie;
 		error = -ELOOP;
 		/* S_ISDIR part is a temporary automount kludge */
-		if ((open_flag & O_NOFOLLOW) && !S_ISDIR(inode->i_mode))
+		if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode))
 			goto exit_dput;
 		if (count++ == 32)
 			goto exit_dput;
@@ -2174,8 +2176,10 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 		error = security_inode_rmdir(dir, dentry);
 		if (!error) {
 			error = dir->i_op->rmdir(dir, dentry);
-			if (!error)
+			if (!error) {
 				dentry->d_inode->i_flags |= S_DEAD;
+				dont_mount(dentry);
+			}
 		}
 	}
 	mutex_unlock(&dentry->d_inode->i_mutex);
@@ -2259,7 +2263,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
 		if (!error) {
 			error = dir->i_op->unlink(dir, dentry);
 			if (!error)
-				dentry->d_inode->i_flags |= S_DEAD;
+				dont_mount(dentry);
 		}
 	}
 	mutex_unlock(&dentry->d_inode->i_mutex);
@@ -2570,17 +2574,20 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 		return error;
 
 	target = new_dentry->d_inode;
-	if (target) {
+	if (target)
 		mutex_lock(&target->i_mutex);
-		dentry_unhash(new_dentry);
-	}
 	if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
 		error = -EBUSY;
-	else 
+	else {
+		if (target)
+			dentry_unhash(new_dentry);
 		error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
+	}
 	if (target) {
-		if (!error)
+		if (!error) {
 			target->i_flags |= S_DEAD;
+			dont_mount(new_dentry);
+		}
 		mutex_unlock(&target->i_mutex);
 		if (d_unhashed(new_dentry))
 			d_rehash(new_dentry);
@@ -2612,7 +2619,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 		error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
 	if (!error) {
 		if (target)
-			target->i_flags |= S_DEAD;
+			dont_mount(new_dentry);
 		if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
 			d_move(old_dentry, new_dentry);
 	}
diff --git a/fs/namespace.c b/fs/namespace.c
index 8174c8ab5c70..f20cb57d1067 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1432,7 +1432,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
 
 	err = -ENOENT;
 	mutex_lock(&path->dentry->d_inode->i_mutex);
-	if (IS_DEADDIR(path->dentry->d_inode))
+	if (cant_mount(path->dentry))
 		goto out_unlock;
 
 	err = security_sb_check_sb(mnt, path);
@@ -1623,7 +1623,7 @@ static int do_move_mount(struct path *path, char *old_name)
 
 	err = -ENOENT;
 	mutex_lock(&path->dentry->d_inode->i_mutex);
-	if (IS_DEADDIR(path->dentry->d_inode))
+	if (cant_mount(path->dentry))
 		goto out1;
 
 	if (d_unlinked(path->dentry))
@@ -2234,7 +2234,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	if (!check_mnt(root.mnt))
 		goto out2;
 	error = -ENOENT;
-	if (IS_DEADDIR(new.dentry->d_inode))
+	if (cant_mount(old.dentry))
 		goto out2;
 	if (d_unlinked(new.dentry))
 		goto out2;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 1afb0a10229f..e27960cd76ab 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -28,6 +28,7 @@
 #include <linux/path.h> /* struct path */
 #include <linux/slab.h> /* kmem_* */
 #include <linux/types.h>
+#include <linux/sched.h>
 
 #include "inotify.h"
 
@@ -146,6 +147,7 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
 	idr_for_each(&group->inotify_data.idr, idr_callback, group);
 	idr_remove_all(&group->inotify_data.idr);
 	idr_destroy(&group->inotify_data.idr);
+	free_uid(group->inotify_data.user);
 }
 
 void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv)
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 472cdf29ef82..e46ca685b9be 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -546,21 +546,24 @@ retry:
 	if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
 		goto out_err;
 
+	/* we are putting the mark on the idr, take a reference */
+	fsnotify_get_mark(&tmp_ientry->fsn_entry);
+
 	spin_lock(&group->inotify_data.idr_lock);
 	ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
 				group->inotify_data.last_wd+1,
 				&tmp_ientry->wd);
 	spin_unlock(&group->inotify_data.idr_lock);
 	if (ret) {
+		/* we didn't get on the idr, drop the idr reference */
+		fsnotify_put_mark(&tmp_ientry->fsn_entry);
+
 		/* idr was out of memory allocate and try again */
 		if (ret == -EAGAIN)
 			goto retry;
 		goto out_err;
 	}
 
-	/* we put the mark on the idr, take a reference */
-	fsnotify_get_mark(&tmp_ientry->fsn_entry);
-
 	/* we are on the idr, now get on the inode */
 	ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
 	if (ret) {
@@ -578,16 +581,13 @@ retry:
 	/* return the watch descriptor for this new entry */
 	ret = tmp_ientry->wd;
 
-	/* match the ref from fsnotify_init_markentry() */
-	fsnotify_put_mark(&tmp_ientry->fsn_entry);
-
 	/* if this mark added a new event update the group mask */
 	if (mask & ~group->mask)
 		fsnotify_recalc_group_mask(group);
 
 out_err:
-	if (ret < 0)
-		kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry);
+	/* match the ref from fsnotify_init_markentry() */
+	fsnotify_put_mark(&tmp_ientry->fsn_entry);
 
 	return ret;
 }
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 4e50286a4cc3..1dabed286b4c 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -164,8 +164,8 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_
 							name, de->name))
 					goto found;
 			}
+			dir_put_page(page);
 		}
-		dir_put_page(page);
 
 		if (++n >= npages)
 			n = 0;
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 7bf83ddf82e0..baacd98e7cc6 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -373,7 +373,7 @@ struct acpi_pci_root {
 	struct acpi_pci_id id;
 	struct pci_bus *bus;
 	u16 segment;
-	u8 bus_nr;
+	struct resource secondary;	/* downstream bus range */
 
 	u32 osc_support_set;	/* _OSC state of support bits */
 	u32 osc_control_set;	/* _OSC state of control bits */
diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h
index 4f7b44866b76..23d78b4d088b 100644
--- a/include/acpi/acpi_drivers.h
+++ b/include/acpi/acpi_drivers.h
@@ -104,8 +104,7 @@ int acpi_pci_bind_root(struct acpi_device *device);
 
 /* Arch-defined function to add a bus to the system */
 
-struct pci_bus *pci_acpi_scan_root(struct acpi_device *device, int domain,
-				   int bus);
+struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root);
 void pci_acpi_crs_quirks(void);
 
 /* --------------------------------------------------------------------------
diff --git a/include/acpi/acpi_hest.h b/include/acpi/acpi_hest.h
deleted file mode 100644
index 63194d03cb2d..000000000000
--- a/include/acpi/acpi_hest.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef __ACPI_HEST_H
-#define __ACPI_HEST_H
-
-#include <linux/pci.h>
-
-#ifdef CONFIG_ACPI
-extern int acpi_hest_firmware_first_pci(struct pci_dev *pci);
-#else
-static inline int acpi_hest_firmware_first_pci(struct pci_dev *pci) { return 0; }
-#endif
-
-#endif
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
new file mode 100644
index 000000000000..b3365025ff8d
--- /dev/null
+++ b/include/acpi/apei.h
@@ -0,0 +1,34 @@
+/*
+ * apei.h - ACPI Platform Error Interface
+ */
+
+#ifndef ACPI_APEI_H
+#define ACPI_APEI_H
+
+#include <linux/acpi.h>
+#include <linux/cper.h>
+#include <asm/ioctls.h>
+
+#define APEI_ERST_INVALID_RECORD_ID	0xffffffffffffffffULL
+
+#define APEI_ERST_CLEAR_RECORD		_IOW('E', 1, u64)
+#define APEI_ERST_GET_RECORD_COUNT	_IOR('E', 2, u32)
+
+#ifdef __KERNEL__
+
+extern int hest_disable;
+extern int erst_disable;
+
+typedef int (*apei_hest_func_t)(struct acpi_hest_header *hest_hdr, void *data);
+int apei_hest_parse(apei_hest_func_t func, void *data);
+
+int erst_write(const struct cper_record_header *record);
+ssize_t erst_get_record_count(void);
+int erst_get_next_record_id(u64 *record_id);
+ssize_t erst_read(u64 record_id, struct cper_record_header *record,
+		  size_t buflen);
+ssize_t erst_read_next(struct cper_record_header *record, size_t buflen);
+int erst_clear(u64 record_id);
+
+#endif
+#endif
diff --git a/include/acpi/atomicio.h b/include/acpi/atomicio.h
new file mode 100644
index 000000000000..8b9fb4b0b9ce
--- /dev/null
+++ b/include/acpi/atomicio.h
@@ -0,0 +1,10 @@
+#ifndef ACPI_ATOMIC_IO_H
+#define ACPI_ATOMIC_IO_H
+
+int acpi_pre_map_gar(struct acpi_generic_address *reg);
+int acpi_post_unmap_gar(struct acpi_generic_address *reg);
+
+int acpi_atomic_read(u64 *val, struct acpi_generic_address *reg);
+int acpi_atomic_write(u64 val, struct acpi_generic_address *reg);
+
+#endif
diff --git a/include/acpi/hed.h b/include/acpi/hed.h
new file mode 100644
index 000000000000..46e1249b70cc
--- /dev/null
+++ b/include/acpi/hed.h
@@ -0,0 +1,18 @@
+/*
+ * hed.h - ACPI Hardware Error Device
+ *
+ * Copyright (C) 2009, Intel Corp.
+ *	Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifndef ACPI_HED_H
+#define ACPI_HED_H
+
+#include <linux/notifier.h>
+
+int register_acpi_hed_notifier(struct notifier_block *nb);
+void unregister_acpi_hed_notifier(struct notifier_block *nb);
+
+#endif
diff --git a/include/linux/cper.h b/include/linux/cper.h
new file mode 100644
index 000000000000..4b38f905b705
--- /dev/null
+++ b/include/linux/cper.h
@@ -0,0 +1,314 @@
+/*
+ * UEFI Common Platform Error Record
+ *
+ * Copyright (C) 2010, Intel Corp.
+ *	Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef LINUX_CPER_H
+#define LINUX_CPER_H
+
+#include <linux/uuid.h>
+
+/* CPER record signature and the size */
+#define CPER_SIG_RECORD				"CPER"
+#define CPER_SIG_SIZE				4
+/* Used in signature_end field in struct cper_record_header */
+#define CPER_SIG_END				0xffffffff
+
+/*
+ * CPER record header revision, used in revision field in struct
+ * cper_record_header
+ */
+#define CPER_RECORD_REV				0x0100
+
+/*
+ * Severity difinition for error_severity in struct cper_record_header
+ * and section_severity in struct cper_section_descriptor
+ */
+#define CPER_SER_RECOVERABLE			0x0
+#define CPER_SER_FATAL				0x1
+#define CPER_SER_CORRECTED			0x2
+#define CPER_SER_INFORMATIONAL			0x3
+
+/*
+ * Validation bits difinition for validation_bits in struct
+ * cper_record_header. If set, corresponding fields in struct
+ * cper_record_header contain valid information.
+ *
+ * corresponds platform_id
+ */
+#define CPER_VALID_PLATFORM_ID			0x0001
+/* corresponds timestamp */
+#define CPER_VALID_TIMESTAMP			0x0002
+/* corresponds partition_id */
+#define CPER_VALID_PARTITION_ID			0x0004
+
+/*
+ * Notification type used to generate error record, used in
+ * notification_type in struct cper_record_header
+ *
+ * Corrected Machine Check
+ */
+#define CPER_NOTIFY_CMC							\
+	UUID_LE(0x2DCE8BB1, 0xBDD7, 0x450e, 0xB9, 0xAD, 0x9C, 0xF4,	\
+		0xEB, 0xD4, 0xF8, 0x90)
+/* Corrected Platform Error */
+#define CPER_NOTIFY_CPE							\
+	UUID_LE(0x4E292F96, 0xD843, 0x4a55, 0xA8, 0xC2, 0xD4, 0x81,	\
+		0xF2, 0x7E, 0xBE, 0xEE)
+/* Machine Check Exception */
+#define CPER_NOTIFY_MCE							\
+	UUID_LE(0xE8F56FFE, 0x919C, 0x4cc5, 0xBA, 0x88, 0x65, 0xAB,	\
+		0xE1, 0x49, 0x13, 0xBB)
+/* PCI Express Error */
+#define CPER_NOTIFY_PCIE						\
+	UUID_LE(0xCF93C01F, 0x1A16, 0x4dfc, 0xB8, 0xBC, 0x9C, 0x4D,	\
+		0xAF, 0x67, 0xC1, 0x04)
+/* INIT Record (for IPF) */
+#define CPER_NOTIFY_INIT						\
+	UUID_LE(0xCC5263E8, 0x9308, 0x454a, 0x89, 0xD0, 0x34, 0x0B,	\
+		0xD3, 0x9B, 0xC9, 0x8E)
+/* Non-Maskable Interrupt */
+#define CPER_NOTIFY_NMI							\
+	UUID_LE(0x5BAD89FF, 0xB7E6, 0x42c9, 0x81, 0x4A, 0xCF, 0x24,	\
+		0x85, 0xD6, 0xE9, 0x8A)
+/* BOOT Error Record */
+#define CPER_NOTIFY_BOOT						\
+	UUID_LE(0x3D61A466, 0xAB40, 0x409a, 0xA6, 0x98, 0xF3, 0x62,	\
+		0xD4, 0x64, 0xB3, 0x8F)
+/* DMA Remapping Error */
+#define CPER_NOTIFY_DMAR						\
+	UUID_LE(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E,	\
+		0x72, 0x2D, 0xEB, 0x41)
+
+/*
+ * Flags bits definitions for flags in struct cper_record_header
+ * If set, the error has been recovered
+ */
+#define CPER_HW_ERROR_FLAGS_RECOVERED		0x1
+/* If set, the error is for previous boot */
+#define CPER_HW_ERROR_FLAGS_PREVERR		0x2
+/* If set, the error is injected for testing */
+#define CPER_HW_ERROR_FLAGS_SIMULATED		0x4
+
+/*
+ * CPER section header revision, used in revision field in struct
+ * cper_section_descriptor
+ */
+#define CPER_SEC_REV				0x0100
+
+/*
+ * Validation bits difinition for validation_bits in struct
+ * cper_section_descriptor. If set, corresponding fields in struct
+ * cper_section_descriptor contain valid information.
+ *
+ * corresponds fru_id
+ */
+#define CPER_SEC_VALID_FRU_ID			0x1
+/* corresponds fru_text */
+#define CPER_SEC_VALID_FRU_TEXT			0x2
+
+/*
+ * Flags bits definitions for flags in struct cper_section_descriptor
+ *
+ * If set, the section is associated with the error condition
+ * directly, and should be focused on
+ */
+#define CPER_SEC_PRIMARY			0x0001
+/*
+ * If set, the error was not contained within the processor or memory
+ * hierarchy and the error may have propagated to persistent storage
+ * or network
+ */
+#define CPER_SEC_CONTAINMENT_WARNING		0x0002
+/* If set, the component must be re-initialized or re-enabled prior to use */
+#define CPER_SEC_RESET				0x0004
+/* If set, Linux may choose to discontinue use of the resource */
+#define CPER_SEC_ERROR_THRESHOLD_EXCEEDED	0x0008
+/*
+ * If set, resource could not be queried for error information due to
+ * conflicts with other system software or resources. Some fields of
+ * the section will be invalid
+ */
+#define CPER_SEC_RESOURCE_NOT_ACCESSIBLE	0x0010
+/*
+ * If set, action has been taken to ensure error containment (such as
+ * poisoning data), but the error has not been fully corrected and the
+ * data has not been consumed. Linux may choose to take further
+ * corrective action before the data is consumed
+ */
+#define CPER_SEC_LATENT_ERROR			0x0020
+
+/*
+ * Section type definitions, used in section_type field in struct
+ * cper_section_descriptor
+ *
+ * Processor Generic
+ */
+#define CPER_SEC_PROC_GENERIC						\
+	UUID_LE(0x9876CCAD, 0x47B4, 0x4bdb, 0xB6, 0x5E, 0x16, 0xF1,	\
+		0x93, 0xC4, 0xF3, 0xDB)
+/* Processor Specific: X86/X86_64 */
+#define CPER_SEC_PROC_IA						\
+	UUID_LE(0xDC3EA0B0, 0xA144, 0x4797, 0xB9, 0x5B, 0x53, 0xFA,	\
+		0x24, 0x2B, 0x6E, 0x1D)
+/* Processor Specific: IA64 */
+#define CPER_SEC_PROC_IPF						\
+	UUID_LE(0xE429FAF1, 0x3CB7, 0x11D4, 0x0B, 0xCA, 0x07, 0x00,	\
+		0x80, 0xC7, 0x3C, 0x88, 0x81)
+/* Platform Memory */
+#define CPER_SEC_PLATFORM_MEM						\
+	UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83,	\
+		0xED, 0x7C, 0x83, 0xB1)
+#define CPER_SEC_PCIE							\
+	UUID_LE(0xD995E954, 0xBBC1, 0x430F, 0xAD, 0x91, 0xB4, 0x4D,	\
+		0xCB, 0x3C, 0x6F, 0x35)
+/* Firmware Error Record Reference */
+#define CPER_SEC_FW_ERR_REC_REF						\
+	UUID_LE(0x81212A96, 0x09ED, 0x4996, 0x94, 0x71, 0x8D, 0x72,	\
+		0x9C, 0x8E, 0x69, 0xED)
+/* PCI/PCI-X Bus */
+#define CPER_SEC_PCI_X_BUS						\
+	UUID_LE(0xC5753963, 0x3B84, 0x4095, 0xBF, 0x78, 0xED, 0xDA,	\
+		0xD3, 0xF9, 0xC9, 0xDD)
+/* PCI Component/Device */
+#define CPER_SEC_PCI_DEV						\
+	UUID_LE(0xEB5E4685, 0xCA66, 0x4769, 0xB6, 0xA2, 0x26, 0x06,	\
+		0x8B, 0x00, 0x13, 0x26)
+#define CPER_SEC_DMAR_GENERIC						\
+	UUID_LE(0x5B51FEF7, 0xC79D, 0x4434, 0x8F, 0x1B, 0xAA, 0x62,	\
+		0xDE, 0x3E, 0x2C, 0x64)
+/* Intel VT for Directed I/O specific DMAr */
+#define CPER_SEC_DMAR_VT						\
+	UUID_LE(0x71761D37, 0x32B2, 0x45cd, 0xA7, 0xD0, 0xB0, 0xFE,	\
+		0xDD, 0x93, 0xE8, 0xCF)
+/* IOMMU specific DMAr */
+#define CPER_SEC_DMAR_IOMMU						\
+	UUID_LE(0x036F84E1, 0x7F37, 0x428c, 0xA7, 0x9E, 0x57, 0x5F,	\
+		0xDF, 0xAA, 0x84, 0xEC)
+
+/*
+ * All tables and structs must be byte-packed to match CPER
+ * specification, since the tables are provided by the system BIOS
+ */
+#pragma pack(1)
+
+struct cper_record_header {
+	char	signature[CPER_SIG_SIZE];	/* must be CPER_SIG_RECORD */
+	__u16	revision;			/* must be CPER_RECORD_REV */
+	__u32	signature_end;			/* must be CPER_SIG_END */
+	__u16	section_count;
+	__u32	error_severity;
+	__u32	validation_bits;
+	__u32	record_length;
+	__u64	timestamp;
+	uuid_le	platform_id;
+	uuid_le	partition_id;
+	uuid_le	creator_id;
+	uuid_le	notification_type;
+	__u64	record_id;
+	__u32	flags;
+	__u64	persistence_information;
+	__u8	reserved[12];			/* must be zero */
+};
+
+struct cper_section_descriptor {
+	__u32	section_offset;		/* Offset in bytes of the
+					 *  section body from the base
+					 *  of the record header */
+	__u32	section_length;
+	__u16	revision;		/* must be CPER_RECORD_REV */
+	__u8	validation_bits;
+	__u8	reserved;		/* must be zero */
+	__u32	flags;
+	uuid_le	section_type;
+	uuid_le	fru_id;
+	__u32	section_severity;
+	__u8	fru_text[20];
+};
+
+/* Generic Processor Error Section */
+struct cper_sec_proc_generic {
+	__u64	validation_bits;
+	__u8	proc_type;
+	__u8	proc_isa;
+	__u8	proc_error_type;
+	__u8	operation;
+	__u8	flags;
+	__u8	level;
+	__u16	reserved;
+	__u64	cpu_version;
+	char	cpu_brand[128];
+	__u64	proc_id;
+	__u64	target_addr;
+	__u64	requestor_id;
+	__u64	responder_id;
+	__u64	ip;
+};
+
+/* IA32/X64 Processor Error Section */
+struct cper_sec_proc_ia {
+	__u64	validation_bits;
+	__u8	lapic_id;
+	__u8	cpuid[48];
+};
+
+/* IA32/X64 Processor Error Infomation Structure */
+struct cper_ia_err_info {
+	uuid_le	err_type;
+	__u64	validation_bits;
+	__u64	check_info;
+	__u64	target_id;
+	__u64	requestor_id;
+	__u64	responder_id;
+	__u64	ip;
+};
+
+/* IA32/X64 Processor Context Information Structure */
+struct cper_ia_proc_ctx {
+	__u16	reg_ctx_type;
+	__u16	reg_arr_size;
+	__u32	msr_addr;
+	__u64	mm_reg_addr;
+};
+
+/* Memory Error Section */
+struct cper_sec_mem_err {
+	__u64	validation_bits;
+	__u64	error_status;
+	__u64	physical_addr;
+	__u64	physical_addr_mask;
+	__u16	node;
+	__u16	card;
+	__u16	module;
+	__u16	bank;
+	__u16	device;
+	__u16	row;
+	__u16	column;
+	__u16	bit_pos;
+	__u64	requestor_id;
+	__u64	responder_id;
+	__u64	target_id;
+	__u8	error_type;
+};
+
+/* Reset to default packing */
+#pragma pack()
+
+u64 cper_next_record_id(void);
+
+#endif
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 30b93b2a01a4..eebb617c17d8 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -186,6 +186,8 @@ d_iput:		no		no		no       yes
 
 #define DCACHE_FSNOTIFY_PARENT_WATCHED	0x0080 /* Parent inode is watched by some fsnotify listener */
 
+#define DCACHE_CANT_MOUNT	0x0100
+
 extern spinlock_t dcache_lock;
 extern seqlock_t rename_lock;
 
@@ -358,6 +360,18 @@ static inline int d_unlinked(struct dentry *dentry)
 	return d_unhashed(dentry) && !IS_ROOT(dentry);
 }
 
+static inline int cant_mount(struct dentry *dentry)
+{
+	return (dentry->d_flags & DCACHE_CANT_MOUNT);
+}
+
+static inline void dont_mount(struct dentry *dentry)
+{
+	spin_lock(&dentry->d_lock);
+	dentry->d_flags |= DCACHE_CANT_MOUNT;
+	spin_unlock(&dentry->d_lock);
+}
+
 static inline struct dentry *dget_parent(struct dentry *dentry)
 {
 	struct dentry *ret;
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index fc1b930f246c..e7d9b20ddc5b 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -63,6 +63,8 @@ struct dentry *debugfs_create_x16(const char *name, mode_t mode,
 				  struct dentry *parent, u16 *value);
 struct dentry *debugfs_create_x32(const char *name, mode_t mode,
 				  struct dentry *parent, u32 *value);
+struct dentry *debugfs_create_x64(const char *name, mode_t mode,
+				  struct dentry *parent, u64 *value);
 struct dentry *debugfs_create_size_t(const char *name, mode_t mode,
 				     struct dentry *parent, size_t *value);
 struct dentry *debugfs_create_bool(const char *name, mode_t mode,
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index c9bf92cd7653..d94963b379d9 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -79,10 +79,7 @@ enum {
 	IFLA_NET_NS_PID,
 	IFLA_IFALIAS,
 	IFLA_NUM_VF,		/* Number of VFs if device is SR-IOV PF */
-	IFLA_VF_MAC,		/* Hardware queue specific attributes */
-	IFLA_VF_VLAN,
-	IFLA_VF_TX_RATE,	/* TX Bandwidth Allocation */
-	IFLA_VFINFO,
+	IFLA_VFINFO_LIST,
 	__IFLA_MAX
 };
 
@@ -203,6 +200,24 @@ enum macvlan_mode {
 
 /* SR-IOV virtual function managment section */
 
+enum {
+	IFLA_VF_INFO_UNSPEC,
+	IFLA_VF_INFO,
+	__IFLA_VF_INFO_MAX,
+};
+
+#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1)
+
+enum {
+	IFLA_VF_UNSPEC,
+	IFLA_VF_MAC,		/* Hardware queue specific attributes */
+	IFLA_VF_VLAN,
+	IFLA_VF_TX_RATE,	/* TX Bandwidth Allocation */
+	__IFLA_VF_MAX,
+};
+
+#define IFLA_VF_MAX (__IFLA_VF_MAX - 1)
+
 struct ifla_vf_mac {
 	__u32 vf;
 	__u8 mac[32]; /* MAX_ADDR_LEN */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index a788fa12ff31..63967e877f20 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -311,7 +311,8 @@ struct pci_dev {
 	unsigned int	is_virtfn:1;
 	unsigned int	reset_fn:1;
 	unsigned int    is_hotplug_bridge:1;
-	unsigned int    aer_firmware_first:1;
+	unsigned int    __aer_firmware_first_valid:1;
+	unsigned int	__aer_firmware_first:1;
 	pci_dev_flags_t dev_flags;
 	atomic_t	enable_cnt;	/* pci_enable_device has been called */
 
diff --git a/include/linux/uuid.h b/include/linux/uuid.h
new file mode 100644
index 000000000000..5b7efbfcee4e
--- /dev/null
+++ b/include/linux/uuid.h
@@ -0,0 +1,70 @@
+/*
+ * UUID/GUID definition
+ *
+ * Copyright (C) 2010, Intel Corp.
+ *	Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _LINUX_UUID_H_
+#define _LINUX_UUID_H_
+
+#include <linux/types.h>
+#include <linux/string.h>
+
+typedef struct {
+	__u8 b[16];
+} uuid_le;
+
+typedef struct {
+	__u8 b[16];
+} uuid_be;
+
+#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)		\
+((uuid_le)								\
+{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
+   (b) & 0xff, ((b) >> 8) & 0xff,					\
+   (c) & 0xff, ((c) >> 8) & 0xff,					\
+   (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
+
+#define UUID_BE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)		\
+((uuid_be)								\
+{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \
+   ((b) >> 8) & 0xff, (b) & 0xff,					\
+   ((c) >> 8) & 0xff, (c) & 0xff,					\
+   (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
+
+#define NULL_UUID_LE							\
+	UUID_LE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00,	\
+		0x00, 0x00, 0x00, 0x00)
+
+#define NULL_UUID_BE							\
+	UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00,	\
+		0x00, 0x00, 0x00, 0x00)
+
+static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2)
+{
+	return memcmp(&u1, &u2, sizeof(uuid_le));
+}
+
+static inline int uuid_be_cmp(const uuid_be u1, const uuid_be u2)
+{
+	return memcmp(&u1, &u2, sizeof(uuid_be));
+}
+
+extern void uuid_le_gen(uuid_le *u);
+extern void uuid_be_gen(uuid_be *u);
+
+#endif
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 75be5a28815d..aa04b9a5093b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1197,30 +1197,15 @@ extern int			tcp_v4_md5_do_del(struct sock *sk,
 extern struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *);
 extern void			tcp_free_md5sig_pool(void);
 
-extern struct tcp_md5sig_pool	*__tcp_get_md5sig_pool(int cpu);
-extern void			__tcp_put_md5sig_pool(void);
+extern struct tcp_md5sig_pool	*tcp_get_md5sig_pool(void);
+extern void			tcp_put_md5sig_pool(void);
+
 extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *);
 extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *,
 				 unsigned header_len);
 extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
 			    struct tcp_md5sig_key *key);
 
-static inline
-struct tcp_md5sig_pool		*tcp_get_md5sig_pool(void)
-{
-	int cpu = get_cpu();
-	struct tcp_md5sig_pool *ret = __tcp_get_md5sig_pool(cpu);
-	if (!ret)
-		put_cpu();
-	return ret;
-}
-
-static inline void		tcp_put_md5sig_pool(void)
-{
-	__tcp_put_md5sig_pool();
-	put_cpu();
-}
-
 /* write queue abstraction */
 static inline void tcp_write_queue_purge(struct sock *sk)
 {
diff --git a/kernel/profile.c b/kernel/profile.c
index a55d3a367ae8..dfadc5b729f1 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -127,8 +127,10 @@ int __ref profile_init(void)
 		return 0;
 
 	prof_buffer = vmalloc(buffer_bytes);
-	if (prof_buffer)
+	if (prof_buffer) {
+		memset(prof_buffer, 0, buffer_bytes);
 		return 0;
+	}
 
 	free_cpumask_var(prof_cpu_mask);
 	return -ENOMEM;
diff --git a/lib/Makefile b/lib/Makefile
index 0d4015205c64..f3eb6e8766be 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -21,7 +21,7 @@ lib-y	+= kobject.o kref.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
-	 string_helpers.o gcd.o lcm.o list_sort.o
+	 string_helpers.o gcd.o lcm.o list_sort.o uuid.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/btree.c b/lib/btree.c
index 41859a820218..c9c6f0351526 100644
--- a/lib/btree.c
+++ b/lib/btree.c
@@ -95,7 +95,8 @@ static unsigned long *btree_node_alloc(struct btree_head *head, gfp_t gfp)
 	unsigned long *node;
 
 	node = mempool_alloc(head->mempool, gfp);
-	memset(node, 0, NODESIZE);
+	if (likely(node))
+		memset(node, 0, NODESIZE);
 	return node;
 }
 
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 3e3365e5665e..ceba8e28807a 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -136,9 +136,10 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
  out:
 	return sem;
 
-	/* undo the change to count, but check for a transition 1->0 */
+	/* undo the change to the active count, but check for a transition
+	 * 1->0 */
  undo:
-	if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) != 0)
+	if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) & RWSEM_ACTIVE_MASK)
 		goto out;
 	goto try_again;
 }
diff --git a/lib/uuid.c b/lib/uuid.c
new file mode 100644
index 000000000000..8fadd7cef46c
--- /dev/null
+++ b/lib/uuid.c
@@ -0,0 +1,53 @@
+/*
+ * Unified UUID/GUID definition
+ *
+ * Copyright (C) 2009, Intel Corp.
+ *	Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uuid.h>
+#include <linux/random.h>
+
+static void __uuid_gen_common(__u8 b[16])
+{
+	int i;
+	u32 r;
+
+	for (i = 0; i < 4; i++) {
+		r = random32();
+		memcpy(b + i * 4, &r, 4);
+	}
+	/* reversion 0b10 */
+	b[8] = (b[8] & 0x3F) | 0x80;
+}
+
+void uuid_le_gen(uuid_le *lu)
+{
+	__uuid_gen_common(lu->b);
+	/* version 4 : random generation */
+	lu->b[7] = (lu->b[7] & 0x0F) | 0x40;
+}
+EXPORT_SYMBOL_GPL(uuid_le_gen);
+
+void uuid_be_gen(uuid_be *bu)
+{
+	__uuid_gen_common(bu->b);
+	/* version 4 : random generation */
+	bu->b[6] = (bu->b[6] & 0x0F) | 0x40;
+}
+EXPORT_SYMBOL_GPL(uuid_be_gen);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fe776c9ddeca..31e85d327aa2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -602,12 +602,19 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
 	a->tx_compressed = b->tx_compressed;
 };
 
+/* All VF info */
 static inline int rtnl_vfinfo_size(const struct net_device *dev)
 {
-	if (dev->dev.parent && dev_is_pci(dev->dev.parent))
-		return dev_num_vf(dev->dev.parent) *
-			sizeof(struct ifla_vf_info);
-	else
+	if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
+
+		int num_vfs = dev_num_vf(dev->dev.parent);
+		size_t size = nlmsg_total_size(sizeof(struct nlattr));
+		size += nlmsg_total_size(num_vfs * sizeof(struct nlattr));
+		size += num_vfs * (sizeof(struct ifla_vf_mac) +
+				  sizeof(struct ifla_vf_vlan) +
+				  sizeof(struct ifla_vf_tx_rate));
+		return size;
+	} else
 		return 0;
 }
 
@@ -629,7 +636,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
 	       + nla_total_size(1) /* IFLA_OPERSTATE */
 	       + nla_total_size(1) /* IFLA_LINKMODE */
 	       + nla_total_size(4) /* IFLA_NUM_VF */
-	       + nla_total_size(rtnl_vfinfo_size(dev)) /* IFLA_VFINFO */
+	       + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
 	       + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
 }
 
@@ -700,14 +707,37 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 
 	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
 		int i;
-		struct ifla_vf_info ivi;
 
-		NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent));
-		for (i = 0; i < dev_num_vf(dev->dev.parent); i++) {
+		struct nlattr *vfinfo, *vf;
+		int num_vfs = dev_num_vf(dev->dev.parent);
+
+		NLA_PUT_U32(skb, IFLA_NUM_VF, num_vfs);
+		vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
+		if (!vfinfo)
+			goto nla_put_failure;
+		for (i = 0; i < num_vfs; i++) {
+			struct ifla_vf_info ivi;
+			struct ifla_vf_mac vf_mac;
+			struct ifla_vf_vlan vf_vlan;
+			struct ifla_vf_tx_rate vf_tx_rate;
 			if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
 				break;
-			NLA_PUT(skb, IFLA_VFINFO, sizeof(ivi), &ivi);
+			vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf;
+			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
+			vf_vlan.vlan = ivi.vlan;
+			vf_vlan.qos = ivi.qos;
+			vf_tx_rate.rate = ivi.tx_rate;
+			vf = nla_nest_start(skb, IFLA_VF_INFO);
+			if (!vf) {
+				nla_nest_cancel(skb, vfinfo);
+				goto nla_put_failure;
+			}
+			NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac);
+			NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan);
+			NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate);
+			nla_nest_end(skb, vf);
 		}
+		nla_nest_end(skb, vfinfo);
 	}
 	if (dev->rtnl_link_ops) {
 		if (rtnl_link_fill(skb, dev) < 0)
@@ -769,12 +799,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_LINKINFO]		= { .type = NLA_NESTED },
 	[IFLA_NET_NS_PID]	= { .type = NLA_U32 },
 	[IFLA_IFALIAS]	        = { .type = NLA_STRING, .len = IFALIASZ-1 },
-	[IFLA_VF_MAC]		= { .type = NLA_BINARY,
-				    .len = sizeof(struct ifla_vf_mac) },
-	[IFLA_VF_VLAN]		= { .type = NLA_BINARY,
-				    .len = sizeof(struct ifla_vf_vlan) },
-	[IFLA_VF_TX_RATE]	= { .type = NLA_BINARY,
-				    .len = sizeof(struct ifla_vf_tx_rate) },
+	[IFLA_VFINFO_LIST]	= {. type = NLA_NESTED },
 };
 EXPORT_SYMBOL(ifla_policy);
 
@@ -783,6 +808,19 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
 	[IFLA_INFO_DATA]	= { .type = NLA_NESTED },
 };
 
+static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
+	[IFLA_VF_INFO]		= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
+	[IFLA_VF_MAC]		= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_mac) },
+	[IFLA_VF_VLAN]		= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_vlan) },
+	[IFLA_VF_TX_RATE]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_tx_rate) },
+};
+
 struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
 {
 	struct net *net;
@@ -812,6 +850,52 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
 	return 0;
 }
 
+static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
+{
+	int rem, err = -EINVAL;
+	struct nlattr *vf;
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	nla_for_each_nested(vf, attr, rem) {
+		switch (nla_type(vf)) {
+		case IFLA_VF_MAC: {
+			struct ifla_vf_mac *ivm;
+			ivm = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_mac)
+				err = ops->ndo_set_vf_mac(dev, ivm->vf,
+							  ivm->mac);
+			break;
+		}
+		case IFLA_VF_VLAN: {
+			struct ifla_vf_vlan *ivv;
+			ivv = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_vlan)
+				err = ops->ndo_set_vf_vlan(dev, ivv->vf,
+							   ivv->vlan,
+							   ivv->qos);
+			break;
+		}
+		case IFLA_VF_TX_RATE: {
+			struct ifla_vf_tx_rate *ivt;
+			ivt = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_tx_rate)
+				err = ops->ndo_set_vf_tx_rate(dev, ivt->vf,
+							      ivt->rate);
+			break;
+		}
+		default:
+			err = -EINVAL;
+			break;
+		}
+		if (err)
+			break;
+	}
+	return err;
+}
+
 static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		      struct nlattr **tb, char *ifname, int modified)
 {
@@ -942,40 +1026,17 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		write_unlock_bh(&dev_base_lock);
 	}
 
-	if (tb[IFLA_VF_MAC]) {
-		struct ifla_vf_mac *ivm;
-		ivm = nla_data(tb[IFLA_VF_MAC]);
-		err = -EOPNOTSUPP;
-		if (ops->ndo_set_vf_mac)
-			err = ops->ndo_set_vf_mac(dev, ivm->vf, ivm->mac);
-		if (err < 0)
-			goto errout;
-		modified = 1;
-	}
-
-	if (tb[IFLA_VF_VLAN]) {
-		struct ifla_vf_vlan *ivv;
-		ivv = nla_data(tb[IFLA_VF_VLAN]);
-		err = -EOPNOTSUPP;
-		if (ops->ndo_set_vf_vlan)
-			err = ops->ndo_set_vf_vlan(dev, ivv->vf,
-						   ivv->vlan,
-						   ivv->qos);
-		if (err < 0)
-			goto errout;
-		modified = 1;
-	}
-	err = 0;
-
-	if (tb[IFLA_VF_TX_RATE]) {
-		struct ifla_vf_tx_rate *ivt;
-		ivt = nla_data(tb[IFLA_VF_TX_RATE]);
-		err = -EOPNOTSUPP;
-		if (ops->ndo_set_vf_tx_rate)
-			err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, ivt->rate);
-		if (err < 0)
-			goto errout;
-		modified = 1;
+	if (tb[IFLA_VFINFO_LIST]) {
+		struct nlattr *attr;
+		int rem;
+		nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
+			if (nla_type(attr) != IFLA_VF_INFO)
+				goto errout;
+			err = do_setvfinfo(dev, attr);
+			if (err < 0)
+				goto errout;
+			modified = 1;
+		}
 	}
 	err = 0;
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0f8caf64caa3..296150b2a62f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2839,7 +2839,6 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool)
 			if (p->md5_desc.tfm)
 				crypto_free_hash(p->md5_desc.tfm);
 			kfree(p);
-			p = NULL;
 		}
 	}
 	free_percpu(pool);
@@ -2937,25 +2936,40 @@ retry:
 
 EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
 
-struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu)
+
+/**
+ *	tcp_get_md5sig_pool - get md5sig_pool for this user
+ *
+ *	We use percpu structure, so if we succeed, we exit with preemption
+ *	and BH disabled, to make sure another thread or softirq handling
+ *	wont try to get same context.
+ */
+struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
 {
 	struct tcp_md5sig_pool * __percpu *p;
-	spin_lock_bh(&tcp_md5sig_pool_lock);
+
+	local_bh_disable();
+
+	spin_lock(&tcp_md5sig_pool_lock);
 	p = tcp_md5sig_pool;
 	if (p)
 		tcp_md5sig_users++;
-	spin_unlock_bh(&tcp_md5sig_pool_lock);
-	return (p ? *per_cpu_ptr(p, cpu) : NULL);
-}
+	spin_unlock(&tcp_md5sig_pool_lock);
+
+	if (p)
+		return *per_cpu_ptr(p, smp_processor_id());
 
-EXPORT_SYMBOL(__tcp_get_md5sig_pool);
+	local_bh_enable();
+	return NULL;
+}
+EXPORT_SYMBOL(tcp_get_md5sig_pool);
 
-void __tcp_put_md5sig_pool(void)
+void tcp_put_md5sig_pool(void)
 {
+	local_bh_enable();
 	tcp_free_md5sig_pool();
 }
-
-EXPORT_SYMBOL(__tcp_put_md5sig_pool);
+EXPORT_SYMBOL(tcp_put_md5sig_pool);
 
 int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
 			struct tcphdr *th)
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 4a368038d46f..165d54e07fcd 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -173,6 +173,10 @@ void sctp_transport_free(struct sctp_transport *transport)
 	    del_timer(&transport->T3_rtx_timer))
 		sctp_transport_put(transport);
 
+	/* Delete the ICMP proto unreachable timer if it's active. */
+	if (timer_pending(&transport->proto_unreach_timer) &&
+	    del_timer(&transport->proto_unreach_timer))
+		sctp_association_put(transport->asoc);
 
 	sctp_transport_put(transport);
 }
diff --git a/security/min_addr.c b/security/min_addr.c
index e86f297522bf..f728728f193b 100644
--- a/security/min_addr.c
+++ b/security/min_addr.c
@@ -33,7 +33,7 @@ int mmap_min_addr_handler(struct ctl_table *table, int write,
 {
 	int ret;
 
-	if (!capable(CAP_SYS_RAWIO))
+	if (write && !capable(CAP_SYS_RAWIO))
 		return -EPERM;
 
 	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 872887624030..20b5982c996b 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -36,6 +36,9 @@
 #include <sound/timer.h>
 #include <sound/minors.h>
 #include <asm/io.h>
+#if defined(CONFIG_MIPS) && defined(CONFIG_DMA_NONCOHERENT)
+#include <dma-coherence.h>
+#endif
 
 /*
  *  Compatibility
@@ -3184,6 +3187,10 @@ static int snd_pcm_default_mmap(struct snd_pcm_substream *substream,
 					 substream->runtime->dma_area,
 					 substream->runtime->dma_addr,
 					 area->vm_end - area->vm_start);
+#elif defined(CONFIG_MIPS) && defined(CONFIG_DMA_NONCOHERENT)
+	if (substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_DEV &&
+	    !plat_device_is_coherent(substream->dma_buffer.dev.dev))
+		area->vm_page_prot = pgprot_noncached(area->vm_page_prot);
 #endif /* ARCH_HAS_DMA_MMAP_COHERENT */
 	/* mmap with fault handler */
 	area->vm_ops = &snd_pcm_vm_ops_data_fault;
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index d8213e2231a6..feabb44c7ca4 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -1197,9 +1197,10 @@ static int patch_cxt5045(struct hda_codec *codec)
 	case 0x103c:
 	case 0x1631:
 	case 0x1734:
-		/* HP, Packard Bell, & Fujitsu-Siemens laptops have really bad
-		 * sound over 0dB on NID 0x17. Fix max PCM level to 0 dB
-		 * (originally it has 0x2b steps with 0dB offset 0x14)
+	case 0x17aa:
+		/* HP, Packard Bell, Fujitsu-Siemens & Lenovo laptops have
+		 * really bad sound over 0dB on NID 0x17. Fix max PCM level to
+		 * 0 dB (originally it has 0x2b steps with 0dB offset 0x14)
 		 */
 		snd_hda_override_amp_caps(codec, 0x17, HDA_INPUT,
 					  (0x14 << AC_AMPCAP_OFFSET_SHIFT) |
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 12825aa03106..a0e06d82da1f 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -104,6 +104,7 @@ enum {
 	STAC_DELL_M4_2,
 	STAC_DELL_M4_3,
 	STAC_HP_M4,
+	STAC_HP_DV4,
 	STAC_HP_DV5,
 	STAC_HP_HDX,
 	STAC_HP_DV4_1222NR,
@@ -1691,6 +1692,7 @@ static unsigned int *stac92hd71bxx_brd_tbl[STAC_92HD71BXX_MODELS] = {
 	[STAC_DELL_M4_2]	= dell_m4_2_pin_configs,
 	[STAC_DELL_M4_3]	= dell_m4_3_pin_configs,
 	[STAC_HP_M4]		= NULL,
+	[STAC_HP_DV4]		= NULL,
 	[STAC_HP_DV5]		= NULL,
 	[STAC_HP_HDX]           = NULL,
 	[STAC_HP_DV4_1222NR]	= NULL,
@@ -1703,6 +1705,7 @@ static const char *stac92hd71bxx_models[STAC_92HD71BXX_MODELS] = {
 	[STAC_DELL_M4_2] = "dell-m4-2",
 	[STAC_DELL_M4_3] = "dell-m4-3",
 	[STAC_HP_M4] = "hp-m4",
+	[STAC_HP_DV4] = "hp-dv4",
 	[STAC_HP_DV5] = "hp-dv5",
 	[STAC_HP_HDX] = "hp-hdx",
 	[STAC_HP_DV4_1222NR] = "hp-dv4-1222nr",
@@ -1721,7 +1724,7 @@ static struct snd_pci_quirk stac92hd71bxx_cfg_tbl[] = {
 	SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xfff0, 0x3080,
 		      "HP", STAC_HP_DV5),
 	SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xfff0, 0x30f0,
-		      "HP dv4-7", STAC_HP_DV5),
+		      "HP dv4-7", STAC_HP_DV4),
 	SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xfff0, 0x3600,
 		      "HP dv4-7", STAC_HP_DV5),
 	SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3610,
@@ -4766,6 +4769,9 @@ static void set_hp_led_gpio(struct hda_codec *codec)
 	struct sigmatel_spec *spec = codec->spec;
 	unsigned int gpio;
 
+	if (spec->gpio_led)
+		return;
+
 	gpio = snd_hda_param_read(codec, codec->afg, AC_PAR_GPIO_CAP);
 	gpio &= AC_GPIO_IO_COUNT;
 	if (gpio > 3)
@@ -5675,6 +5681,9 @@ again:
 		spec->num_smuxes = 1;
 		spec->num_dmuxes = 1;
 		/* fallthrough */
+	case STAC_HP_DV4:
+		spec->gpio_led = 0x01;
+		/* fallthrough */
 	case STAC_HP_DV5:
 		snd_hda_codec_set_pincfg(codec, 0x0d, 0x90170010);
 		stac92xx_auto_set_pinctl(codec, 0x0d, AC_PINCTL_OUT_EN);
@@ -5688,6 +5697,7 @@ again:
 		spec->num_dmics = 1;
 		spec->num_dmuxes = 1;
 		spec->num_smuxes = 1;
+		spec->gpio_led = 0x08;
 		break;
 	}
 
@@ -5744,7 +5754,8 @@ again:
 	}
 
 	/* enable bass on HP dv7 */
-	if (spec->board_config == STAC_HP_DV5) {
+	if (spec->board_config == STAC_HP_DV4 ||
+	    spec->board_config == STAC_HP_DV5) {
 		unsigned int cap;
 		cap = snd_hda_param_read(codec, 0x1, AC_PAR_GPIO_CAP);
 		cap &= AC_GPIO_IO_COUNT;
diff --git a/sound/pci/ice1712/maya44.c b/sound/pci/ice1712/maya44.c
index 3e1c20ae2f1c..726fd4b92e19 100644
--- a/sound/pci/ice1712/maya44.c
+++ b/sound/pci/ice1712/maya44.c
@@ -347,7 +347,7 @@ static int maya_gpio_sw_put(struct snd_kcontrol *kcontrol,
 
 /* known working input slots (0-4) */
 #define MAYA_LINE_IN	1	/* in-2 */
-#define MAYA_MIC_IN	4	/* in-5 */
+#define MAYA_MIC_IN	3	/* in-4 */
 
 static void wm8776_select_input(struct snd_maya44 *chip, int idx, int line)
 {
@@ -393,8 +393,8 @@ static int maya_rec_src_put(struct snd_kcontrol *kcontrol,
 	int changed;
 
 	mutex_lock(&chip->mutex);
-	changed = maya_set_gpio_bits(chip->ice, GPIO_MIC_RELAY,
-				     sel ? GPIO_MIC_RELAY : 0);
+	changed = maya_set_gpio_bits(chip->ice, 1 << GPIO_MIC_RELAY,
+				     sel ? (1 << GPIO_MIC_RELAY) : 0);
 	wm8776_select_input(chip, 0, sel ? MAYA_MIC_IN : MAYA_LINE_IN);
 	mutex_unlock(&chip->mutex);
 	return changed;
diff --git a/sound/pci/oxygen/xonar_cs43xx.c b/sound/pci/oxygen/xonar_cs43xx.c
index 16c226bfcd2b..7c4986b27f2b 100644
--- a/sound/pci/oxygen/xonar_cs43xx.c
+++ b/sound/pci/oxygen/xonar_cs43xx.c
@@ -56,6 +56,7 @@
 #include <sound/pcm_params.h>
 #include <sound/tlv.h>
 #include "xonar.h"
+#include "cm9780.h"
 #include "cs4398.h"
 #include "cs4362a.h"
 
@@ -172,6 +173,8 @@ static void xonar_d1_init(struct oxygen *chip)
 	oxygen_clear_bits16(chip, OXYGEN_GPIO_DATA,
 			    GPIO_D1_FRONT_PANEL | GPIO_D1_INPUT_ROUTE);
 
+	oxygen_ac97_set_bits(chip, 0, CM9780_JACK, CM9780_FMIC2MIC);
+
 	xonar_init_cs53x1(chip);
 	xonar_enable_output(chip);
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 3b8b6387c47c..f1411e9cdf47 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -563,6 +563,9 @@ static int __cmd_record(int argc, const char **argv)
 
 	err = event__synthesize_kernel_mmap(process_synthesized_event,
 					    session, "_text");
+	if (err < 0)
+		err = event__synthesize_kernel_mmap(process_synthesized_event,
+						    session, "_stext");
 	if (err < 0) {
 		pr_err("Couldn't record kernel reference relocation symbol.\n");
 		return err;
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 03a5eb22da2b..7c79c1d76d0c 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -197,7 +197,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 	union kvm_ioapic_redirect_entry entry;
 	int ret = 1;
 
-	mutex_lock(&ioapic->lock);
+	spin_lock(&ioapic->lock);
 	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
 		entry = ioapic->redirtbl[irq];
 		level ^= entry.fields.polarity;
@@ -214,7 +214,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 		}
 		trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
 	}
-	mutex_unlock(&ioapic->lock);
+	spin_unlock(&ioapic->lock);
 
 	return ret;
 }
@@ -238,9 +238,9 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
 		 * is dropped it will be put into irr and will be delivered
 		 * after ack notifier returns.
 		 */
-		mutex_unlock(&ioapic->lock);
+		spin_unlock(&ioapic->lock);
 		kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
-		mutex_lock(&ioapic->lock);
+		spin_lock(&ioapic->lock);
 
 		if (trigger_mode != IOAPIC_LEVEL_TRIG)
 			continue;
@@ -259,9 +259,9 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
 	smp_rmb();
 	if (!test_bit(vector, ioapic->handled_vectors))
 		return;
-	mutex_lock(&ioapic->lock);
+	spin_lock(&ioapic->lock);
 	__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
-	mutex_unlock(&ioapic->lock);
+	spin_unlock(&ioapic->lock);
 }
 
 static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
@@ -287,7 +287,7 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
 	addr &= 0xff;
-	mutex_lock(&ioapic->lock);
+	spin_lock(&ioapic->lock);
 	switch (addr) {
 	case IOAPIC_REG_SELECT:
 		result = ioapic->ioregsel;
@@ -301,7 +301,7 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 		result = 0;
 		break;
 	}
-	mutex_unlock(&ioapic->lock);
+	spin_unlock(&ioapic->lock);
 
 	switch (len) {
 	case 8:
@@ -338,7 +338,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 	}
 
 	addr &= 0xff;
-	mutex_lock(&ioapic->lock);
+	spin_lock(&ioapic->lock);
 	switch (addr) {
 	case IOAPIC_REG_SELECT:
 		ioapic->ioregsel = data;
@@ -356,7 +356,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 	default:
 		break;
 	}
-	mutex_unlock(&ioapic->lock);
+	spin_unlock(&ioapic->lock);
 	return 0;
 }
 
@@ -386,7 +386,7 @@ int kvm_ioapic_init(struct kvm *kvm)
 	ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
 	if (!ioapic)
 		return -ENOMEM;
-	mutex_init(&ioapic->lock);
+	spin_lock_init(&ioapic->lock);
 	kvm->arch.vioapic = ioapic;
 	kvm_ioapic_reset(ioapic);
 	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
@@ -419,9 +419,9 @@ int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 	if (!ioapic)
 		return -EINVAL;
 
-	mutex_lock(&ioapic->lock);
+	spin_lock(&ioapic->lock);
 	memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
-	mutex_unlock(&ioapic->lock);
+	spin_unlock(&ioapic->lock);
 	return 0;
 }
 
@@ -431,9 +431,9 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 	if (!ioapic)
 		return -EINVAL;
 
-	mutex_lock(&ioapic->lock);
+	spin_lock(&ioapic->lock);
 	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
 	update_handled_vectors(ioapic);
-	mutex_unlock(&ioapic->lock);
+	spin_unlock(&ioapic->lock);
 	return 0;
 }
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 8a751b78a430..0b190c34ccc3 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -45,7 +45,7 @@ struct kvm_ioapic {
 	struct kvm_io_device dev;
 	struct kvm *kvm;
 	void (*ack_notifier)(void *opaque, int irq);
-	struct mutex lock;
+	spinlock_t lock;
 	DECLARE_BITMAP(handled_vectors, 256);
 };