564 files changed, 15446 insertions, 6714 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 15996290fed4..e9c9334507dd 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -357,6 +357,43 @@ config SECCOMP_FILTER
 
 	  See Documentation/prctl/seccomp_filter.txt for details.
 
+config HAVE_GCC_PLUGINS
+	bool
+	help
+	  An arch should select this symbol if it supports building with
+	  GCC plugins.
+
+menuconfig GCC_PLUGINS
+	bool "GCC plugins"
+	depends on HAVE_GCC_PLUGINS
+	depends on !COMPILE_TEST
+	help
+	  GCC plugins are loadable modules that provide extra features to the
+	  compiler. They are useful for runtime instrumentation and static analysis.
+
+	  See Documentation/gcc-plugins.txt for details.
+
+config GCC_PLUGIN_CYC_COMPLEXITY
+	bool "Compute the cyclomatic complexity of a function"
+	depends on GCC_PLUGINS
+	help
+	  The complexity M of a function's control flow graph is defined as:
+	   M = E - N + 2P
+	  where
+
+	  E = the number of edges
+	  N = the number of nodes
+	  P = the number of connected components (exit nodes).
+
+config GCC_PLUGIN_SANCOV
+	bool
+	depends on GCC_PLUGINS
+	help
+	  This plugin inserts a __sanitizer_cov_trace_pc() call at the start of
+	  basic blocks. It supports all gcc versions with plugin support (from
+	  gcc-4.5 on). It is based on the commit "Add fuzzing coverage support"
+	  by Dmitry Vyukov <dvyukov@google.com>.
+
 config HAVE_CC_STACKPROTECTOR
 	bool
 	help
@@ -424,6 +461,15 @@ config CC_STACKPROTECTOR_STRONG
 
 endchoice
 
+config HAVE_ARCH_WITHIN_STACK_FRAMES
+	bool
+	help
+	  An architecture should select this if it can walk the kernel stack
+	  frames to determine if an object is part of either the arguments
+	  or local variables (i.e. that it excludes saved return addresses,
+	  and similar) by implementing an inline arch_within_stack_frames(),
+	  which is used by CONFIG_HARDENED_USERCOPY.
+
 config HAVE_CONTEXT_TRACKING
 	bool
 	help
diff --git a/arch/alpha/boot/Makefile b/arch/alpha/boot/Makefile
index 8399bd0e68e8..0cbe4c59d3ce 100644
--- a/arch/alpha/boot/Makefile
+++ b/arch/alpha/boot/Makefile
@@ -15,7 +15,7 @@ targets		:= vmlinux.gz vmlinux \
 OBJSTRIP	:= $(obj)/tools/objstrip
 
 HOSTCFLAGS	:= -Wall -I$(objtree)/usr/include
-BOOTCFLAGS	+= -I$(obj) -I$(srctree)/$(obj)
+BOOTCFLAGS	+= -I$(objtree)/$(obj) -I$(srctree)/$(obj)
 
 # SRM bootable image.  Copy to offset 512 of a partition.
 $(obj)/bootimage: $(addprefix $(obj)/tools/,mkbb lxboot bootlx) $(obj)/vmlinux.nh
diff --git a/arch/alpha/include/asm/dma-mapping.h b/arch/alpha/include/asm/dma-mapping.h
index 3c3451f58ff4..c63b6ac19ee5 100644
--- a/arch/alpha/include/asm/dma-mapping.h
+++ b/arch/alpha/include/asm/dma-mapping.h
@@ -1,8 +1,6 @@
 #ifndef _ALPHA_DMA_MAPPING_H
 #define _ALPHA_DMA_MAPPING_H
 
-#include <linux/dma-attrs.h>
-
 extern struct dma_map_ops *dma_ops;
 
 static inline struct dma_map_ops *get_dma_ops(struct device *dev)
diff --git a/arch/alpha/include/asm/rtc.h b/arch/alpha/include/asm/rtc.h
deleted file mode 100644
index f71c3b0ed360..000000000000
--- a/arch/alpha/include/asm/rtc.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/rtc.h>
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 32e920a83ae5..e9e90bfa2b50 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -86,33 +86,6 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define TS_UAC_NOPRINT		0x0001	/* ! Preserve the following three */
 #define TS_UAC_NOFIX		0x0002	/* ! flags as they match          */
 #define TS_UAC_SIGBUS		0x0004	/* ! userspace part of 'osf_sysinfo' */
-#define TS_RESTORE_SIGMASK	0x0008	/* restore signal mask in do_signal() */
-
-#ifndef __ASSEMBLY__
-#define HAVE_SET_RESTORE_SIGMASK	1
-static inline void set_restore_sigmask(void)
-{
-	struct thread_info *ti = current_thread_info();
-	ti->status |= TS_RESTORE_SIGMASK;
-	WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags));
-}
-static inline void clear_restore_sigmask(void)
-{
-	current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-}
-static inline bool test_restore_sigmask(void)
-{
-	return current_thread_info()->status & TS_RESTORE_SIGMASK;
-}
-static inline bool test_and_clear_restore_sigmask(void)
-{
-	struct thread_info *ti = current_thread_info();
-	if (!(ti->status & TS_RESTORE_SIGMASK))
-		return false;
-	ti->status &= ~TS_RESTORE_SIGMASK;
-	return true;
-}
-#endif
 
 #define SET_UNALIGN_CTL(task,value)	({				\
 	__u32 status = task_thread_info(task)->status & ~UAC_BITMASK;	\
diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index 53dd2f1a53aa..d5f0580746a5 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -24,7 +24,6 @@
 #include <asm/gct.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
-#include <asm/rtc.h>
 #include <asm/vga.h>
 
 #include "proto.h"
diff --git a/arch/alpha/kernel/machvec_impl.h b/arch/alpha/kernel/machvec_impl.h
index f54bdf658cd0..d3398f6ab74c 100644
--- a/arch/alpha/kernel/machvec_impl.h
+++ b/arch/alpha/kernel/machvec_impl.h
@@ -137,7 +137,7 @@
 #define __initmv __initdata
 #define ALIAS_MV(x)
 #else
-#define __initmv __initdata_refok
+#define __initmv __refdata
 
 /* GCC actually has a syntax for defining aliases, but is under some
    delusion that you shouldn't be able to declare it extern somewhere
diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c
index 8e735b5e56bd..bb152e21e5ae 100644
--- a/arch/alpha/kernel/pci-noop.c
+++ b/arch/alpha/kernel/pci-noop.c
@@ -109,7 +109,7 @@ sys_pciconfig_write(unsigned long bus, unsigned long dfn,
 
 static void *alpha_noop_alloc_coherent(struct device *dev, size_t size,
 				       dma_addr_t *dma_handle, gfp_t gfp,
-				       struct dma_attrs *attrs)
+				       unsigned long attrs)
 {
 	void *ret;
 
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index 8969bf2dfe3a..451fc9cdd323 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -349,7 +349,7 @@ static struct pci_dev *alpha_gendev_to_pci(struct device *dev)
 static dma_addr_t alpha_pci_map_page(struct device *dev, struct page *page,
 				     unsigned long offset, size_t size,
 				     enum dma_data_direction dir,
-				     struct dma_attrs *attrs)
+				     unsigned long attrs)
 {
 	struct pci_dev *pdev = alpha_gendev_to_pci(dev);
 	int dac_allowed;
@@ -369,7 +369,7 @@ static dma_addr_t alpha_pci_map_page(struct device *dev, struct page *page,
 
 static void alpha_pci_unmap_page(struct device *dev, dma_addr_t dma_addr,
 				 size_t size, enum dma_data_direction dir,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
 	unsigned long flags;
 	struct pci_dev *pdev = alpha_gendev_to_pci(dev);
@@ -433,7 +433,7 @@ static void alpha_pci_unmap_page(struct device *dev, dma_addr_t dma_addr,
 
 static void *alpha_pci_alloc_coherent(struct device *dev, size_t size,
 				      dma_addr_t *dma_addrp, gfp_t gfp,
-				      struct dma_attrs *attrs)
+				      unsigned long attrs)
 {
 	struct pci_dev *pdev = alpha_gendev_to_pci(dev);
 	void *cpu_addr;
@@ -478,7 +478,7 @@ try_again:
 
 static void alpha_pci_free_coherent(struct device *dev, size_t size,
 				    void *cpu_addr, dma_addr_t dma_addr,
-				    struct dma_attrs *attrs)
+				    unsigned long attrs)
 {
 	struct pci_dev *pdev = alpha_gendev_to_pci(dev);
 	pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
@@ -651,7 +651,7 @@ sg_fill(struct device *dev, struct scatterlist *leader, struct scatterlist *end,
 
 static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg,
 			    int nents, enum dma_data_direction dir,
-			    struct dma_attrs *attrs)
+			    unsigned long attrs)
 {
 	struct pci_dev *pdev = alpha_gendev_to_pci(dev);
 	struct scatterlist *start, *end, *out;
@@ -729,7 +729,7 @@ static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg,
 
 static void alpha_pci_unmap_sg(struct device *dev, struct scatterlist *sg,
 			       int nents, enum dma_data_direction dir,
-			       struct dma_attrs *attrs)
+			       unsigned long attrs)
 {
 	struct pci_dev *pdev = alpha_gendev_to_pci(dev);
 	unsigned long flags;
diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c
index f535a3fd0f60..ceed68c7500b 100644
--- a/arch/alpha/kernel/rtc.c
+++ b/arch/alpha/kernel/rtc.c
@@ -15,8 +15,6 @@
 #include <linux/rtc.h>
 #include <linux/platform_device.h>
 
-#include <asm/rtc.h>
-
 #include "proto.h"
 
 
@@ -81,7 +79,7 @@ init_rtc_epoch(void)
 static int
 alpha_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
-	__get_rtc_time(tm);
+	mc146818_get_time(tm);
 
 	/* Adjust for non-default epochs.  It's easier to depend on the
 	   generic __get_rtc_time and adjust the epoch here than create
@@ -112,7 +110,7 @@ alpha_rtc_set_time(struct device *dev, struct rtc_time *tm)
 		tm = &xtm;
 	}
 
-	return __set_rtc_time(tm);
+	return mc146818_set_time(tm);
 }
 
 static int
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index ab74b5d9186c..20afc65e22dc 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -22,7 +22,7 @@
 
 
 static void *arc_dma_alloc(struct device *dev, size_t size,
-		dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs)
+		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
 	unsigned long order = get_order(size);
 	struct page *page;
@@ -46,7 +46,7 @@ static void *arc_dma_alloc(struct device *dev, size_t size,
 	 *   (vs. always going to memory - thus are faster)
 	 */
 	if ((is_isa_arcv2() && ioc_exists) ||
-	    dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs))
+	    (attrs & DMA_ATTR_NON_CONSISTENT))
 		need_coh = 0;
 
 	/*
@@ -90,13 +90,13 @@ static void *arc_dma_alloc(struct device *dev, size_t size,
 }
 
 static void arc_dma_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_handle, struct dma_attrs *attrs)
+		dma_addr_t dma_handle, unsigned long attrs)
 {
 	phys_addr_t paddr = plat_dma_to_phys(dev, dma_handle);
 	struct page *page = virt_to_page(paddr);
 	int is_non_coh = 1;
 
-	is_non_coh = dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs) ||
+	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) ||
 			(is_isa_arcv2() && ioc_exists);
 
 	if (PageHighMem(page) || !is_non_coh)
@@ -130,7 +130,7 @@ static void _dma_cache_sync(phys_addr_t paddr, size_t size,
 
 static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size, enum dma_data_direction dir,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	phys_addr_t paddr = page_to_phys(page) + offset;
 	_dma_cache_sync(paddr, size, dir);
@@ -138,7 +138,7 @@ static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page,
 }
 
 static int arc_dma_map_sg(struct device *dev, struct scatterlist *sg,
-	   int nents, enum dma_data_direction dir, struct dma_attrs *attrs)
+	   int nents, enum dma_data_direction dir, unsigned long attrs)
 {
 	struct scatterlist *s;
 	int i;
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 8be930394750..399e2f223d25 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -220,7 +220,7 @@ void __init mem_init(void)
 /*
  * free_initmem: Free all the __init memory.
  */
-void __init_refok free_initmem(void)
+void __ref free_initmem(void)
 {
 	free_initmem_default(-1);
 }
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 14b4cf75ceec..a9c4e48bb7ec 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -35,6 +35,7 @@ config ARM
 	select HARDIRQS_SW_RESEND
 	select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
 	select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
+	select HAVE_ARCH_HARDENED_USERCOPY
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
 	select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
 	select HAVE_ARCH_MMAP_RND_BITS if MMU
@@ -54,6 +55,7 @@ config ARM
 	select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
 	select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
 	select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
+	select HAVE_GCC_PLUGINS
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_HW_BREAKPOINT if (PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7))
 	select HAVE_IDE if PCI || ISA || PCMCIA
@@ -699,7 +701,7 @@ config ARCH_VIRT
 	depends on ARCH_MULTI_V7
 	select ARM_AMBA
 	select ARM_GIC
-	select ARM_GIC_V2M if PCI_MSI
+	select ARM_GIC_V2M if PCI
 	select ARM_GIC_V3
 	select ARM_PSCI
 	select HAVE_ARM_ARCH_TIMER
diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c
index 1143c4d5c567..301281645d08 100644
--- a/arch/arm/common/dmabounce.c
+++ b/arch/arm/common/dmabounce.c
@@ -310,7 +310,7 @@ static inline void unmap_single(struct device *dev, struct safe_buffer *buf,
  */
 static dma_addr_t dmabounce_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size, enum dma_data_direction dir,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	dma_addr_t dma_addr;
 	int ret;
@@ -344,7 +344,7 @@ static dma_addr_t dmabounce_map_page(struct device *dev, struct page *page,
  * should be)
  */
 static void dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
-		enum dma_data_direction dir, struct dma_attrs *attrs)
+		enum dma_data_direction dir, unsigned long attrs)
 {
 	struct safe_buffer *buf;
 
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index a83570f10124..d009f7911ffc 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -5,7 +5,6 @@
 
 #include <linux/mm_types.h>
 #include <linux/scatterlist.h>
-#include <linux/dma-attrs.h>
 #include <linux/dma-debug.h>
 
 #include <asm/memory.h>
@@ -174,7 +173,7 @@ static inline void dma_mark_clean(void *addr, size_t size) { }
  * to be the device-viewed address.
  */
 extern void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
-			   gfp_t gfp, struct dma_attrs *attrs);
+			   gfp_t gfp, unsigned long attrs);
 
 /**
  * arm_dma_free - free memory allocated by arm_dma_alloc
@@ -191,7 +190,7 @@ extern void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
  * during and after this call executing are illegal.
  */
 extern void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
-			 dma_addr_t handle, struct dma_attrs *attrs);
+			 dma_addr_t handle, unsigned long attrs);
 
 /**
  * arm_dma_mmap - map a coherent DMA allocation into user space
@@ -208,7 +207,7 @@ extern void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
  */
 extern int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 			void *cpu_addr, dma_addr_t dma_addr, size_t size,
-			struct dma_attrs *attrs);
+			unsigned long attrs);
 
 /*
  * This can be called during early boot to increase the size of the atomic
@@ -262,16 +261,16 @@ extern void dmabounce_unregister_dev(struct device *);
  * The scatter list versions of the above methods.
  */
 extern int arm_dma_map_sg(struct device *, struct scatterlist *, int,
-		enum dma_data_direction, struct dma_attrs *attrs);
+		enum dma_data_direction, unsigned long attrs);
 extern void arm_dma_unmap_sg(struct device *, struct scatterlist *, int,
-		enum dma_data_direction, struct dma_attrs *attrs);
+		enum dma_data_direction, unsigned long attrs);
 extern void arm_dma_sync_sg_for_cpu(struct device *, struct scatterlist *, int,
 		enum dma_data_direction);
 extern void arm_dma_sync_sg_for_device(struct device *, struct scatterlist *, int,
 		enum dma_data_direction);
 extern int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
 		void *cpu_addr, dma_addr_t dma_addr, size_t size,
-		struct dma_attrs *attrs);
+		unsigned long attrs);
 
 #endif /* __KERNEL__ */
 #endif
diff --git a/arch/arm/include/asm/kexec.h b/arch/arm/include/asm/kexec.h
index c2b9b4bdec00..1869af6bac5c 100644
--- a/arch/arm/include/asm/kexec.h
+++ b/arch/arm/include/asm/kexec.h
@@ -53,6 +53,30 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
 /* Function pointer to optional machine-specific reinitialization */
 extern void (*kexec_reinit)(void);
 
+static inline unsigned long phys_to_boot_phys(phys_addr_t phys)
+{
+	return phys_to_idmap(phys);
+}
+#define phys_to_boot_phys phys_to_boot_phys
+
+static inline phys_addr_t boot_phys_to_phys(unsigned long entry)
+{
+	return idmap_to_phys(entry);
+}
+#define boot_phys_to_phys boot_phys_to_phys
+
+static inline unsigned long page_to_boot_pfn(struct page *page)
+{
+	return page_to_pfn(page) + (arch_phys_to_idmap_offset >> PAGE_SHIFT);
+}
+#define page_to_boot_pfn page_to_boot_pfn
+
+static inline struct page *boot_pfn_to_page(unsigned long boot_pfn)
+{
+	return pfn_to_page(boot_pfn - (arch_phys_to_idmap_offset >> PAGE_SHIFT));
+}
+#define boot_pfn_to_page boot_pfn_to_page
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* CONFIG_KEXEC */
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 3d5a5cd071bd..58faff5f1eb2 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -66,6 +66,8 @@ extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
 extern void __init_stage2_translation(void);
+
+extern void __kvm_hyp_reset(unsigned long);
 #endif
 
 #endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 96387d477e91..de338d93d11b 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -241,8 +241,7 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		int exception_index);
 
-static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
-				       phys_addr_t pgd_ptr,
+static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 				       unsigned long hyp_stack_ptr,
 				       unsigned long vector_ptr)
 {
@@ -251,18 +250,13 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
 	 * code. The init code doesn't need to preserve these
 	 * registers as r0-r3 are already callee saved according to
 	 * the AAPCS.
-	 * Note that we slightly misuse the prototype by casing the
+	 * Note that we slightly misuse the prototype by casting the
 	 * stack pointer to a void *.
-	 *
-	 * We don't have enough registers to perform the full init in
-	 * one go.  Install the boot PGD first, and then install the
-	 * runtime PGD, stack pointer and vectors. The PGDs are always
-	 * passed as the third argument, in order to be passed into
-	 * r2-r3 to the init code (yes, this is compliant with the
-	 * PCS!).
-	 */
 
-	kvm_call_hyp(NULL, 0, boot_pgd_ptr);
+	 * The PGDs are always passed as the third argument, in order
+	 * to be passed into r2-r3 to the init code (yes, this is
+	 * compliant with the PCS!).
+	 */
 
 	kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
 }
@@ -272,16 +266,13 @@ static inline void __cpu_init_stage2(void)
 	kvm_call_hyp(__init_stage2_translation);
 }
 
-static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr,
+static inline void __cpu_reset_hyp_mode(unsigned long vector_ptr,
 					phys_addr_t phys_idmap_start)
 {
-	/*
-	 * TODO
-	 * kvm_call_reset(boot_pgd_ptr, phys_idmap_start);
-	 */
+	kvm_call_hyp((void *)virt_to_idmap(__kvm_hyp_reset), vector_ptr);
 }
 
-static inline int kvm_arch_dev_ioctl_check_extension(long ext)
+static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	return 0;
 }
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index f0e860761380..6eaff28f2ff3 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -25,9 +25,6 @@
 
 #define __hyp_text __section(.hyp.text) notrace
 
-#define kern_hyp_va(v) (v)
-#define hyp_kern_va(v) (v)
-
 #define __ACCESS_CP15(CRn, Op1, CRm, Op2)	\
 	"mrc", "mcr", __stringify(p15, Op1, %0, CRn, CRm, Op2), u32
 #define __ACCESS_CP15_64(Op1, CRm)		\
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index f9a65061130b..3bb803d6814b 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -26,16 +26,7 @@
  * We directly use the kernel VA for the HYP, as we can directly share
  * the mapping (HTTBR "covers" TTBR1).
  */
-#define HYP_PAGE_OFFSET_MASK	UL(~0)
-#define HYP_PAGE_OFFSET		PAGE_OFFSET
-#define KERN_TO_HYP(kva)	(kva)
-
-/*
- * Our virtual mapping for the boot-time MMU-enable code. Must be
- * shared across all the page-tables. Conveniently, we use the vectors
- * page, where no kernel data will ever be shared with HYP.
- */
-#define TRAMPOLINE_VA		UL(CONFIG_VECTORS_BASE)
+#define kern_hyp_va(kva)	(kva)
 
 /*
  * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels.
@@ -49,9 +40,8 @@
 #include <asm/pgalloc.h>
 #include <asm/stage2_pgtable.h>
 
-int create_hyp_mappings(void *from, void *to);
+int create_hyp_mappings(void *from, void *to, pgprot_t prot);
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
-void free_boot_hyp_pgd(void);
 void free_hyp_pgds(void);
 
 void stage2_unmap_vm(struct kvm *kvm);
@@ -65,7 +55,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
 void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
 
 phys_addr_t kvm_mmu_get_httbr(void);
-phys_addr_t kvm_mmu_get_boot_httbr(void);
 phys_addr_t kvm_get_idmap_vector(void);
 phys_addr_t kvm_get_idmap_start(void);
 int kvm_mmu_init(void);
diff --git a/arch/arm/include/asm/mach/pci.h b/arch/arm/include/asm/mach/pci.h
index 0070e8520cd4..2d88af5be45f 100644
--- a/arch/arm/include/asm/mach/pci.h
+++ b/arch/arm/include/asm/mach/pci.h
@@ -22,6 +22,7 @@ struct hw_pci {
 	struct msi_controller *msi_ctrl;
 	struct pci_ops	*ops;
 	int		nr_controllers;
+	unsigned int	io_optional:1;
 	void		**private_data;
 	int		(*setup)(int nr, struct pci_sys_data *);
 	struct pci_bus *(*scan)(int nr, struct pci_sys_data *);
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index d62204060cbe..a8d656d9aec7 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -97,7 +97,9 @@ extern pgprot_t		pgprot_s2_device;
 #define PAGE_READONLY_EXEC	_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY)
 #define PAGE_KERNEL		_MOD_PROT(pgprot_kernel, L_PTE_XN)
 #define PAGE_KERNEL_EXEC	pgprot_kernel
-#define PAGE_HYP		_MOD_PROT(pgprot_kernel, L_PTE_HYP)
+#define PAGE_HYP		_MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_XN)
+#define PAGE_HYP_EXEC		_MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY)
+#define PAGE_HYP_RO		_MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY | L_PTE_XN)
 #define PAGE_HYP_DEVICE		_MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
 #define PAGE_S2			_MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY)
 #define PAGE_S2_DEVICE		_MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY)
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 62a6f65029e6..a93c0f99acf7 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -480,7 +480,10 @@ arm_copy_from_user(void *to, const void __user *from, unsigned long n);
 static inline unsigned long __must_check
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	unsigned int __ua_flags = uaccess_save_and_enable();
+	unsigned int __ua_flags;
+
+	check_object_size(to, n, false);
+	__ua_flags = uaccess_save_and_enable();
 	n = arm_copy_from_user(to, from, n);
 	uaccess_restore(__ua_flags);
 	return n;
@@ -495,11 +498,15 @@ static inline unsigned long __must_check
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 #ifndef CONFIG_UACCESS_WITH_MEMCPY
-	unsigned int __ua_flags = uaccess_save_and_enable();
+	unsigned int __ua_flags;
+
+	check_object_size(from, n, true);
+	__ua_flags = uaccess_save_and_enable();
 	n = arm_copy_to_user(to, from, n);
 	uaccess_restore(__ua_flags);
 	return n;
 #else
+	check_object_size(from, n, true);
 	return arm_copy_to_user(to, from, n);
 #endif
 }
diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h
index d4ceaf5f299b..a2e75b84e2ae 100644
--- a/arch/arm/include/asm/virt.h
+++ b/arch/arm/include/asm/virt.h
@@ -80,6 +80,10 @@ static inline bool is_kernel_in_hyp_mode(void)
 	return false;
 }
 
+/* The section containing the hypervisor idmap text */
+extern char __hyp_idmap_text_start[];
+extern char __hyp_idmap_text_end[];
+
 /* The section containing the hypervisor text */
 extern char __hyp_text_start[];
 extern char __hyp_text_end[];
diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h
index 9408a994cc91..95ce6ac3a971 100644
--- a/arch/arm/include/asm/xen/page-coherent.h
+++ b/arch/arm/include/asm/xen/page-coherent.h
@@ -2,15 +2,14 @@
 #define _ASM_ARM_XEN_PAGE_COHERENT_H
 
 #include <asm/page.h>
-#include <linux/dma-attrs.h>
 #include <linux/dma-mapping.h>
 
 void __xen_dma_map_page(struct device *hwdev, struct page *page,
 	     dma_addr_t dev_addr, unsigned long offset, size_t size,
-	     enum dma_data_direction dir, struct dma_attrs *attrs);
+	     enum dma_data_direction dir, unsigned long attrs);
 void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir,
-		struct dma_attrs *attrs);
+		unsigned long attrs);
 void __xen_dma_sync_single_for_cpu(struct device *hwdev,
 		dma_addr_t handle, size_t size, enum dma_data_direction dir);
 
@@ -18,22 +17,20 @@ void __xen_dma_sync_single_for_device(struct device *hwdev,
 		dma_addr_t handle, size_t size, enum dma_data_direction dir);
 
 static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
-		dma_addr_t *dma_handle, gfp_t flags,
-		struct dma_attrs *attrs)
+		dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
 {
 	return __generic_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs);
 }
 
 static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
-		void *cpu_addr, dma_addr_t dma_handle,
-		struct dma_attrs *attrs)
+		void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
 {
 	__generic_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs);
 }
 
 static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
 	     dma_addr_t dev_addr, unsigned long offset, size_t size,
-	     enum dma_data_direction dir, struct dma_attrs *attrs)
+	     enum dma_data_direction dir, unsigned long attrs)
 {
 	unsigned long page_pfn = page_to_xen_pfn(page);
 	unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
@@ -58,8 +55,7 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
 }
 
 static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir,
-		struct dma_attrs *attrs)
+		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
 	unsigned long pfn = PFN_DOWN(handle);
 	/*
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 05e61a2eeabe..2f0e07735d1d 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -410,7 +410,8 @@ static int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 	return irq;
 }
 
-static int pcibios_init_resources(int busnr, struct pci_sys_data *sys)
+static int pcibios_init_resource(int busnr, struct pci_sys_data *sys,
+				 int io_optional)
 {
 	int ret;
 	struct resource_entry *window;
@@ -420,6 +421,14 @@ static int pcibios_init_resources(int busnr, struct pci_sys_data *sys)
 			 &iomem_resource, sys->mem_offset);
 	}
 
+	/*
+	 * If a platform says I/O port support is optional, we don't add
+	 * the default I/O space.  The platform is responsible for adding
+	 * any I/O space it needs.
+	 */
+	if (io_optional)
+		return 0;
+
 	resource_list_for_each_entry(window, &sys->resources)
 		if (resource_type(window->res) == IORESOURCE_IO)
 			return 0;
@@ -466,7 +475,7 @@ static void pcibios_init_hw(struct device *parent, struct hw_pci *hw,
 		if (ret > 0) {
 			struct pci_host_bridge *host_bridge;
 
-			ret = pcibios_init_resources(nr, sys);
+			ret = pcibios_init_resource(nr, sys, hw->io_optional);
 			if (ret)  {
 				kfree(sys);
 				break;
@@ -515,25 +524,23 @@ void pci_common_init_dev(struct device *parent, struct hw_pci *hw)
 	list_for_each_entry(sys, &head, node) {
 		struct pci_bus *bus = sys->bus;
 
-		if (!pci_has_flag(PCI_PROBE_ONLY)) {
+		/*
+		 * We insert PCI resources into the iomem_resource and
+		 * ioport_resource trees in either pci_bus_claim_resources()
+		 * or pci_bus_assign_resources().
+		 */
+		if (pci_has_flag(PCI_PROBE_ONLY)) {
+			pci_bus_claim_resources(bus);
+		} else {
 			struct pci_bus *child;
 
-			/*
-			 * Size the bridge windows.
-			 */
 			pci_bus_size_bridges(bus);
-
-			/*
-			 * Assign resources.
-			 */
 			pci_bus_assign_resources(bus);
 
 			list_for_each_entry(child, &bus->children, node)
 				pcie_bus_configure_settings(child);
 		}
-		/*
-		 * Tell drivers about devices found.
-		 */
+
 		pci_bus_add_devices(bus);
 	}
 }
@@ -590,18 +597,6 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 	return start;
 }
 
-/**
- * pcibios_enable_device - Enable I/O and memory.
- * @dev: PCI device to be enabled
- */
-int pcibios_enable_device(struct pci_dev *dev, int mask)
-{
-	if (pci_has_flag(PCI_PROBE_ONLY))
-		return 0;
-
-	return pci_enable_resources(dev, mask);
-}
-
 int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 			enum pci_mmap_state mmap_state, int write_combine)
 {
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index 59fd0e24c56b..b18c1ea56bed 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -57,7 +57,7 @@ int machine_kexec_prepare(struct kimage *image)
 	for (i = 0; i < image->nr_segments; i++) {
 		current_segment = &image->segment[i];
 
-		if (!memblock_is_region_memory(current_segment->mem,
+		if (!memblock_is_region_memory(idmap_to_phys(current_segment->mem),
 					       current_segment->memsz))
 			return -EINVAL;
 
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index da2f6c360f6b..df7f2a75e769 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -848,10 +848,29 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
 	kernel_data.end     = virt_to_phys(_end - 1);
 
 	for_each_memblock(memory, region) {
+		phys_addr_t start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
+		phys_addr_t end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
+		unsigned long boot_alias_start;
+
+		/*
+		 * Some systems have a special memory alias which is only
+		 * used for booting.  We need to advertise this region to
+		 * kexec-tools so they know where bootable RAM is located.
+		 */
+		boot_alias_start = phys_to_idmap(start);
+		if (arm_has_idmap_alias() && boot_alias_start != IDMAP_INVALID_ADDR) {
+			res = memblock_virt_alloc(sizeof(*res), 0);
+			res->name = "System RAM (boot alias)";
+			res->start = boot_alias_start;
+			res->end = phys_to_idmap(end);
+			res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+			request_resource(&iomem_resource, res);
+		}
+
 		res = memblock_virt_alloc(sizeof(*res), 0);
 		res->name  = "System RAM";
-		res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
-		res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
+		res->start = start;
+		res->end = end;
 		res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
 		request_resource(&iomem_resource, res);
@@ -1000,9 +1019,25 @@ static void __init reserve_crashkernel(void)
 		(unsigned long)(crash_base >> 20),
 		(unsigned long)(total_mem >> 20));
 
+	/* The crashk resource must always be located in normal mem */
 	crashk_res.start = crash_base;
 	crashk_res.end = crash_base + crash_size - 1;
 	insert_resource(&iomem_resource, &crashk_res);
+
+	if (arm_has_idmap_alias()) {
+		/*
+		 * If we have a special RAM alias for use at boot, we
+		 * need to advertise to kexec tools where the alias is.
+		 */
+		static struct resource crashk_boot_res = {
+			.name = "Crash kernel (boot alias)",
+			.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+		};
+
+		crashk_boot_res.start = phys_to_idmap(crash_base);
+		crashk_boot_res.end = crashk_boot_res.start + crash_size - 1;
+		insert_resource(&iomem_resource, &crashk_boot_res);
+	}
 }
 #else
 static inline void reserve_crashkernel(void) {}
diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index 087acb569b63..5f221acd21ae 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -279,8 +279,12 @@ asmlinkage long sys_oabi_epoll_wait(int epfd,
 	mm_segment_t fs;
 	long ret, err, i;
 
-	if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event)))
+	if (maxevents <= 0 ||
+			maxevents > (INT_MAX/sizeof(*kbuf)) ||
+			maxevents > (INT_MAX/sizeof(*events)))
 		return -EINVAL;
+	if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents))
+		return -EFAULT;
 	kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL);
 	if (!kbuf)
 		return -ENOMEM;
@@ -317,6 +321,8 @@ asmlinkage long sys_oabi_semtimedop(int semid,
 
 	if (nsops < 1 || nsops > SEMOPM)
 		return -EINVAL;
+	if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops))
+		return -EFAULT;
 	sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
 	if (!sops)
 		return -ENOMEM;
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 99420fc1f066..d24e5dd2aa7a 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -44,7 +44,7 @@
 #endif
 
 #if (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \
-	defined(CONFIG_GENERIC_BUG)
+	defined(CONFIG_GENERIC_BUG) || defined(CONFIG_JUMP_LABEL)
 #define ARM_EXIT_KEEP(x)	x
 #define ARM_EXIT_DISCARD(x)
 #else
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 02abfff68ee5..3e1cd0452d67 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -32,6 +32,8 @@ config KVM
 	select KVM_VFIO
 	select HAVE_KVM_EVENTFD
 	select HAVE_KVM_IRQFD
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
 	depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
 	---help---
 	  Support hosting virtualized guest machines.
@@ -46,13 +48,6 @@ config KVM_ARM_HOST
 	---help---
 	  Provides host support for ARM processors.
 
-config KVM_NEW_VGIC
-	bool "New VGIC implementation"
-	depends on KVM
-	default y
-	---help---
-	  uses the new VGIC implementation
-
 source drivers/vhost/Kconfig
 
 endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index a596b58f6d37..10d77a66cad5 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -22,7 +22,6 @@ obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
 
-ifeq ($(CONFIG_KVM_NEW_VGIC),y)
 obj-y += $(KVM)/arm/vgic/vgic.o
 obj-y += $(KVM)/arm/vgic/vgic-init.o
 obj-y += $(KVM)/arm/vgic/vgic-irqfd.o
@@ -30,9 +29,5 @@ obj-y += $(KVM)/arm/vgic/vgic-v2.o
 obj-y += $(KVM)/arm/vgic/vgic-mmio.o
 obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o
 obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o
-else
-obj-y += $(KVM)/arm/vgic.o
-obj-y += $(KVM)/arm/vgic-v2.o
-obj-y += $(KVM)/arm/vgic-v2-emul.o
-endif
+obj-y += $(KVM)/irqchip.o
 obj-y += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index f1bde7c4e736..d94bb9093ead 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -20,6 +20,7 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kvm_host.h>
+#include <linux/list.h>
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
@@ -122,7 +123,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (ret)
 		goto out_fail_alloc;
 
-	ret = create_hyp_mappings(kvm, kvm + 1);
+	ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
 	if (ret)
 		goto out_free_stage2_pgd;
 
@@ -201,7 +202,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = KVM_MAX_VCPUS;
 		break;
 	default:
-		r = kvm_arch_dev_ioctl_check_extension(ext);
+		r = kvm_arch_dev_ioctl_check_extension(kvm, ext);
 		break;
 	}
 	return r;
@@ -239,7 +240,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 	if (err)
 		goto free_vcpu;
 
-	err = create_hyp_mappings(vcpu, vcpu + 1);
+	err = create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
 	if (err)
 		goto vcpu_uninit;
 
@@ -377,7 +378,7 @@ void force_vm_exit(const cpumask_t *mask)
 
 /**
  * need_new_vmid_gen - check that the VMID is still valid
- * @kvm: The VM's VMID to checkt
+ * @kvm: The VM's VMID to check
  *
  * return true if there is a new generation of VMIDs being used
  *
@@ -616,7 +617,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 * Enter the guest
 		 */
 		trace_kvm_entry(*vcpu_pc(vcpu));
-		__kvm_guest_enter();
+		guest_enter_irqoff();
 		vcpu->mode = IN_GUEST_MODE;
 
 		ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
@@ -642,14 +643,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		local_irq_enable();
 
 		/*
-		 * We do local_irq_enable() before calling kvm_guest_exit() so
+		 * We do local_irq_enable() before calling guest_exit() so
 		 * that if a timer interrupt hits while running the guest we
 		 * account that tick as being spent in the guest.  We enable
-		 * preemption after calling kvm_guest_exit() so that if we get
+		 * preemption after calling guest_exit() so that if we get
 		 * preempted we make sure ticks after that is not counted as
 		 * guest time.
 		 */
-		kvm_guest_exit();
+		guest_exit();
 		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
 
 		/*
@@ -1039,7 +1040,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 static void cpu_init_hyp_mode(void *dummy)
 {
-	phys_addr_t boot_pgd_ptr;
 	phys_addr_t pgd_ptr;
 	unsigned long hyp_stack_ptr;
 	unsigned long stack_page;
@@ -1048,13 +1048,12 @@ static void cpu_init_hyp_mode(void *dummy)
 	/* Switch from the HYP stub to our own HYP init vector */
 	__hyp_set_vectors(kvm_get_idmap_vector());
 
-	boot_pgd_ptr = kvm_mmu_get_boot_httbr();
 	pgd_ptr = kvm_mmu_get_httbr();
 	stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
 	hyp_stack_ptr = stack_page + PAGE_SIZE;
 	vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector);
 
-	__cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
+	__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
 	__cpu_init_stage2();
 
 	kvm_arm_init_debug();
@@ -1076,15 +1075,9 @@ static void cpu_hyp_reinit(void)
 
 static void cpu_hyp_reset(void)
 {
-	phys_addr_t boot_pgd_ptr;
-	phys_addr_t phys_idmap_start;
-
-	if (!is_kernel_in_hyp_mode()) {
-		boot_pgd_ptr = kvm_mmu_get_boot_httbr();
-		phys_idmap_start = kvm_get_idmap_start();
-
-		__cpu_reset_hyp_mode(boot_pgd_ptr, phys_idmap_start);
-	}
+	if (!is_kernel_in_hyp_mode())
+		__cpu_reset_hyp_mode(hyp_default_vectors,
+				     kvm_get_idmap_start());
 }
 
 static void _kvm_arch_hardware_enable(void *discard)
@@ -1294,14 +1287,14 @@ static int init_hyp_mode(void)
 	 * Map the Hyp-code called directly from the host
 	 */
 	err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start),
-				  kvm_ksym_ref(__hyp_text_end));
+				  kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC);
 	if (err) {
 		kvm_err("Cannot map world-switch code\n");
 		goto out_err;
 	}
 
 	err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
-				  kvm_ksym_ref(__end_rodata));
+				  kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
 	if (err) {
 		kvm_err("Cannot map rodata section\n");
 		goto out_err;
@@ -1312,7 +1305,8 @@ static int init_hyp_mode(void)
 	 */
 	for_each_possible_cpu(cpu) {
 		char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
-		err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE);
+		err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
+					  PAGE_HYP);
 
 		if (err) {
 			kvm_err("Cannot map hyp stack\n");
@@ -1324,7 +1318,7 @@ static int init_hyp_mode(void)
 		kvm_cpu_context_t *cpu_ctxt;
 
 		cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu);
-		err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1);
+		err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP);
 
 		if (err) {
 			kvm_err("Cannot map host CPU state: %d\n", err);
@@ -1332,10 +1326,6 @@ static int init_hyp_mode(void)
 		}
 	}
 
-#ifndef CONFIG_HOTPLUG_CPU
-	free_boot_hyp_pgd();
-#endif
-
 	/* set size of VMID supported by CPU */
 	kvm_vmid_bits = kvm_get_vmid_bits();
 	kvm_info("%d-bit VMID\n", kvm_vmid_bits);
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index a494def3f195..af93e3ffc9f3 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -210,7 +210,7 @@ bool kvm_condition_valid(struct kvm_vcpu *vcpu)
  * @vcpu:	The VCPU pointer
  *
  * When exceptions occur while instructions are executed in Thumb IF-THEN
- * blocks, the ITSTATE field of the CPSR is not advanved (updated), so we have
+ * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have
  * to do this little bit of work manually. The fields map like this:
  *
  * IT[7:0] -> CPSR[26:25],CPSR[15:10]
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 9093ed0f8b2a..9aca92074f85 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -182,7 +182,7 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
 /**
  * kvm_arm_copy_reg_indices - get indices of all registers.
  *
- * We do core registers right here, then we apppend coproc regs.
+ * We do core registers right here, then we append coproc regs.
  */
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 {
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 1f9ae17476f9..bf89c919efc1 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -32,23 +32,13 @@
  *       r2,r3 = Hypervisor pgd pointer
  *
  * The init scenario is:
- * - We jump in HYP with four parameters: boot HYP pgd, runtime HYP pgd,
- *   runtime stack, runtime vectors
- * - Enable the MMU with the boot pgd
- * - Jump to a target into the trampoline page (remember, this is the same
- *   physical page!)
- * - Now switch to the runtime pgd (same VA, and still the same physical
- *   page!)
+ * - We jump in HYP with 3 parameters: runtime HYP pgd, runtime stack,
+ *   runtime vectors
  * - Invalidate TLBs
  * - Set stack and vectors
+ * - Setup the page tables
+ * - Enable the MMU
  * - Profit! (or eret, if you only care about the code).
- *
- * As we only have four registers available to pass parameters (and we
- * need six), we split the init in two phases:
- * - Phase 1: r0 = 0, r1 = 0, r2,r3 contain the boot PGD.
- *   Provides the basic HYP init, and enable the MMU.
- * - Phase 2: r0 = ToS, r1 = vectors, r2,r3 contain the runtime PGD.
- *   Switches to the runtime PGD, set stack and vectors.
  */
 
 	.text
@@ -68,8 +58,11 @@ __kvm_hyp_init:
 	W(b)	.
 
 __do_hyp_init:
-	cmp	r0, #0			@ We have a SP?
-	bne	phase2			@ Yes, second stage init
+	@ Set stack pointer
+	mov	sp, r0
+
+	@ Set HVBAR to point to the HYP vectors
+	mcr	p15, 4, r1, c12, c0, 0	@ HVBAR
 
 	@ Set the HTTBR to point to the hypervisor PGD pointer passed
 	mcrr	p15, 4, rr_lo_hi(r2, r3), c2
@@ -114,34 +107,25 @@ __do_hyp_init:
  THUMB(	ldr	r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE)		)
 	orr	r1, r1, r2
 	orr	r0, r0, r1
-	isb
 	mcr	p15, 4, r0, c1, c0, 0	@ HSCR
+	isb
 
-	@ End of init phase-1
 	eret
 
-phase2:
-	@ Set stack pointer
-	mov	sp, r0
-
-	@ Set HVBAR to point to the HYP vectors
-	mcr	p15, 4, r1, c12, c0, 0	@ HVBAR
-
-	@ Jump to the trampoline page
-	ldr	r0, =TRAMPOLINE_VA
-	adr	r1, target
-	bfi	r0, r1, #0, #PAGE_SHIFT
-	ret	r0
+	@ r0 : stub vectors address
+ENTRY(__kvm_hyp_reset)
+	/* We're now in idmap, disable MMU */
+	mrc	p15, 4, r1, c1, c0, 0	@ HSCTLR
+	ldr	r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_C | HSCTLR_I)
+	bic	r1, r1, r2
+	mcr	p15, 4, r1, c1, c0, 0	@ HSCTLR
 
-target:	@ We're now in the trampoline code, switch page tables
-	mcrr	p15, 4, rr_lo_hi(r2, r3), c2
+	/* Install stub vectors */
+	mcr	p15, 4, r0, c12, c0, 0	@ HVBAR
 	isb
 
-	@ Invalidate the old TLBs
-	mcr	p15, 4, r0, c8, c7, 0	@ TLBIALLH
-	dsb	ish
-
 	eret
+ENDPROC(__kvm_hyp_reset)
 
 	.ltorg
 
diff --git a/arch/arm/kvm/irq.h b/arch/arm/kvm/irq.h
new file mode 100644
index 000000000000..b74099b905fd
--- /dev/null
+++ b/arch/arm/kvm/irq.h
@@ -0,0 +1,19 @@
+/*
+ * irq.h: in kernel interrupt controller related definitions
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This header is included by irqchip.c. However, on ARM, interrupt
+ * controller declarations are located in include/kvm/arm_vgic.h since
+ * they are mostly shared between arm and arm64.
+ */
+
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include <kvm/arm_vgic.h>
+
+#endif
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 45c43aecb8f2..bda27b6b1aa2 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -32,8 +32,6 @@
 
 #include "trace.h"
 
-extern char  __hyp_idmap_text_start[], __hyp_idmap_text_end[];
-
 static pgd_t *boot_hyp_pgd;
 static pgd_t *hyp_pgd;
 static pgd_t *merged_hyp_pgd;
@@ -484,28 +482,6 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
 }
 
 /**
- * free_boot_hyp_pgd - free HYP boot page tables
- *
- * Free the HYP boot page tables. The bounce page is also freed.
- */
-void free_boot_hyp_pgd(void)
-{
-	mutex_lock(&kvm_hyp_pgd_mutex);
-
-	if (boot_hyp_pgd) {
-		unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
-		unmap_hyp_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
-		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
-		boot_hyp_pgd = NULL;
-	}
-
-	if (hyp_pgd)
-		unmap_hyp_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
-
-	mutex_unlock(&kvm_hyp_pgd_mutex);
-}
-
-/**
  * free_hyp_pgds - free Hyp-mode page tables
  *
  * Assumes hyp_pgd is a page table used strictly in Hyp-mode and
@@ -519,15 +495,20 @@ void free_hyp_pgds(void)
 {
 	unsigned long addr;
 
-	free_boot_hyp_pgd();
-
 	mutex_lock(&kvm_hyp_pgd_mutex);
 
+	if (boot_hyp_pgd) {
+		unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
+		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
+		boot_hyp_pgd = NULL;
+	}
+
 	if (hyp_pgd) {
+		unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE);
 		for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
-			unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+			unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
 		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
-			unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+			unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
 
 		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
 		hyp_pgd = NULL;
@@ -679,17 +660,18 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
  * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
  * @from:	The virtual kernel start address of the range
  * @to:		The virtual kernel end address of the range (exclusive)
+ * @prot:	The protection to be applied to this range
  *
  * The same virtual address as the kernel virtual address is also used
  * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
  * physical pages.
  */
-int create_hyp_mappings(void *from, void *to)
+int create_hyp_mappings(void *from, void *to, pgprot_t prot)
 {
 	phys_addr_t phys_addr;
 	unsigned long virt_addr;
-	unsigned long start = KERN_TO_HYP((unsigned long)from);
-	unsigned long end = KERN_TO_HYP((unsigned long)to);
+	unsigned long start = kern_hyp_va((unsigned long)from);
+	unsigned long end = kern_hyp_va((unsigned long)to);
 
 	if (is_kernel_in_hyp_mode())
 		return 0;
@@ -704,7 +686,7 @@ int create_hyp_mappings(void *from, void *to)
 		err = __create_hyp_mappings(hyp_pgd, virt_addr,
 					    virt_addr + PAGE_SIZE,
 					    __phys_to_pfn(phys_addr),
-					    PAGE_HYP);
+					    prot);
 		if (err)
 			return err;
 	}
@@ -723,8 +705,8 @@ int create_hyp_mappings(void *from, void *to)
  */
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
 {
-	unsigned long start = KERN_TO_HYP((unsigned long)from);
-	unsigned long end = KERN_TO_HYP((unsigned long)to);
+	unsigned long start = kern_hyp_va((unsigned long)from);
+	unsigned long end = kern_hyp_va((unsigned long)to);
 
 	if (is_kernel_in_hyp_mode())
 		return 0;
@@ -1687,14 +1669,6 @@ phys_addr_t kvm_mmu_get_httbr(void)
 		return virt_to_phys(hyp_pgd);
 }
 
-phys_addr_t kvm_mmu_get_boot_httbr(void)
-{
-	if (__kvm_cpu_uses_extended_idmap())
-		return virt_to_phys(merged_hyp_pgd);
-	else
-		return virt_to_phys(boot_hyp_pgd);
-}
-
 phys_addr_t kvm_get_idmap_vector(void)
 {
 	return hyp_idmap_vector;
@@ -1705,6 +1679,22 @@ phys_addr_t kvm_get_idmap_start(void)
 	return hyp_idmap_start;
 }
 
+static int kvm_map_idmap_text(pgd_t *pgd)
+{
+	int err;
+
+	/* Create the idmap in the boot page tables */
+	err = 	__create_hyp_mappings(pgd,
+				      hyp_idmap_start, hyp_idmap_end,
+				      __phys_to_pfn(hyp_idmap_start),
+				      PAGE_HYP_EXEC);
+	if (err)
+		kvm_err("Failed to idmap %lx-%lx\n",
+			hyp_idmap_start, hyp_idmap_end);
+
+	return err;
+}
+
 int kvm_mmu_init(void)
 {
 	int err;
@@ -1719,28 +1709,41 @@ int kvm_mmu_init(void)
 	 */
 	BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
 
-	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
-	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
+	kvm_info("IDMAP page: %lx\n", hyp_idmap_start);
+	kvm_info("HYP VA range: %lx:%lx\n",
+		 kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL));
 
-	if (!hyp_pgd || !boot_hyp_pgd) {
-		kvm_err("Hyp mode PGD not allocated\n");
-		err = -ENOMEM;
+	if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
+	    hyp_idmap_start <  kern_hyp_va(~0UL)) {
+		/*
+		 * The idmap page is intersecting with the VA space,
+		 * it is not safe to continue further.
+		 */
+		kvm_err("IDMAP intersecting with HYP VA, unable to continue\n");
+		err = -EINVAL;
 		goto out;
 	}
 
-	/* Create the idmap in the boot page tables */
-	err = 	__create_hyp_mappings(boot_hyp_pgd,
-				      hyp_idmap_start, hyp_idmap_end,
-				      __phys_to_pfn(hyp_idmap_start),
-				      PAGE_HYP);
-
-	if (err) {
-		kvm_err("Failed to idmap %lx-%lx\n",
-			hyp_idmap_start, hyp_idmap_end);
+	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
+	if (!hyp_pgd) {
+		kvm_err("Hyp mode PGD not allocated\n");
+		err = -ENOMEM;
 		goto out;
 	}
 
 	if (__kvm_cpu_uses_extended_idmap()) {
+		boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+							 hyp_pgd_order);
+		if (!boot_hyp_pgd) {
+			kvm_err("Hyp boot PGD not allocated\n");
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = kvm_map_idmap_text(boot_hyp_pgd);
+		if (err)
+			goto out;
+
 		merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 		if (!merged_hyp_pgd) {
 			kvm_err("Failed to allocate extra HYP pgd\n");
@@ -1748,29 +1751,10 @@ int kvm_mmu_init(void)
 		}
 		__kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd,
 				    hyp_idmap_start);
-		return 0;
-	}
-
-	/* Map the very same page at the trampoline VA */
-	err = 	__create_hyp_mappings(boot_hyp_pgd,
-				      TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
-				      __phys_to_pfn(hyp_idmap_start),
-				      PAGE_HYP);
-	if (err) {
-		kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n",
-			TRAMPOLINE_VA);
-		goto out;
-	}
-
-	/* Map the same page again into the runtime page tables */
-	err = 	__create_hyp_mappings(hyp_pgd,
-				      TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
-				      __phys_to_pfn(hyp_idmap_start),
-				      PAGE_HYP);
-	if (err) {
-		kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n",
-			TRAMPOLINE_VA);
-		goto out;
+	} else {
+		err = kvm_map_idmap_text(hyp_pgd);
+		if (err)
+			goto out;
 	}
 
 	return 0;
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index 0048b5a62a50..4b5e802e57d1 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -52,7 +52,7 @@ static const struct kvm_irq_level cortexa_vtimer_irq = {
  * @vcpu: The VCPU pointer
  *
  * This function finds the right table above and sets the registers on the
- * virtual CPU struct to their architectually defined reset values.
+ * virtual CPU struct to their architecturally defined reset values.
  */
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/arm/mach-ep93xx/ts72xx.c b/arch/arm/mach-ep93xx/ts72xx.c
index 45b81a2bcd4b..3b39ea353d30 100644
--- a/arch/arm/mach-ep93xx/ts72xx.c
+++ b/arch/arm/mach-ep93xx/ts72xx.c
@@ -16,7 +16,7 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
-#include <linux/m48t86.h>
+#include <linux/platform_data/rtc-m48t86.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
 
diff --git a/arch/arm/mach-integrator/impd1.c b/arch/arm/mach-integrator/impd1.c
index 38b0da300dd5..ed9a01484030 100644
--- a/arch/arm/mach-integrator/impd1.c
+++ b/arch/arm/mach-integrator/impd1.c
@@ -320,11 +320,11 @@ static struct impd1_device impd1_devs[] = {
 #define IMPD1_VALID_IRQS 0x00000bffU
 
 /*
- * As this module is bool, it is OK to have this as __init_refok() - no
+ * As this module is bool, it is OK to have this as __ref() - no
  * probe calls will be done after the initial system bootup, as devices
  * are discovered as part of the machine startup.
  */
-static int __init_refok impd1_probe(struct lm_device *dev)
+static int __ref impd1_probe(struct lm_device *dev)
 {
 	struct impd1_module *impd1;
 	int irq_base;
diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c
index 45a05207b418..6af5430d0d97 100644
--- a/arch/arm/mach-mv78xx0/common.c
+++ b/arch/arm/mach-mv78xx0/common.c
@@ -343,7 +343,7 @@ void __init mv78xx0_init_early(void)
 				DDR_WINDOW_CPU1_BASE, DDR_WINDOW_CPU_SZ);
 }
 
-void __init_refok mv78xx0_timer_init(void)
+void __ref mv78xx0_timer_init(void)
 {
 	orion_time_init(BRIDGE_VIRT_BASE, BRIDGE_INT_TIMER1_CLR,
 			IRQ_MV78XX0_TIMER_1, get_tclk());
diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index c2626f83e0c2..e920dd83e443 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -209,7 +209,7 @@ static int _omap_device_notifier_call(struct notifier_block *nb,
 	int err;
 
 	switch (event) {
-	case BUS_NOTIFY_DEL_DEVICE:
+	case BUS_NOTIFY_REMOVED_DEVICE:
 		if (pdev->archdata.od)
 			omap_device_delete(pdev->archdata.od);
 		break;
diff --git a/arch/arm/mach-orion5x/ts78xx-setup.c b/arch/arm/mach-orion5x/ts78xx-setup.c
index 3a58a5d4a28a..8d597267d0c4 100644
--- a/arch/arm/mach-orion5x/ts78xx-setup.c
+++ b/arch/arm/mach-orion5x/ts78xx-setup.c
@@ -16,7 +16,7 @@
 #include <linux/platform_device.h>
 #include <linux/mv643xx_eth.h>
 #include <linux/ata_platform.h>
-#include <linux/m48t86.h>
+#include <linux/platform_data/rtc-m48t86.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/timeriomem-rng.h>
diff --git a/arch/arm/mach-pxa/cm-x270.c b/arch/arm/mach-pxa/cm-x270.c
index fa5f51d633a3..be4a66166d61 100644
--- a/arch/arm/mach-pxa/cm-x270.c
+++ b/arch/arm/mach-pxa/cm-x270.c
@@ -14,7 +14,7 @@
 #include <linux/gpio.h>
 #include <linux/delay.h>
 
-#include <linux/rtc-v3020.h>
+#include <linux/platform_data/rtc-v3020.h>
 #include <video/mbxfb.h>
 
 #include <linux/spi/spi.h>
diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c
index 5f5ac7c8faf0..868448d2cd82 100644
--- a/arch/arm/mach-pxa/cm-x300.c
+++ b/arch/arm/mach-pxa/cm-x300.c
@@ -25,7 +25,7 @@
 #include <linux/gpio.h>
 #include <linux/dm9000.h>
 #include <linux/leds.h>
-#include <linux/rtc-v3020.h>
+#include <linux/platform_data/rtc-v3020.h>
 #include <linux/pwm.h>
 #include <linux/pwm_backlight.h>
 
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 6e0268deec43..03354c21e1f2 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -14,7 +14,7 @@
 #include <linux/delay.h>
 
 #include <linux/dm9000.h>
-#include <linux/rtc-v3020.h>
+#include <linux/platform_data/rtc-v3020.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index b7eed75960fe..c6834c0cfd1c 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -128,16 +128,16 @@ static void __dma_page_dev_to_cpu(struct page *, unsigned long,
  */
 static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page,
 	     unsigned long offset, size_t size, enum dma_data_direction dir,
-	     struct dma_attrs *attrs)
+	     unsigned long attrs)
 {
-	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 		__dma_page_cpu_to_dev(page, offset, size, dir);
 	return pfn_to_dma(dev, page_to_pfn(page)) + offset;
 }
 
 static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page,
 	     unsigned long offset, size_t size, enum dma_data_direction dir,
-	     struct dma_attrs *attrs)
+	     unsigned long attrs)
 {
 	return pfn_to_dma(dev, page_to_pfn(page)) + offset;
 }
@@ -157,10 +157,9 @@ static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *pag
  * whatever the device wrote there.
  */
 static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir,
-		struct dma_attrs *attrs)
+		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
-	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 		__dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)),
 				      handle & ~PAGE_MASK, size, dir);
 }
@@ -198,12 +197,12 @@ struct dma_map_ops arm_dma_ops = {
 EXPORT_SYMBOL(arm_dma_ops);
 
 static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
-	dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs);
+	dma_addr_t *handle, gfp_t gfp, unsigned long attrs);
 static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
-				  dma_addr_t handle, struct dma_attrs *attrs);
+				  dma_addr_t handle, unsigned long attrs);
 static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
-		 struct dma_attrs *attrs);
+		 unsigned long attrs);
 
 struct dma_map_ops arm_coherent_dma_ops = {
 	.alloc			= arm_coherent_dma_alloc,
@@ -639,11 +638,11 @@ static void __free_from_contiguous(struct device *dev, struct page *page,
 	dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
 }
 
-static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
+static inline pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot)
 {
-	prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
-			    pgprot_writecombine(prot) :
-			    pgprot_dmacoherent(prot);
+	prot = (attrs & DMA_ATTR_WRITE_COMBINE) ?
+			pgprot_writecombine(prot) :
+			pgprot_dmacoherent(prot);
 	return prot;
 }
 
@@ -751,7 +750,7 @@ static struct arm_dma_allocator remap_allocator = {
 
 static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 			 gfp_t gfp, pgprot_t prot, bool is_coherent,
-			 struct dma_attrs *attrs, const void *caller)
+			 unsigned long attrs, const void *caller)
 {
 	u64 mask = get_coherent_dma_mask(dev);
 	struct page *page = NULL;
@@ -764,7 +763,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 		.gfp = gfp,
 		.prot = prot,
 		.caller = caller,
-		.want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs),
+		.want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0),
 		.coherent_flag = is_coherent ? COHERENT : NORMAL,
 	};
 
@@ -834,7 +833,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
  * virtual and bus address for that space.
  */
 void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
-		    gfp_t gfp, struct dma_attrs *attrs)
+		    gfp_t gfp, unsigned long attrs)
 {
 	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL);
 
@@ -843,7 +842,7 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 }
 
 static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
-	dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
+	dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
 {
 	return __dma_alloc(dev, size, handle, gfp, PAGE_KERNEL, true,
 			   attrs, __builtin_return_address(0));
@@ -851,7 +850,7 @@ static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
 
 static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
-		 struct dma_attrs *attrs)
+		 unsigned long attrs)
 {
 	int ret = -ENXIO;
 #ifdef CONFIG_MMU
@@ -879,14 +878,14 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
  */
 static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
-		 struct dma_attrs *attrs)
+		 unsigned long attrs)
 {
 	return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
 }
 
 int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
-		 struct dma_attrs *attrs)
+		 unsigned long attrs)
 {
 #ifdef CONFIG_MMU
 	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
@@ -898,7 +897,7 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
  * Free a buffer as defined by the above mapping.
  */
 static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
-			   dma_addr_t handle, struct dma_attrs *attrs,
+			   dma_addr_t handle, unsigned long attrs,
 			   bool is_coherent)
 {
 	struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
@@ -908,7 +907,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
 		.size = PAGE_ALIGN(size),
 		.cpu_addr = cpu_addr,
 		.page = page,
-		.want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs),
+		.want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0),
 	};
 
 	buf = arm_dma_buffer_find(cpu_addr);
@@ -920,20 +919,20 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
 }
 
 void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
-		  dma_addr_t handle, struct dma_attrs *attrs)
+		  dma_addr_t handle, unsigned long attrs)
 {
 	__arm_dma_free(dev, size, cpu_addr, handle, attrs, false);
 }
 
 static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
-				  dma_addr_t handle, struct dma_attrs *attrs)
+				  dma_addr_t handle, unsigned long attrs)
 {
 	__arm_dma_free(dev, size, cpu_addr, handle, attrs, true);
 }
 
 int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
 		 void *cpu_addr, dma_addr_t handle, size_t size,
-		 struct dma_attrs *attrs)
+		 unsigned long attrs)
 {
 	struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
 	int ret;
@@ -1066,7 +1065,7 @@ static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
  * here.
  */
 int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-		enum dma_data_direction dir, struct dma_attrs *attrs)
+		enum dma_data_direction dir, unsigned long attrs)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
 	struct scatterlist *s;
@@ -1100,7 +1099,7 @@ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
  * rules concerning calls here are the same as for dma_unmap_single().
  */
 void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
-		enum dma_data_direction dir, struct dma_attrs *attrs)
+		enum dma_data_direction dir, unsigned long attrs)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
 	struct scatterlist *s;
@@ -1273,7 +1272,7 @@ static inline void __free_iova(struct dma_iommu_mapping *mapping,
 static const int iommu_order_array[] = { 9, 8, 4, 0 };
 
 static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
-					  gfp_t gfp, struct dma_attrs *attrs,
+					  gfp_t gfp, unsigned long attrs,
 					  int coherent_flag)
 {
 	struct page **pages;
@@ -1289,7 +1288,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
 	if (!pages)
 		return NULL;
 
-	if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs))
+	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS)
 	{
 		unsigned long order = get_order(size);
 		struct page *page;
@@ -1307,7 +1306,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
 	}
 
 	/* Go straight to 4K chunks if caller says it's OK. */
-	if (dma_get_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, attrs))
+	if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES)
 		order_idx = ARRAY_SIZE(iommu_order_array) - 1;
 
 	/*
@@ -1363,12 +1362,12 @@ error:
 }
 
 static int __iommu_free_buffer(struct device *dev, struct page **pages,
-			       size_t size, struct dma_attrs *attrs)
+			       size_t size, unsigned long attrs)
 {
 	int count = size >> PAGE_SHIFT;
 	int i;
 
-	if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) {
+	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
 		dma_release_from_contiguous(dev, pages[0], count);
 	} else {
 		for (i = 0; i < count; i++)
@@ -1460,14 +1459,14 @@ static struct page **__atomic_get_pages(void *addr)
 	return (struct page **)page;
 }
 
-static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs)
+static struct page **__iommu_get_pages(void *cpu_addr, unsigned long attrs)
 {
 	struct vm_struct *area;
 
 	if (__in_atomic_pool(cpu_addr, PAGE_SIZE))
 		return __atomic_get_pages(cpu_addr);
 
-	if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs))
+	if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
 		return cpu_addr;
 
 	area = find_vm_area(cpu_addr);
@@ -1511,7 +1510,7 @@ static void __iommu_free_atomic(struct device *dev, void *cpu_addr,
 }
 
 static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size,
-	    dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs,
+	    dma_addr_t *handle, gfp_t gfp, unsigned long attrs,
 	    int coherent_flag)
 {
 	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL);
@@ -1542,7 +1541,7 @@ static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size,
 	if (*handle == DMA_ERROR_CODE)
 		goto err_buffer;
 
-	if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs))
+	if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
 		return pages;
 
 	addr = __iommu_alloc_remap(pages, size, gfp, prot,
@@ -1560,20 +1559,20 @@ err_buffer:
 }
 
 static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
-		    dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
+	    dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
 {
 	return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, NORMAL);
 }
 
 static void *arm_coherent_iommu_alloc_attrs(struct device *dev, size_t size,
-		    dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
+		    dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
 {
 	return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, COHERENT);
 }
 
 static int __arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 		    void *cpu_addr, dma_addr_t dma_addr, size_t size,
-		    struct dma_attrs *attrs)
+		    unsigned long attrs)
 {
 	unsigned long uaddr = vma->vm_start;
 	unsigned long usize = vma->vm_end - vma->vm_start;
@@ -1603,7 +1602,7 @@ static int __arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma
 }
 static int arm_iommu_mmap_attrs(struct device *dev,
 		struct vm_area_struct *vma, void *cpu_addr,
-		dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs)
+		dma_addr_t dma_addr, size_t size, unsigned long attrs)
 {
 	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
 
@@ -1612,7 +1611,7 @@ static int arm_iommu_mmap_attrs(struct device *dev,
 
 static int arm_coherent_iommu_mmap_attrs(struct device *dev,
 		struct vm_area_struct *vma, void *cpu_addr,
-		dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs)
+		dma_addr_t dma_addr, size_t size, unsigned long attrs)
 {
 	return __arm_iommu_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, attrs);
 }
@@ -1622,7 +1621,7 @@ static int arm_coherent_iommu_mmap_attrs(struct device *dev,
  * Must not be called with IRQs disabled.
  */
 void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
-	dma_addr_t handle, struct dma_attrs *attrs, int coherent_flag)
+	dma_addr_t handle, unsigned long attrs, int coherent_flag)
 {
 	struct page **pages;
 	size = PAGE_ALIGN(size);
@@ -1638,7 +1637,7 @@ void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 		return;
 	}
 
-	if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) {
+	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) {
 		dma_common_free_remap(cpu_addr, size,
 			VM_ARM_DMA_CONSISTENT | VM_USERMAP);
 	}
@@ -1648,20 +1647,20 @@ void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 }
 
 void arm_iommu_free_attrs(struct device *dev, size_t size,
-		    void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs)
+		    void *cpu_addr, dma_addr_t handle, unsigned long attrs)
 {
 	__arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, NORMAL);
 }
 
 void arm_coherent_iommu_free_attrs(struct device *dev, size_t size,
-		    void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs)
+		    void *cpu_addr, dma_addr_t handle, unsigned long attrs)
 {
 	__arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, COHERENT);
 }
 
 static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
 				 void *cpu_addr, dma_addr_t dma_addr,
-				 size_t size, struct dma_attrs *attrs)
+				 size_t size, unsigned long attrs)
 {
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	struct page **pages = __iommu_get_pages(cpu_addr, attrs);
@@ -1699,7 +1698,7 @@ static int __dma_direction_to_prot(enum dma_data_direction dir)
  */
 static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
 			  size_t size, dma_addr_t *handle,
-			  enum dma_data_direction dir, struct dma_attrs *attrs,
+			  enum dma_data_direction dir, unsigned long attrs,
 			  bool is_coherent)
 {
 	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
@@ -1720,8 +1719,7 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
 		phys_addr_t phys = page_to_phys(sg_page(s));
 		unsigned int len = PAGE_ALIGN(s->offset + s->length);
 
-		if (!is_coherent &&
-			!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+		if (!is_coherent && (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 			__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
 
 		prot = __dma_direction_to_prot(dir);
@@ -1742,7 +1740,7 @@ fail:
 }
 
 static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-		     enum dma_data_direction dir, struct dma_attrs *attrs,
+		     enum dma_data_direction dir, unsigned long attrs,
 		     bool is_coherent)
 {
 	struct scatterlist *s = sg, *dma = sg, *start = sg;
@@ -1800,7 +1798,7 @@ bad_mapping:
  * obtained via sg_dma_{address,length}.
  */
 int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg,
-		int nents, enum dma_data_direction dir, struct dma_attrs *attrs)
+		int nents, enum dma_data_direction dir, unsigned long attrs)
 {
 	return __iommu_map_sg(dev, sg, nents, dir, attrs, true);
 }
@@ -1818,14 +1816,14 @@ int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg,
  * sg_dma_{address,length}.
  */
 int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg,
-		int nents, enum dma_data_direction dir, struct dma_attrs *attrs)
+		int nents, enum dma_data_direction dir, unsigned long attrs)
 {
 	return __iommu_map_sg(dev, sg, nents, dir, attrs, false);
 }
 
 static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg,
-		int nents, enum dma_data_direction dir, struct dma_attrs *attrs,
-		bool is_coherent)
+		int nents, enum dma_data_direction dir,
+		unsigned long attrs, bool is_coherent)
 {
 	struct scatterlist *s;
 	int i;
@@ -1834,8 +1832,7 @@ static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg,
 		if (sg_dma_len(s))
 			__iommu_remove_mapping(dev, sg_dma_address(s),
 					       sg_dma_len(s));
-		if (!is_coherent &&
-		    !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+		if (!is_coherent && (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 			__dma_page_dev_to_cpu(sg_page(s), s->offset,
 					      s->length, dir);
 	}
@@ -1852,7 +1849,8 @@ static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg,
  * rules concerning calls here are the same as for dma_unmap_single().
  */
 void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg,
-		int nents, enum dma_data_direction dir, struct dma_attrs *attrs)
+		int nents, enum dma_data_direction dir,
+		unsigned long attrs)
 {
 	__iommu_unmap_sg(dev, sg, nents, dir, attrs, true);
 }
@@ -1868,7 +1866,8 @@ void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg,
  * rules concerning calls here are the same as for dma_unmap_single().
  */
 void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
-			enum dma_data_direction dir, struct dma_attrs *attrs)
+			enum dma_data_direction dir,
+			unsigned long attrs)
 {
 	__iommu_unmap_sg(dev, sg, nents, dir, attrs, false);
 }
@@ -1921,7 +1920,7 @@ void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
  */
 static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *page,
 	     unsigned long offset, size_t size, enum dma_data_direction dir,
-	     struct dma_attrs *attrs)
+	     unsigned long attrs)
 {
 	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	dma_addr_t dma_addr;
@@ -1955,9 +1954,9 @@ fail:
  */
 static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
 	     unsigned long offset, size_t size, enum dma_data_direction dir,
-	     struct dma_attrs *attrs)
+	     unsigned long attrs)
 {
-	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 		__dma_page_cpu_to_dev(page, offset, size, dir);
 
 	return arm_coherent_iommu_map_page(dev, page, offset, size, dir, attrs);
@@ -1973,8 +1972,7 @@ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
  * Coherent IOMMU aware version of arm_dma_unmap_page()
  */
 static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir,
-		struct dma_attrs *attrs)
+		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
 	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	dma_addr_t iova = handle & PAGE_MASK;
@@ -1998,8 +1996,7 @@ static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle,
  * IOMMU aware version of arm_dma_unmap_page()
  */
 static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir,
-		struct dma_attrs *attrs)
+		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
 	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	dma_addr_t iova = handle & PAGE_MASK;
@@ -2010,7 +2007,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
 	if (!iova)
 		return;
 
-	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 		__dma_page_dev_to_cpu(page, offset, size, dir);
 
 	iommu_unmap(mapping->domain, iova, len);
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index c5f9a9e3d1f3..d062f08f5020 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -98,11 +98,11 @@ static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
 
 void __xen_dma_map_page(struct device *hwdev, struct page *page,
 	     dma_addr_t dev_addr, unsigned long offset, size_t size,
-	     enum dma_data_direction dir, struct dma_attrs *attrs)
+	     enum dma_data_direction dir, unsigned long attrs)
 {
 	if (is_device_dma_coherent(hwdev))
 		return;
-	if (dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
 		return;
 
 	__xen_dma_page_cpu_to_dev(hwdev, dev_addr, size, dir);
@@ -110,12 +110,12 @@ void __xen_dma_map_page(struct device *hwdev, struct page *page,
 
 void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 
 {
 	if (is_device_dma_coherent(hwdev))
 		return;
-	if (dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
 		return;
 
 	__xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9f8b99e20557..bc3f00f586f1 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -3,6 +3,7 @@ config ARM64
 	select ACPI_CCA_REQUIRED if ACPI
 	select ACPI_GENERIC_GSI if ACPI
 	select ACPI_REDUCED_HARDWARE_ONLY if ACPI
+	select ACPI_MCFG if ACPI
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
@@ -22,9 +23,9 @@ config ARM64
 	select ARM_ARCH_TIMER
 	select ARM_GIC
 	select AUDIT_ARCH_COMPAT_GENERIC
-	select ARM_GIC_V2M if PCI_MSI
+	select ARM_GIC_V2M if PCI
 	select ARM_GIC_V3
-	select ARM_GIC_V3_ITS if PCI_MSI
+	select ARM_GIC_V3_ITS if PCI
 	select ARM_PSCI_FW
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
@@ -53,6 +54,7 @@ config ARM64
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_BITREVERSE
+	select HAVE_ARCH_HARDENED_USERCOPY
 	select HAVE_ARCH_HUGE_VMAP
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
@@ -78,6 +80,7 @@ config ARM64
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_GCC_PLUGINS
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
 	select HAVE_IRQ_TIME_ACCOUNTING
@@ -101,6 +104,7 @@ config ARM64
 	select OF_EARLY_FLATTREE
 	select OF_NUMA if NUMA && OF
 	select OF_RESERVED_MEM
+	select PCI_ECAM if ACPI
 	select PERF_USE_VMALLOC
 	select POWER_RESET
 	select POWER_SUPPLY
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index d59b6908a21a..5b54f8c021d8 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -15,7 +15,7 @@ CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
 GZFLAGS		:=-9
 
 ifneq ($(CONFIG_RELOCATABLE),)
-LDFLAGS_vmlinux		+= -pie
+LDFLAGS_vmlinux		+= -pie -Bsymbolic
 endif
 
 KBUILD_DEFCONFIG := defconfig
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-db.dts b/arch/arm64/boot/dts/marvell/armada-3720-db.dts
index 86110a6ae330..1372e9a6aaa4 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-db.dts
+++ b/arch/arm64/boot/dts/marvell/armada-3720-db.dts
@@ -76,3 +76,8 @@
 &usb3 {
 	status = "okay";
 };
+
+/* CON17 (PCIe) / CON12 (mini-PCIe) */
+&pcie0 {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
index eb29280962d7..c4762538ec01 100644
--- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
@@ -176,5 +176,30 @@
 				      <0x1d40000 0x40000>; /* GICR */
 			};
 		};
+
+		pcie0: pcie@d0070000 {
+			compatible = "marvell,armada-3700-pcie";
+			device_type = "pci";
+			status = "disabled";
+			reg = <0 0xd0070000 0 0x20000>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			bus-range = <0x00 0xff>;
+			interrupts = <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>;
+			#interrupt-cells = <1>;
+			msi-parent = <&pcie0>;
+			msi-controller;
+			ranges = <0x82000000 0 0xe8000000   0 0xe8000000 0 0x1000000 /* Port 0 MEM */
+				  0x81000000 0 0xe9000000   0 0xe9000000 0 0x10000>; /* Port 0 IO*/
+			interrupt-map-mask = <0 0 0 7>;
+			interrupt-map = <0 0 0 1 &pcie_intc 0>,
+					<0 0 0 2 &pcie_intc 1>,
+					<0 0 0 3 &pcie_intc 2>,
+					<0 0 0 4 &pcie_intc 3>;
+			pcie_intc: interrupt-controller {
+				interrupt-controller;
+				#interrupt-cells = <1>;
+			};
+		};
 	};
 };
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 49dd1bd3ea50..7099f26e3702 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -36,8 +36,9 @@
 #define ARM64_HAS_VIRT_HOST_EXTN		11
 #define ARM64_WORKAROUND_CAVIUM_27456		12
 #define ARM64_HAS_32BIT_EL0			13
+#define ARM64_HYP_OFFSET_LOW			14
 
-#define ARM64_NCAPS				14
+#define ARM64_NCAPS				15
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 579b6e654f2d..a55384f4a5d7 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -140,6 +140,7 @@ typedef struct user_fpsimd_state elf_fpregset_t;
 
 #define SET_PERSONALITY(ex)		clear_thread_flag(TIF_32BIT);
 
+/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
 #define ARCH_DLINFO							\
 do {									\
 	NEW_AUX_ENT(AT_SYSINFO_EHDR,					\
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 2cdb6b551ac6..4b5c977af465 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -178,7 +178,7 @@
 /* Hyp System Trap Register */
 #define HSTR_EL2_T(x)	(1 << x)
 
-/* Hyp Coproccessor Trap Register Shifts */
+/* Hyp Coprocessor Trap Register Shifts */
 #define CPTR_EL2_TFP_SHIFT 10
 
 /* Hyp Coprocessor Trap Register */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 49095fc4b482..3eda975837d0 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -47,8 +47,7 @@
 
 int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
-int kvm_arch_dev_ioctl_check_extension(long ext);
-unsigned long kvm_hyp_reset_entry(void);
+int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);
 void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
 
 struct kvm_arch {
@@ -348,8 +347,7 @@ int kvm_perf_teardown(void);
 
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
-static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
-				       phys_addr_t pgd_ptr,
+static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 				       unsigned long hyp_stack_ptr,
 				       unsigned long vector_ptr)
 {
@@ -357,19 +355,14 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
 	 * Call initialization code, and switch to the full blown
 	 * HYP code.
 	 */
-	__kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr,
-		       hyp_stack_ptr, vector_ptr);
+	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
 }
 
-static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr,
+void __kvm_hyp_teardown(void);
+static inline void __cpu_reset_hyp_mode(unsigned long vector_ptr,
 					phys_addr_t phys_idmap_start)
 {
-	/*
-	 * Call reset code, and switch back to stub hyp vectors.
-	 * Uses __kvm_call_hyp() to avoid kaslr's kvm_ksym_ref() translation.
-	 */
-	__kvm_call_hyp((void *)kvm_hyp_reset_entry(),
-		       boot_pgd_ptr, phys_idmap_start);
+	kvm_call_hyp(__kvm_hyp_teardown, phys_idmap_start);
 }
 
 static inline void kvm_arch_hardware_unsetup(void) {}
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 44eaff70da6a..cff510574fae 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -25,29 +25,6 @@
 
 #define __hyp_text __section(.hyp.text) notrace
 
-static inline unsigned long __kern_hyp_va(unsigned long v)
-{
-	asm volatile(ALTERNATIVE("and %0, %0, %1",
-				 "nop",
-				 ARM64_HAS_VIRT_HOST_EXTN)
-		     : "+r" (v) : "i" (HYP_PAGE_OFFSET_MASK));
-	return v;
-}
-
-#define kern_hyp_va(v) (typeof(v))(__kern_hyp_va((unsigned long)(v)))
-
-static inline unsigned long __hyp_kern_va(unsigned long v)
-{
-	u64 offset = PAGE_OFFSET - HYP_PAGE_OFFSET;
-	asm volatile(ALTERNATIVE("add %0, %0, %1",
-				 "nop",
-				 ARM64_HAS_VIRT_HOST_EXTN)
-		     : "+r" (v) : "r" (offset));
-	return v;
-}
-
-#define hyp_kern_va(v) (typeof(v))(__hyp_kern_va((unsigned long)(v)))
-
 #define read_sysreg_elx(r,nvh,vh)					\
 	({								\
 		u64 reg;						\
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index f05ac27d033e..b6bb83400cd8 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -29,21 +29,48 @@
  *
  * Instead, give the HYP mode its own VA region at a fixed offset from
  * the kernel by just masking the top bits (which are all ones for a
- * kernel address).
+ * kernel address). We need to find out how many bits to mask.
  *
- * ARMv8.1 (using VHE) does have a TTBR1_EL2, and doesn't use these
- * macros (the entire kernel runs at EL2).
+ * We want to build a set of page tables that cover both parts of the
+ * idmap (the trampoline page used to initialize EL2), and our normal
+ * runtime VA space, at the same time.
+ *
+ * Given that the kernel uses VA_BITS for its entire address space,
+ * and that half of that space (VA_BITS - 1) is used for the linear
+ * mapping, we can also limit the EL2 space to (VA_BITS - 1).
+ *
+ * The main question is "Within the VA_BITS space, does EL2 use the
+ * top or the bottom half of that space to shadow the kernel's linear
+ * mapping?". As we need to idmap the trampoline page, this is
+ * determined by the range in which this page lives.
+ *
+ * If the page is in the bottom half, we have to use the top half. If
+ * the page is in the top half, we have to use the bottom half:
+ *
+ * T = __virt_to_phys(__hyp_idmap_text_start)
+ * if (T & BIT(VA_BITS - 1))
+ *	HYP_VA_MIN = 0  //idmap in upper half
+ * else
+ *	HYP_VA_MIN = 1 << (VA_BITS - 1)
+ * HYP_VA_MAX = HYP_VA_MIN + (1 << (VA_BITS - 1)) - 1
+ *
+ * This of course assumes that the trampoline page exists within the
+ * VA_BITS range. If it doesn't, then it means we're in the odd case
+ * where the kernel idmap (as well as HYP) uses more levels than the
+ * kernel runtime page tables (as seen when the kernel is configured
+ * for 4k pages, 39bits VA, and yet memory lives just above that
+ * limit, forcing the idmap to use 4 levels of page tables while the
+ * kernel itself only uses 3). In this particular case, it doesn't
+ * matter which side of VA_BITS we use, as we're guaranteed not to
+ * conflict with anything.
+ *
+ * When using VHE, there are no separate hyp mappings and all KVM
+ * functionality is already mapped as part of the main kernel
+ * mappings, and none of this applies in that case.
  */
-#define HYP_PAGE_OFFSET_SHIFT	VA_BITS
-#define HYP_PAGE_OFFSET_MASK	((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1)
-#define HYP_PAGE_OFFSET		(PAGE_OFFSET & HYP_PAGE_OFFSET_MASK)
 
-/*
- * Our virtual mapping for the idmap-ed MMU-enable code. Must be
- * shared across all the page-tables. Conveniently, we use the last
- * possible page, where no kernel mapping will ever exist.
- */
-#define TRAMPOLINE_VA		(HYP_PAGE_OFFSET_MASK & PAGE_MASK)
+#define HYP_PAGE_OFFSET_HIGH_MASK	((UL(1) << VA_BITS) - 1)
+#define HYP_PAGE_OFFSET_LOW_MASK	((UL(1) << (VA_BITS - 1)) - 1)
 
 #ifdef __ASSEMBLY__
 
@@ -53,13 +80,33 @@
 /*
  * Convert a kernel VA into a HYP VA.
  * reg: VA to be converted.
+ *
+ * This generates the following sequences:
+ * - High mask:
+ *		and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK
+ *		nop
+ * - Low mask:
+ *		and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK
+ *		and x0, x0, #HYP_PAGE_OFFSET_LOW_MASK
+ * - VHE:
+ *		nop
+ *		nop
+ *
+ * The "low mask" version works because the mask is a strict subset of
+ * the "high mask", hence performing the first mask for nothing.
+ * Should be completely invisible on any viable CPU.
  */
 .macro kern_hyp_va	reg
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN	
-	and	\reg, \reg, #HYP_PAGE_OFFSET_MASK
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
+	and     \reg, \reg, #HYP_PAGE_OFFSET_HIGH_MASK
 alternative_else
 	nop
 alternative_endif
+alternative_if_not ARM64_HYP_OFFSET_LOW
+	nop
+alternative_else
+	and     \reg, \reg, #HYP_PAGE_OFFSET_LOW_MASK
+alternative_endif
 .endm
 
 #else
@@ -70,7 +117,22 @@ alternative_endif
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 
-#define KERN_TO_HYP(kva)	((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET)
+static inline unsigned long __kern_hyp_va(unsigned long v)
+{
+	asm volatile(ALTERNATIVE("and %0, %0, %1",
+				 "nop",
+				 ARM64_HAS_VIRT_HOST_EXTN)
+		     : "+r" (v)
+		     : "i" (HYP_PAGE_OFFSET_HIGH_MASK));
+	asm volatile(ALTERNATIVE("nop",
+				 "and %0, %0, %1",
+				 ARM64_HYP_OFFSET_LOW)
+		     : "+r" (v)
+		     : "i" (HYP_PAGE_OFFSET_LOW_MASK));
+	return v;
+}
+
+#define kern_hyp_va(v) 	(typeof(v))(__kern_hyp_va((unsigned long)(v)))
 
 /*
  * We currently only support a 40bit IPA.
@@ -81,9 +143,8 @@ alternative_endif
 
 #include <asm/stage2_pgtable.h>
 
-int create_hyp_mappings(void *from, void *to);
+int create_hyp_mappings(void *from, void *to, pgprot_t prot);
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
-void free_boot_hyp_pgd(void);
 void free_hyp_pgds(void);
 
 void stage2_unmap_vm(struct kvm *kvm);
@@ -97,7 +158,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
 void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
 
 phys_addr_t kvm_mmu_get_httbr(void);
-phys_addr_t kvm_mmu_get_boot_httbr(void);
 phys_addr_t kvm_get_idmap_vector(void);
 phys_addr_t kvm_get_idmap_start(void);
 int kvm_mmu_init(void);
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 2813748e2f24..c3ae239db3ee 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -164,6 +164,7 @@
 #define PTE_CONT		(_AT(pteval_t, 1) << 52)	/* Contiguous range */
 #define PTE_PXN			(_AT(pteval_t, 1) << 53)	/* Privileged XN */
 #define PTE_UXN			(_AT(pteval_t, 1) << 54)	/* User XN */
+#define PTE_HYP_XN		(_AT(pteval_t, 1) << 54)	/* HYP XN */
 
 /*
  * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 29fcb33ab401..39f5252673f7 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -55,7 +55,9 @@
 #define PAGE_KERNEL_EXEC	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_EXEC_CONT	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
 
-#define PAGE_HYP		__pgprot(_PAGE_DEFAULT | PTE_HYP)
+#define PAGE_HYP		__pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
+#define PAGE_HYP_EXEC		__pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY)
+#define PAGE_HYP_RO		__pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
 #define PAGE_HYP_DEVICE		__pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
 
 #define PAGE_S2			__pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 46472a91b6df..e20bd431184a 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -224,6 +224,23 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 	set_pte(ptep, pte);
 }
 
+#define __HAVE_ARCH_PTE_SAME
+static inline int pte_same(pte_t pte_a, pte_t pte_b)
+{
+	pteval_t lhs, rhs;
+
+	lhs = pte_val(pte_a);
+	rhs = pte_val(pte_b);
+
+	if (pte_present(pte_a))
+		lhs &= ~PTE_RDONLY;
+
+	if (pte_present(pte_b))
+		rhs &= ~PTE_RDONLY;
+
+	return (lhs == rhs);
+}
+
 /*
  * Huge pte definitions.
  */
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 5e834d10b291..c47257c91b77 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -265,22 +265,25 @@ extern unsigned long __must_check __clear_user(void __user *addr, unsigned long
 static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	kasan_check_write(to, n);
-	return  __arch_copy_from_user(to, from, n);
+	check_object_size(to, n, false);
+	return __arch_copy_from_user(to, from, n);
 }
 
 static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	kasan_check_read(from, n);
-	return  __arch_copy_to_user(to, from, n);
+	check_object_size(from, n, true);
+	return __arch_copy_to_user(to, from, n);
 }
 
 static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	kasan_check_write(to, n);
 
-	if (access_ok(VERIFY_READ, from, n))
+	if (access_ok(VERIFY_READ, from, n)) {
+		check_object_size(to, n, false);
 		n = __arch_copy_from_user(to, from, n);
-	else /* security hole - plug it */
+	} else /* security hole - plug it */
 		memset(to, 0, n);
 	return n;
 }
@@ -289,8 +292,10 @@ static inline unsigned long __must_check copy_to_user(void __user *to, const voi
 {
 	kasan_check_read(from, n);
 
-	if (access_ok(VERIFY_WRITE, to, n))
+	if (access_ok(VERIFY_WRITE, to, n)) {
+		check_object_size(from, n, true);
 		n = __arch_copy_to_user(to, from, n);
+	}
 	return n;
 }
 
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index bbc6a8cf83f1..1788545f25bc 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -87,6 +87,10 @@ extern void verify_cpu_run_el(void);
 static inline void verify_cpu_run_el(void) {}
 #endif
 
+/* The section containing the hypervisor idmap text */
+extern char __hyp_idmap_text_start[];
+extern char __hyp_idmap_text_end[];
+
 /* The section containing the hypervisor text */
 extern char __hyp_text_start[];
 extern char __hyp_text_end[];
diff --git a/arch/arm64/include/uapi/asm/auxvec.h b/arch/arm64/include/uapi/asm/auxvec.h
index 22d6d8885854..4cf0c17787a8 100644
--- a/arch/arm64/include/uapi/asm/auxvec.h
+++ b/arch/arm64/include/uapi/asm/auxvec.h
@@ -19,4 +19,6 @@
 /* vDSO location */
 #define AT_SYSINFO_EHDR	33
 
+#define AT_VECTOR_SIZE_ARCH 1 /* entries in ARCH_DLINFO */
+
 #endif
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index f209ea151dca..3051f86a9b5f 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -87,9 +87,11 @@ struct kvm_regs {
 /* Supported VGICv3 address types  */
 #define KVM_VGIC_V3_ADDR_TYPE_DIST	2
 #define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
+#define KVM_VGIC_ITS_ADDR_TYPE		4
 
 #define KVM_VGIC_V3_DIST_SIZE		SZ_64K
 #define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
+#define KVM_VGIC_V3_ITS_SIZE		(2 * SZ_64K)
 
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 916d27ad79c1..62272eac1352 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -726,6 +726,19 @@ static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused
 	return is_kernel_in_hyp_mode();
 }
 
+static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry,
+			   int __unused)
+{
+	phys_addr_t idmap_addr = virt_to_phys(__hyp_idmap_text_start);
+
+	/*
+	 * Activate the lower HYP offset only if:
+	 * - the idmap doesn't clash with it,
+	 * - the kernel is not running at EL2.
+	 */
+	return idmap_addr > GENMASK(VA_BITS - 2, 0) && !is_kernel_in_hyp_mode();
+}
+
 static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "GIC system register CPU interface",
@@ -803,6 +816,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.field_pos = ID_AA64PFR0_EL0_SHIFT,
 		.min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT,
 	},
+	{
+		.desc = "Reduced HYP mapping offset",
+		.capability = ARM64_HYP_OFFSET_LOW,
+		.def_scope = SCOPE_SYSTEM,
+		.matches = hyp_offset_low,
+	},
 	{},
 };
 
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 2c6e598a94dc..b77f58355da1 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -781,40 +781,25 @@ __primary_switch:
 	 * Iterate over each entry in the relocation table, and apply the
 	 * relocations in place.
 	 */
-	ldr	w8, =__dynsym_offset		// offset to symbol table
 	ldr	w9, =__rela_offset		// offset to reloc table
 	ldr	w10, =__rela_size		// size of reloc table
 
 	mov_q	x11, KIMAGE_VADDR		// default virtual offset
 	add	x11, x11, x23			// actual virtual offset
-	add	x8, x8, x11			// __va(.dynsym)
 	add	x9, x9, x11			// __va(.rela)
 	add	x10, x9, x10			// __va(.rela) + sizeof(.rela)
 
 0:	cmp	x9, x10
-	b.hs	2f
+	b.hs	1f
 	ldp	x11, x12, [x9], #24
 	ldr	x13, [x9, #-8]
 	cmp	w12, #R_AARCH64_RELATIVE
-	b.ne	1f
+	b.ne	0b
 	add	x13, x13, x23			// relocate
 	str	x13, [x11, x23]
 	b	0b
 
-1:	cmp	w12, #R_AARCH64_ABS64
-	b.ne	0b
-	add	x12, x12, x12, lsl #1		// symtab offset: 24x top word
-	add	x12, x8, x12, lsr #(32 - 3)	// ... shifted into bottom word
-	ldrsh	w14, [x12, #6]			// Elf64_Sym::st_shndx
-	ldr	x15, [x12, #8]			// Elf64_Sym::st_value
-	cmp	w14, #-0xf			// SHN_ABS (0xfff1) ?
-	add	x14, x15, x23			// relocate
-	csel	x15, x14, x15, ne
-	add	x15, x13, x15
-	str	x15, [x11, x23]
-	b	0b
-
-2:
+1:
 #endif
 	ldr	x8, =__primary_switched
 	br	x8
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index 3c4e308b40a0..acf38722457b 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -17,6 +17,9 @@
 #include <linux/mm.h>
 #include <linux/of_pci.h>
 #include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/pci-ecam.h>
 #include <linux/slab.h>
 
 /*
@@ -36,25 +39,17 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 	return res->start;
 }
 
-/**
- * pcibios_enable_device - Enable I/O and memory.
- * @dev: PCI device to be enabled
- * @mask: bitmask of BARs to enable
- */
-int pcibios_enable_device(struct pci_dev *dev, int mask)
-{
-	if (pci_has_flag(PCI_PROBE_ONLY))
-		return 0;
-
-	return pci_enable_resources(dev, mask);
-}
-
 /*
- * Try to assign the IRQ number from DT when adding a new device
+ * Try to assign the IRQ number when probing a new device
  */
-int pcibios_add_device(struct pci_dev *dev)
+int pcibios_alloc_irq(struct pci_dev *dev)
 {
-	dev->irq = of_irq_parse_and_map_pci(dev, 0, 0);
+	if (acpi_disabled)
+		dev->irq = of_irq_parse_and_map_pci(dev, 0, 0);
+#ifdef CONFIG_ACPI
+	else
+		return acpi_pci_irq_enable(dev);
+#endif
 
 	return 0;
 }
@@ -65,13 +60,21 @@ int pcibios_add_device(struct pci_dev *dev)
 int raw_pci_read(unsigned int domain, unsigned int bus,
 		  unsigned int devfn, int reg, int len, u32 *val)
 {
-	return -ENXIO;
+	struct pci_bus *b = pci_find_bus(domain, bus);
+
+	if (!b)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	return b->ops->read(b, devfn, reg, len, val);
 }
 
 int raw_pci_write(unsigned int domain, unsigned int bus,
 		unsigned int devfn, int reg, int len, u32 val)
 {
-	return -ENXIO;
+	struct pci_bus *b = pci_find_bus(domain, bus);
+
+	if (!b)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	return b->ops->write(b, devfn, reg, len, val);
 }
 
 #ifdef CONFIG_NUMA
@@ -85,10 +88,124 @@ EXPORT_SYMBOL(pcibus_to_node);
 #endif
 
 #ifdef CONFIG_ACPI
-/* Root bridge scanning */
+
+struct acpi_pci_generic_root_info {
+	struct acpi_pci_root_info	common;
+	struct pci_config_window	*cfg;	/* config space mapping */
+};
+
+int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	struct acpi_device *adev = to_acpi_device(cfg->parent);
+	struct acpi_pci_root *root = acpi_driver_data(adev);
+
+	return root->segment;
+}
+
+int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+	if (!acpi_disabled) {
+		struct pci_config_window *cfg = bridge->bus->sysdata;
+		struct acpi_device *adev = to_acpi_device(cfg->parent);
+		ACPI_COMPANION_SET(&bridge->dev, adev);
+	}
+
+	return 0;
+}
+
+/*
+ * Lookup the bus range for the domain in MCFG, and set up config space
+ * mapping.
+ */
+static struct pci_config_window *
+pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root)
+{
+	struct resource *bus_res = &root->secondary;
+	u16 seg = root->segment;
+	struct pci_config_window *cfg;
+	struct resource cfgres;
+	unsigned int bsz;
+
+	/* Use address from _CBA if present, otherwise lookup MCFG */
+	if (!root->mcfg_addr)
+		root->mcfg_addr = pci_mcfg_lookup(seg, bus_res);
+
+	if (!root->mcfg_addr) {
+		dev_err(&root->device->dev, "%04x:%pR ECAM region not found\n",
+			seg, bus_res);
+		return NULL;
+	}
+
+	bsz = 1 << pci_generic_ecam_ops.bus_shift;
+	cfgres.start = root->mcfg_addr + bus_res->start * bsz;
+	cfgres.end = cfgres.start + resource_size(bus_res) * bsz - 1;
+	cfgres.flags = IORESOURCE_MEM;
+	cfg = pci_ecam_create(&root->device->dev, &cfgres, bus_res,
+			      &pci_generic_ecam_ops);
+	if (IS_ERR(cfg)) {
+		dev_err(&root->device->dev, "%04x:%pR error %ld mapping ECAM\n",
+			seg, bus_res, PTR_ERR(cfg));
+		return NULL;
+	}
+
+	return cfg;
+}
+
+/* release_info: free resources allocated by init_info */
+static void pci_acpi_generic_release_info(struct acpi_pci_root_info *ci)
+{
+	struct acpi_pci_generic_root_info *ri;
+
+	ri = container_of(ci, struct acpi_pci_generic_root_info, common);
+	pci_ecam_free(ri->cfg);
+	kfree(ri);
+}
+
+static struct acpi_pci_root_ops acpi_pci_root_ops = {
+	.release_info = pci_acpi_generic_release_info,
+};
+
+/* Interface called from ACPI code to setup PCI host controller */
 struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 {
-	/* TODO: Should be revisited when implementing PCI on ACPI */
-	return NULL;
+	int node = acpi_get_node(root->device->handle);
+	struct acpi_pci_generic_root_info *ri;
+	struct pci_bus *bus, *child;
+
+	ri = kzalloc_node(sizeof(*ri), GFP_KERNEL, node);
+	if (!ri)
+		return NULL;
+
+	ri->cfg = pci_acpi_setup_ecam_mapping(root);
+	if (!ri->cfg) {
+		kfree(ri);
+		return NULL;
+	}
+
+	acpi_pci_root_ops.pci_ops = &ri->cfg->ops->pci_ops;
+	bus = acpi_pci_root_create(root, &acpi_pci_root_ops, &ri->common,
+				   ri->cfg);
+	if (!bus)
+		return NULL;
+
+	pci_bus_size_bridges(bus);
+	pci_bus_assign_resources(bus);
+
+	list_for_each_entry(child, &bus->children, node)
+		pcie_bus_configure_settings(child);
+
+	return bus;
 }
+
+void pcibios_add_bus(struct pci_bus *bus)
+{
+	acpi_pci_add_bus(bus);
+}
+
+void pcibios_remove_bus(struct pci_bus *bus)
+{
+	acpi_pci_remove_bus(bus);
+}
+
 #endif
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 89d6e177ecbd..659963d40bb4 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -103,6 +103,7 @@ SECTIONS
 		*(.discard)
 		*(.discard.*)
 		*(.interp .dynamic)
+		*(.dynsym .dynstr .hash)
 	}
 
 	. = KIMAGE_VADDR + TEXT_OFFSET;
@@ -174,19 +175,9 @@ SECTIONS
 	.rela : ALIGN(8) {
 		*(.rela .rela*)
 	}
-	.dynsym : ALIGN(8) {
-		*(.dynsym)
-	}
-	.dynstr : {
-		*(.dynstr)
-	}
-	.hash : {
-		*(.hash)
-	}
 
-	__rela_offset	= ADDR(.rela) - KIMAGE_VADDR;
+	__rela_offset	= ABSOLUTE(ADDR(.rela) - KIMAGE_VADDR);
 	__rela_size	= SIZEOF(.rela);
-	__dynsym_offset	= ADDR(.dynsym) - KIMAGE_VADDR;
 
 	. = ALIGN(SEGMENT_ALIGN);
 	__init_end = .;
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index c4f26ef91e77..9c9edc98d271 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -36,6 +36,9 @@ config KVM
 	select HAVE_KVM_IRQFD
 	select KVM_ARM_VGIC_V3
 	select KVM_ARM_PMU if HW_PERF_EVENTS
+	select HAVE_KVM_MSI
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
 	---help---
 	  Support hosting virtualized guest machines.
 	  We don't support KVM with 16K page tables yet, due to the multiple
@@ -54,13 +57,6 @@ config KVM_ARM_PMU
 	  Adds support for a virtual Performance Monitoring Unit (PMU) in
 	  virtual machines.
 
-config KVM_NEW_VGIC
-	bool "New VGIC implementation"
-	depends on KVM
-	default y
-        ---help---
-          uses the new VGIC implementation
-
 source drivers/vhost/Kconfig
 
 endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index a7a958ca29d5..695eb3c7ef41 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -20,7 +20,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
 
-ifeq ($(CONFIG_KVM_NEW_VGIC),y)
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o
@@ -30,12 +29,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o
-else
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
-endif
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
 kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 32fad75bb9ff..3f9e15722473 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -211,7 +211,7 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
 /**
  * kvm_arm_copy_reg_indices - get indices of all registers.
  *
- * We do core registers right here, then we apppend system regs.
+ * We do core registers right here, then we append system regs.
  */
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 {
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index a873a6d8be90..6b29d3d9e1f2 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -53,10 +53,9 @@ __invalid:
 	b	.
 
 	/*
-	 * x0: HYP boot pgd
-	 * x1: HYP pgd
-	 * x2: HYP stack
-	 * x3: HYP vectors
+	 * x0: HYP pgd
+	 * x1: HYP stack
+	 * x2: HYP vectors
 	 */
 __do_hyp_init:
 
@@ -110,71 +109,27 @@ __do_hyp_init:
 	msr	sctlr_el2, x4
 	isb
 
-	/* Skip the trampoline dance if we merged the boot and runtime PGDs */
-	cmp	x0, x1
-	b.eq	merged
-
-	/* MMU is now enabled. Get ready for the trampoline dance */
-	ldr	x4, =TRAMPOLINE_VA
-	adr	x5, target
-	bfi	x4, x5, #0, #PAGE_SHIFT
-	br	x4
-
-target: /* We're now in the trampoline code, switch page tables */
-	msr	ttbr0_el2, x1
-	isb
-
-	/* Invalidate the old TLBs */
-	tlbi	alle2
-	dsb	sy
-
-merged:
 	/* Set the stack and new vectors */
+	kern_hyp_va	x1
+	mov	sp, x1
 	kern_hyp_va	x2
-	mov	sp, x2
-	kern_hyp_va	x3
-	msr	vbar_el2, x3
+	msr	vbar_el2, x2
 
 	/* Hello, World! */
 	eret
 ENDPROC(__kvm_hyp_init)
 
 	/*
-	 * Reset kvm back to the hyp stub. This is the trampoline dance in
-	 * reverse. If kvm used an extended idmap, __extended_idmap_trampoline
-	 * calls this code directly in the idmap. In this case switching to the
-	 * boot tables is a no-op.
-	 *
-	 * x0: HYP boot pgd
-	 * x1: HYP phys_idmap_start
+	 * Reset kvm back to the hyp stub.
 	 */
 ENTRY(__kvm_hyp_reset)
-	/* We're in trampoline code in VA, switch back to boot page tables */
-	msr	ttbr0_el2, x0
-	isb
-
-	/* Ensure the PA branch doesn't find a stale tlb entry or stale code. */
-	ic	iallu
-	tlbi	alle2
-	dsb	sy
-	isb
-
-	/* Branch into PA space */
-	adr	x0, 1f
-	bfi	x1, x0, #0, #PAGE_SHIFT
-	br	x1
-
 	/* We're now in idmap, disable MMU */
-1:	mrs	x0, sctlr_el2
+	mrs	x0, sctlr_el2
 	ldr	x1, =SCTLR_ELx_FLAGS
 	bic	x0, x0, x1		// Clear SCTL_M and etc
 	msr	sctlr_el2, x0
 	isb
 
-	/* Invalidate the old TLBs */
-	tlbi	alle2
-	dsb	sy
-
 	/* Install stub vectors */
 	adr_l	x0, __hyp_stub_vectors
 	msr	vbar_el2, x0
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 70254a65bd5b..ce9e5e5f28cf 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -164,22 +164,3 @@ alternative_endif
 
 	eret
 ENDPROC(__fpsimd_guest_restore)
-
-/*
- * When using the extended idmap, we don't have a trampoline page we can use
- * while we switch pages tables during __kvm_hyp_reset. Accessing the idmap
- * directly would be ideal, but if we're using the extended idmap then the
- * idmap is located above HYP_PAGE_OFFSET, and the address will be masked by
- * kvm_call_hyp using kern_hyp_va.
- *
- * x0: HYP boot pgd
- * x1: HYP phys_idmap_start
- */
-ENTRY(__extended_idmap_trampoline)
-	mov	x4, x1
-	adr_l	x3, __kvm_hyp_reset
-
-	/* insert __kvm_hyp_reset()s offset into phys_idmap_start */
-	bfi	x4, x3, #0, #PAGE_SHIFT
-	br	x4
-ENDPROC(__extended_idmap_trampoline)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 2d87f36d5cb4..f6d9694ae3b1 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -62,6 +62,21 @@ ENTRY(__vhe_hyp_call)
 	isb
 	ret
 ENDPROC(__vhe_hyp_call)
+
+/*
+ * Compute the idmap address of __kvm_hyp_reset based on the idmap
+ * start passed as a parameter, and jump there.
+ *
+ * x0: HYP phys_idmap_start
+ */
+ENTRY(__kvm_hyp_teardown)
+	mov	x4, x0
+	adr_l	x3, __kvm_hyp_reset
+
+	/* insert __kvm_hyp_reset()s offset into phys_idmap_start */
+	bfi	x4, x3, #0, #PAGE_SHIFT
+	br	x4
+ENDPROC(__kvm_hyp_teardown)
 	
 el1_sync:				// Guest trapped into EL2
 	save_x0_to_x3
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 4373997d1a70..ae7855f16ec2 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -299,9 +299,16 @@ static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%
 
 static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
 {
-	unsigned long str_va = (unsigned long)__hyp_panic_string;
+	unsigned long str_va;
 
-	__hyp_do_panic(hyp_kern_va(str_va),
+	/*
+	 * Force the panic string to be loaded from the literal pool,
+	 * making sure it is a kernel address and not a PC-relative
+	 * reference.
+	 */
+	asm volatile("ldr %0, =__hyp_panic_string" : "=r" (str_va));
+
+	__hyp_do_panic(str_va,
 		       spsr,  elr,
 		       read_sysreg(esr_el2),   read_sysreg_el2(far),
 		       read_sysreg(hpfar_el2), par,
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index e9e0e6db73f6..898c0e6aedd4 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -132,16 +132,14 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
 static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
 {
 	unsigned long cpsr = *vcpu_cpsr(vcpu);
-	bool is_aarch32;
+	bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
 	u32 esr = 0;
 
-	is_aarch32 = vcpu_mode_is_32bit(vcpu);
-
-	*vcpu_spsr(vcpu) = cpsr;
 	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
-
 	*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
+
 	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+	*vcpu_spsr(vcpu) = cpsr;
 
 	vcpu_sys_reg(vcpu, FAR_EL1) = addr;
 
@@ -172,11 +170,11 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
 	unsigned long cpsr = *vcpu_cpsr(vcpu);
 	u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
 
-	*vcpu_spsr(vcpu) = cpsr;
 	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
-
 	*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
+
 	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+	*vcpu_spsr(vcpu) = cpsr;
 
 	/*
 	 * Build an unknown exception, depending on the instruction
diff --git a/arch/arm64/kvm/irq.h b/arch/arm64/kvm/irq.h
new file mode 100644
index 000000000000..b74099b905fd
--- /dev/null
+++ b/arch/arm64/kvm/irq.h
@@ -0,0 +1,19 @@
+/*
+ * irq.h: in kernel interrupt controller related definitions
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This header is included by irqchip.c. However, on ARM, interrupt
+ * controller declarations are located in include/kvm/arm_vgic.h since
+ * they are mostly shared between arm and arm64.
+ */
+
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include <kvm/arm_vgic.h>
+
+#endif
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index b1ad730e1567..5bc460884639 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -65,7 +65,7 @@ static bool cpu_has_32bit_el1(void)
  * We currently assume that the number of HW registers is uniform
  * across all CPUs (see cpuinfo_sanity_check).
  */
-int kvm_arch_dev_ioctl_check_extension(long ext)
+int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 
@@ -86,6 +86,12 @@ int kvm_arch_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_VCPU_ATTRIBUTES:
 		r = 1;
 		break;
+	case KVM_CAP_MSI_DEVID:
+		if (!kvm)
+			r = -EINVAL;
+		else
+			r = kvm->arch.vgic.msis_require_devid;
+		break;
 	default:
 		r = 0;
 	}
@@ -98,7 +104,7 @@ int kvm_arch_dev_ioctl_check_extension(long ext)
  * @vcpu: The VCPU pointer
  *
  * This function finds the right table above and sets the registers on
- * the virtual CPU struct to their architectually defined reset
+ * the virtual CPU struct to their architecturally defined reset
  * values.
  */
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
@@ -132,31 +138,3 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	/* Reset timer */
 	return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
 }
-
-extern char __hyp_idmap_text_start[];
-
-unsigned long kvm_hyp_reset_entry(void)
-{
-	if (!__kvm_cpu_uses_extended_idmap()) {
-		unsigned long offset;
-
-		/*
-		 * Find the address of __kvm_hyp_reset() in the trampoline page.
-		 * This is present in the running page tables, and the boot page
-		 * tables, so we call the code here to start the trampoline
-		 * dance in reverse.
-		 */
-		offset = (unsigned long)__kvm_hyp_reset
-			 - ((unsigned long)__hyp_idmap_text_start & PAGE_MASK);
-
-		return TRAMPOLINE_VA + offset;
-	} else {
-		/*
-		 * KVM is running with merged page tables, which don't have the
-		 * trampoline page mapped. We know the idmap is still mapped,
-		 * but can't be called into directly. Use
-		 * __extended_idmap_trampoline to do the call.
-		 */
-		return (unsigned long)kvm_ksym_ref(__extended_idmap_trampoline);
-	}
-}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index a57d650f552c..b0b225ceca18 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1546,7 +1546,7 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu,
 				struct sys_reg_params *params)
 {
 	u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
-	int cp;
+	int cp = -1;
 
 	switch(hsr_ec) {
 	case ESR_ELx_EC_CP15_32:
@@ -1558,7 +1558,7 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu,
 		cp = 14;
 		break;
 	default:
-		WARN_ON((cp = -1));
+		WARN_ON(1);
 	}
 
 	kvm_err("Unsupported guest CP%d access at: %08lx\n",
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index f6c55afab3e2..c4284c432ae8 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -32,10 +32,10 @@
 
 static int swiotlb __read_mostly;
 
-static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
+static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
 				 bool coherent)
 {
-	if (!coherent || dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
+	if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE))
 		return pgprot_writecombine(prot);
 	return prot;
 }
@@ -91,7 +91,7 @@ static int __free_from_pool(void *start, size_t size)
 
 static void *__dma_alloc_coherent(struct device *dev, size_t size,
 				  dma_addr_t *dma_handle, gfp_t flags,
-				  struct dma_attrs *attrs)
+				  unsigned long attrs)
 {
 	if (dev == NULL) {
 		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
@@ -121,7 +121,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
 
 static void __dma_free_coherent(struct device *dev, size_t size,
 				void *vaddr, dma_addr_t dma_handle,
-				struct dma_attrs *attrs)
+				unsigned long attrs)
 {
 	bool freed;
 	phys_addr_t paddr = dma_to_phys(dev, dma_handle);
@@ -140,7 +140,7 @@ static void __dma_free_coherent(struct device *dev, size_t size,
 
 static void *__dma_alloc(struct device *dev, size_t size,
 			 dma_addr_t *dma_handle, gfp_t flags,
-			 struct dma_attrs *attrs)
+			 unsigned long attrs)
 {
 	struct page *page;
 	void *ptr, *coherent_ptr;
@@ -188,7 +188,7 @@ no_mem:
 
 static void __dma_free(struct device *dev, size_t size,
 		       void *vaddr, dma_addr_t dma_handle,
-		       struct dma_attrs *attrs)
+		       unsigned long attrs)
 {
 	void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));
 
@@ -205,7 +205,7 @@ static void __dma_free(struct device *dev, size_t size,
 static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
 				     unsigned long offset, size_t size,
 				     enum dma_data_direction dir,
-				     struct dma_attrs *attrs)
+				     unsigned long attrs)
 {
 	dma_addr_t dev_addr;
 
@@ -219,7 +219,7 @@ static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
 
 static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
 				 size_t size, enum dma_data_direction dir,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
 	if (!is_device_dma_coherent(dev))
 		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
@@ -228,7 +228,7 @@ static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
 
 static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
 				  int nelems, enum dma_data_direction dir,
-				  struct dma_attrs *attrs)
+				  unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i, ret;
@@ -245,7 +245,7 @@ static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
 static void __swiotlb_unmap_sg_attrs(struct device *dev,
 				     struct scatterlist *sgl, int nelems,
 				     enum dma_data_direction dir,
-				     struct dma_attrs *attrs)
+				     unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -306,7 +306,7 @@ static void __swiotlb_sync_sg_for_device(struct device *dev,
 static int __swiotlb_mmap(struct device *dev,
 			  struct vm_area_struct *vma,
 			  void *cpu_addr, dma_addr_t dma_addr, size_t size,
-			  struct dma_attrs *attrs)
+			  unsigned long attrs)
 {
 	int ret = -ENXIO;
 	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
@@ -333,7 +333,7 @@ static int __swiotlb_mmap(struct device *dev,
 
 static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
 				 void *cpu_addr, dma_addr_t handle, size_t size,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
 	int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
 
@@ -435,21 +435,21 @@ out:
 
 static void *__dummy_alloc(struct device *dev, size_t size,
 			   dma_addr_t *dma_handle, gfp_t flags,
-			   struct dma_attrs *attrs)
+			   unsigned long attrs)
 {
 	return NULL;
 }
 
 static void __dummy_free(struct device *dev, size_t size,
 			 void *vaddr, dma_addr_t dma_handle,
-			 struct dma_attrs *attrs)
+			 unsigned long attrs)
 {
 }
 
 static int __dummy_mmap(struct device *dev,
 			struct vm_area_struct *vma,
 			void *cpu_addr, dma_addr_t dma_addr, size_t size,
-			struct dma_attrs *attrs)
+			unsigned long attrs)
 {
 	return -ENXIO;
 }
@@ -457,20 +457,20 @@ static int __dummy_mmap(struct device *dev,
 static dma_addr_t __dummy_map_page(struct device *dev, struct page *page,
 				   unsigned long offset, size_t size,
 				   enum dma_data_direction dir,
-				   struct dma_attrs *attrs)
+				   unsigned long attrs)
 {
 	return DMA_ERROR_CODE;
 }
 
 static void __dummy_unmap_page(struct device *dev, dma_addr_t dev_addr,
 			       size_t size, enum dma_data_direction dir,
-			       struct dma_attrs *attrs)
+			       unsigned long attrs)
 {
 }
 
 static int __dummy_map_sg(struct device *dev, struct scatterlist *sgl,
 			  int nelems, enum dma_data_direction dir,
-			  struct dma_attrs *attrs)
+			  unsigned long attrs)
 {
 	return 0;
 }
@@ -478,7 +478,7 @@ static int __dummy_map_sg(struct device *dev, struct scatterlist *sgl,
 static void __dummy_unmap_sg(struct device *dev,
 			     struct scatterlist *sgl, int nelems,
 			     enum dma_data_direction dir,
-			     struct dma_attrs *attrs)
+			     unsigned long attrs)
 {
 }
 
@@ -553,7 +553,7 @@ static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
 
 static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 				 dma_addr_t *handle, gfp_t gfp,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
 	bool coherent = is_device_dma_coherent(dev);
 	int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
@@ -613,7 +613,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 }
 
 static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
-			       dma_addr_t handle, struct dma_attrs *attrs)
+			       dma_addr_t handle, unsigned long attrs)
 {
 	size_t iosize = size;
 
@@ -629,7 +629,7 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 	 * Hence how dodgy the below logic looks...
 	 */
 	if (__in_atomic_pool(cpu_addr, size)) {
-		iommu_dma_unmap_page(dev, handle, iosize, 0, NULL);
+		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
 		__free_from_pool(cpu_addr, size);
 	} else if (is_vmalloc_addr(cpu_addr)){
 		struct vm_struct *area = find_vm_area(cpu_addr);
@@ -639,14 +639,14 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 		iommu_dma_free(dev, area->pages, iosize, &handle);
 		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
 	} else {
-		iommu_dma_unmap_page(dev, handle, iosize, 0, NULL);
+		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
 		__free_pages(virt_to_page(cpu_addr), get_order(size));
 	}
 }
 
 static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 			      void *cpu_addr, dma_addr_t dma_addr, size_t size,
-			      struct dma_attrs *attrs)
+			      unsigned long attrs)
 {
 	struct vm_struct *area;
 	int ret;
@@ -666,7 +666,7 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 
 static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
 			       void *cpu_addr, dma_addr_t dma_addr,
-			       size_t size, struct dma_attrs *attrs)
+			       size_t size, unsigned long attrs)
 {
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	struct vm_struct *area = find_vm_area(cpu_addr);
@@ -707,14 +707,14 @@ static void __iommu_sync_single_for_device(struct device *dev,
 static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
 				   unsigned long offset, size_t size,
 				   enum dma_data_direction dir,
-				   struct dma_attrs *attrs)
+				   unsigned long attrs)
 {
 	bool coherent = is_device_dma_coherent(dev);
 	int prot = dma_direction_to_prot(dir, coherent);
 	dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);
 
 	if (!iommu_dma_mapping_error(dev, dev_addr) &&
-	    !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 		__iommu_sync_single_for_device(dev, dev_addr, size, dir);
 
 	return dev_addr;
@@ -722,9 +722,9 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
 
 static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr,
 			       size_t size, enum dma_data_direction dir,
-			       struct dma_attrs *attrs)
+			       unsigned long attrs)
 {
-	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 		__iommu_sync_single_for_cpu(dev, dev_addr, size, dir);
 
 	iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs);
@@ -760,11 +760,11 @@ static void __iommu_sync_sg_for_device(struct device *dev,
 
 static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
 				int nelems, enum dma_data_direction dir,
-				struct dma_attrs *attrs)
+				unsigned long attrs)
 {
 	bool coherent = is_device_dma_coherent(dev);
 
-	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 		__iommu_sync_sg_for_device(dev, sgl, nelems, dir);
 
 	return iommu_dma_map_sg(dev, sgl, nelems,
@@ -774,9 +774,9 @@ static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
 static void __iommu_unmap_sg_attrs(struct device *dev,
 				   struct scatterlist *sgl, int nelems,
 				   enum dma_data_direction dir,
-				   struct dma_attrs *attrs)
+				   unsigned long attrs)
 {
-	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 		__iommu_sync_sg_for_cpu(dev, sgl, nelems, dir);
 
 	iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 51a558195bb9..4989948d1feb 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -686,9 +686,9 @@ void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
 	/*
 	 * Check whether the physical FDT address is set and meets the minimum
 	 * alignment requirement. Since we are relying on MIN_FDT_ALIGN to be
-	 * at least 8 bytes so that we can always access the size field of the
-	 * FDT header after mapping the first chunk, double check here if that
-	 * is indeed the case.
+	 * at least 8 bytes so that we can always access the magic and size
+	 * fields of the FDT header after mapping the first chunk, double check
+	 * here if that is indeed the case.
 	 */
 	BUILD_BUG_ON(MIN_FDT_ALIGN < 8);
 	if (!dt_phys || dt_phys % MIN_FDT_ALIGN)
@@ -716,7 +716,7 @@ void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
 	create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE),
 			dt_virt_base, SWAPPER_BLOCK_SIZE, prot);
 
-	if (fdt_check_header(dt_virt) != 0)
+	if (fdt_magic(dt_virt) != FDT_MAGIC)
 		return NULL;
 
 	*size = fdt_totalsize(dt_virt);
diff --git a/arch/avr32/mm/dma-coherent.c b/arch/avr32/mm/dma-coherent.c
index 92cf1fb2b3e6..58610d0df7ed 100644
--- a/arch/avr32/mm/dma-coherent.c
+++ b/arch/avr32/mm/dma-coherent.c
@@ -99,7 +99,7 @@ static void __dma_free(struct device *dev, size_t size,
 }
 
 static void *avr32_dma_alloc(struct device *dev, size_t size,
-		dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
+		dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
 {
 	struct page *page;
 	dma_addr_t phys;
@@ -109,7 +109,7 @@ static void *avr32_dma_alloc(struct device *dev, size_t size,
 		return NULL;
 	phys = page_to_phys(page);
 
-	if (dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs)) {
+	if (attrs & DMA_ATTR_WRITE_COMBINE) {
 		/* Now, map the page into P3 with write-combining turned on */
 		*handle = phys;
 		return __ioremap(phys, size, _PAGE_BUFFER);
@@ -119,11 +119,11 @@ static void *avr32_dma_alloc(struct device *dev, size_t size,
 }
 
 static void avr32_dma_free(struct device *dev, size_t size,
-		void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs)
+		void *cpu_addr, dma_addr_t handle, unsigned long attrs)
 {
 	struct page *page;
 
-	if (dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs)) {
+	if (attrs & DMA_ATTR_WRITE_COMBINE) {
 		iounmap(cpu_addr);
 
 		page = phys_to_page(handle);
@@ -142,7 +142,7 @@ static void avr32_dma_free(struct device *dev, size_t size,
 
 static dma_addr_t avr32_dma_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size,
-		enum dma_data_direction direction, struct dma_attrs *attrs)
+		enum dma_data_direction direction, unsigned long attrs)
 {
 	void *cpu_addr = page_address(page) + offset;
 
@@ -152,7 +152,7 @@ static dma_addr_t avr32_dma_map_page(struct device *dev, struct page *page,
 
 static int avr32_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 		int nents, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	int i;
 	struct scatterlist *sg;
diff --git a/arch/blackfin/kernel/dma-mapping.c b/arch/blackfin/kernel/dma-mapping.c
index 771afe6e4264..53fbbb61aa86 100644
--- a/arch/blackfin/kernel/dma-mapping.c
+++ b/arch/blackfin/kernel/dma-mapping.c
@@ -79,7 +79,7 @@ static void __free_dma_pages(unsigned long addr, unsigned int pages)
 }
 
 static void *bfin_dma_alloc(struct device *dev, size_t size,
-		dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs)
+		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
 	void *ret;
 
@@ -94,7 +94,7 @@ static void *bfin_dma_alloc(struct device *dev, size_t size,
 }
 
 static void bfin_dma_free(struct device *dev, size_t size, void *vaddr,
-		  dma_addr_t dma_handle, struct dma_attrs *attrs)
+		  dma_addr_t dma_handle, unsigned long attrs)
 {
 	__free_dma_pages((unsigned long)vaddr, get_pages(size));
 }
@@ -111,7 +111,7 @@ EXPORT_SYMBOL(__dma_sync);
 
 static int bfin_dma_map_sg(struct device *dev, struct scatterlist *sg_list,
 		int nents, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -139,7 +139,7 @@ static void bfin_dma_sync_sg_for_device(struct device *dev,
 
 static dma_addr_t bfin_dma_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size, enum dma_data_direction dir,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	dma_addr_t handle = (dma_addr_t)(page_address(page) + offset);
 
diff --git a/arch/blackfin/mm/init.c b/arch/blackfin/mm/init.c
index 166842de3dc7..b59cd7c3261a 100644
--- a/arch/blackfin/mm/init.c
+++ b/arch/blackfin/mm/init.c
@@ -112,7 +112,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
 }
 #endif
 
-void __init_refok free_initmem(void)
+void __ref free_initmem(void)
 {
 #if defined CONFIG_RAMKERNEL && !defined CONFIG_MPU
 	free_initmem_default(-1);
diff --git a/arch/c6x/include/asm/dma-mapping.h b/arch/c6x/include/asm/dma-mapping.h
index 6b5cd7b0cf32..5717b1e52d96 100644
--- a/arch/c6x/include/asm/dma-mapping.h
+++ b/arch/c6x/include/asm/dma-mapping.h
@@ -26,8 +26,8 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 
 extern void coherent_mem_init(u32 start, u32 size);
 void *c6x_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
-		gfp_t gfp, struct dma_attrs *attrs);
+		gfp_t gfp, unsigned long attrs);
 void c6x_dma_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_handle, struct dma_attrs *attrs);
+		dma_addr_t dma_handle, unsigned long attrs);
 
 #endif	/* _ASM_C6X_DMA_MAPPING_H */
diff --git a/arch/c6x/kernel/dma.c b/arch/c6x/kernel/dma.c
index 8a80f3a250c0..db4a6a301f5e 100644
--- a/arch/c6x/kernel/dma.c
+++ b/arch/c6x/kernel/dma.c
@@ -38,7 +38,7 @@ static void c6x_dma_sync(dma_addr_t handle, size_t size,
 
 static dma_addr_t c6x_dma_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size, enum dma_data_direction dir,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	dma_addr_t handle = virt_to_phys(page_address(page) + offset);
 
@@ -47,13 +47,13 @@ static dma_addr_t c6x_dma_map_page(struct device *dev, struct page *page,
 }
 
 static void c6x_dma_unmap_page(struct device *dev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir, struct dma_attrs *attrs)
+		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
 	c6x_dma_sync(handle, size, dir);
 }
 
 static int c6x_dma_map_sg(struct device *dev, struct scatterlist *sglist,
-		int nents, enum dma_data_direction dir, struct dma_attrs *attrs)
+		int nents, enum dma_data_direction dir, unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -67,8 +67,7 @@ static int c6x_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 }
 
 static void c6x_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
-		  int nents, enum dma_data_direction dir,
-		  struct dma_attrs *attrs)
+		  int nents, enum dma_data_direction dir, unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
diff --git a/arch/c6x/mm/dma-coherent.c b/arch/c6x/mm/dma-coherent.c
index f7ee63af2541..95e38ad27c69 100644
--- a/arch/c6x/mm/dma-coherent.c
+++ b/arch/c6x/mm/dma-coherent.c
@@ -74,7 +74,7 @@ static void __free_dma_pages(u32 addr, int order)
  * virtual and DMA address for that space.
  */
 void *c6x_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
-		gfp_t gfp, struct dma_attrs *attrs)
+		gfp_t gfp, unsigned long attrs)
 {
 	u32 paddr;
 	int order;
@@ -99,7 +99,7 @@ void *c6x_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
  * Free DMA coherent memory as defined by the above mapping.
  */
 void c6x_dma_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_handle, struct dma_attrs *attrs)
+		dma_addr_t dma_handle, unsigned long attrs)
 {
 	int order;
 
diff --git a/arch/cris/arch-v10/drivers/axisflashmap.c b/arch/cris/arch-v10/drivers/axisflashmap.c
index 60d57c590032..bdc25aa43468 100644
--- a/arch/cris/arch-v10/drivers/axisflashmap.c
+++ b/arch/cris/arch-v10/drivers/axisflashmap.c
@@ -397,7 +397,7 @@ static int __init init_axis_flash(void)
 	if (!romfs_in_flash) {
 		/* Create an RAM device for the root partition (romfs). */
 
-#if !defined(CONFIG_MTD_MTDRAM) || (CONFIG_MTDRAM_TOTAL_SIZE != 0) || (CONFIG_MTDRAM_ABS_POS != 0)
+#if !defined(CONFIG_MTD_MTDRAM) || (CONFIG_MTDRAM_TOTAL_SIZE != 0)
 		/* No use trying to boot this kernel from RAM. Panic! */
 		printk(KERN_EMERG "axisflashmap: Cannot create an MTD RAM "
 		       "device due to kernel (mis)configuration!\n");
diff --git a/arch/cris/arch-v32/drivers/axisflashmap.c b/arch/cris/arch-v32/drivers/axisflashmap.c
index bd10d3ba0949..87656c41fec7 100644
--- a/arch/cris/arch-v32/drivers/axisflashmap.c
+++ b/arch/cris/arch-v32/drivers/axisflashmap.c
@@ -320,7 +320,7 @@ static int __init init_axis_flash(void)
 	 * but its size must be configured as 0 so as not to conflict
 	 * with our usage.
 	 */
-#if !defined(CONFIG_MTD_MTDRAM) || (CONFIG_MTDRAM_TOTAL_SIZE != 0) || (CONFIG_MTDRAM_ABS_POS != 0)
+#if !defined(CONFIG_MTD_MTDRAM) || (CONFIG_MTDRAM_TOTAL_SIZE != 0)
 	if (!romfs_in_flash && !nand_boot) {
 		printk(KERN_EMERG "axisflashmap: Cannot create an MTD RAM "
 		       "device; configure CONFIG_MTD_MTDRAM with size = 0!\n");
diff --git a/arch/cris/arch-v32/drivers/pci/dma.c b/arch/cris/arch-v32/drivers/pci/dma.c
index 8d5efa58cce1..1f0636793f0c 100644
--- a/arch/cris/arch-v32/drivers/pci/dma.c
+++ b/arch/cris/arch-v32/drivers/pci/dma.c
@@ -17,7 +17,7 @@
 #include <asm/io.h>
 
 static void *v32_dma_alloc(struct device *dev, size_t size,
-		dma_addr_t *dma_handle, gfp_t gfp,  struct dma_attrs *attrs)
+		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
 	void *ret;
 
@@ -37,22 +37,21 @@ static void *v32_dma_alloc(struct device *dev, size_t size,
 }
 
 static void v32_dma_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_handle, struct dma_attrs *attrs)
+		dma_addr_t dma_handle, unsigned long attrs)
 {
 	free_pages((unsigned long)vaddr, get_order(size));
 }
 
 static inline dma_addr_t v32_dma_map_page(struct device *dev,
 		struct page *page, unsigned long offset, size_t size,
-		enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		enum dma_data_direction direction, unsigned long attrs)
 {
 	return page_to_phys(page) + offset;
 }
 
 static inline int v32_dma_map_sg(struct device *dev, struct scatterlist *sg,
 		int nents, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	printk("Map sg\n");
 	return nents;
diff --git a/arch/frv/include/asm/mc146818rtc.h b/arch/frv/include/asm/mc146818rtc.h
deleted file mode 100644
index 90dfb7a633d1..000000000000
--- a/arch/frv/include/asm/mc146818rtc.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* mc146818rtc.h: RTC defs
- *
- * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _ASM_MC146818RTC_H
-#define _ASM_MC146818RTC_H
-
-
-#endif /* _ASM_MC146818RTC_H */
diff --git a/arch/frv/mb93090-mb00/pci-dma-nommu.c b/arch/frv/mb93090-mb00/pci-dma-nommu.c
index 082be49b5df0..90f2e4cb33d6 100644
--- a/arch/frv/mb93090-mb00/pci-dma-nommu.c
+++ b/arch/frv/mb93090-mb00/pci-dma-nommu.c
@@ -35,7 +35,7 @@ static DEFINE_SPINLOCK(dma_alloc_lock);
 static LIST_HEAD(dma_alloc_list);
 
 static void *frv_dma_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle,
-		gfp_t gfp, struct dma_attrs *attrs)
+		gfp_t gfp, unsigned long attrs)
 {
 	struct dma_alloc_record *new;
 	struct list_head *this = &dma_alloc_list;
@@ -86,7 +86,7 @@ static void *frv_dma_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_ha
 }
 
 static void frv_dma_free(struct device *hwdev, size_t size, void *vaddr,
-		dma_addr_t dma_handle, struct dma_attrs *attrs)
+		dma_addr_t dma_handle, unsigned long attrs)
 {
 	struct dma_alloc_record *rec;
 	unsigned long flags;
@@ -107,7 +107,7 @@ static void frv_dma_free(struct device *hwdev, size_t size, void *vaddr,
 
 static int frv_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 		int nents, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	int i;
 	struct scatterlist *sg;
@@ -124,7 +124,7 @@ static int frv_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 
 static dma_addr_t frv_dma_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size,
-		enum dma_data_direction direction, struct dma_attrs *attrs)
+		enum dma_data_direction direction, unsigned long attrs)
 {
 	BUG_ON(direction == DMA_NONE);
 	flush_dcache_page(page);
diff --git a/arch/frv/mb93090-mb00/pci-dma.c b/arch/frv/mb93090-mb00/pci-dma.c
index 316b7b65348d..f585745b1abc 100644
--- a/arch/frv/mb93090-mb00/pci-dma.c
+++ b/arch/frv/mb93090-mb00/pci-dma.c
@@ -19,8 +19,7 @@
 #include <asm/io.h>
 
 static void *frv_dma_alloc(struct device *hwdev, size_t size,
-		dma_addr_t *dma_handle, gfp_t gfp,
-		struct dma_attrs *attrs)
+		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
 	void *ret;
 
@@ -32,14 +31,14 @@ static void *frv_dma_alloc(struct device *hwdev, size_t size,
 }
 
 static void frv_dma_free(struct device *hwdev, size_t size, void *vaddr,
-		dma_addr_t dma_handle, struct dma_attrs *attrs)
+		dma_addr_t dma_handle, unsigned long attrs)
 {
 	consistent_free(vaddr);
 }
 
 static int frv_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 		int nents, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	unsigned long dampr2;
 	void *vaddr;
@@ -69,7 +68,7 @@ static int frv_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 
 static dma_addr_t frv_dma_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size,
-		enum dma_data_direction direction, struct dma_attrs *attrs)
+		enum dma_data_direction direction, unsigned long attrs)
 {
 	flush_dcache_page(page);
 	return (dma_addr_t) page_to_phys(page) + offset;
diff --git a/arch/h8300/include/asm/mc146818rtc.h b/arch/h8300/include/asm/mc146818rtc.h
deleted file mode 100644
index ab9d9646d241..000000000000
--- a/arch/h8300/include/asm/mc146818rtc.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Machine dependent access functions for RTC registers.
- */
-#ifndef _H8300_MC146818RTC_H
-#define _H8300_MC146818RTC_H
-
-/* empty include file to satisfy the include in genrtc.c/ide-geometry.c */
-
-#endif /* _H8300_MC146818RTC_H */
diff --git a/arch/h8300/kernel/dma.c b/arch/h8300/kernel/dma.c
index eeb13d3f2424..3651da045806 100644
--- a/arch/h8300/kernel/dma.c
+++ b/arch/h8300/kernel/dma.c
@@ -12,7 +12,7 @@
 
 static void *dma_alloc(struct device *dev, size_t size,
 		       dma_addr_t *dma_handle, gfp_t gfp,
-		       struct dma_attrs *attrs)
+		       unsigned long attrs)
 {
 	void *ret;
 
@@ -32,7 +32,7 @@ static void *dma_alloc(struct device *dev, size_t size,
 
 static void dma_free(struct device *dev, size_t size,
 		     void *vaddr, dma_addr_t dma_handle,
-		     struct dma_attrs *attrs)
+		     unsigned long attrs)
 
 {
 	free_pages((unsigned long)vaddr, get_order(size));
@@ -41,14 +41,14 @@ static void dma_free(struct device *dev, size_t size,
 static dma_addr_t map_page(struct device *dev, struct page *page,
 				  unsigned long offset, size_t size,
 				  enum dma_data_direction direction,
-				  struct dma_attrs *attrs)
+				  unsigned long attrs)
 {
 	return page_to_phys(page) + offset;
 }
 
 static int map_sg(struct device *dev, struct scatterlist *sgl,
 		  int nents, enum dma_data_direction direction,
-		  struct dma_attrs *attrs)
+		  unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
diff --git a/arch/hexagon/include/asm/dma-mapping.h b/arch/hexagon/include/asm/dma-mapping.h
index aa6203464520..7ef58df909fc 100644
--- a/arch/hexagon/include/asm/dma-mapping.h
+++ b/arch/hexagon/include/asm/dma-mapping.h
@@ -26,7 +26,6 @@
 #include <linux/mm.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-debug.h>
-#include <linux/dma-attrs.h>
 #include <asm/io.h>
 
 struct device;
diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c
index 9e3ddf792bd3..b9017785fb71 100644
--- a/arch/hexagon/kernel/dma.c
+++ b/arch/hexagon/kernel/dma.c
@@ -51,7 +51,7 @@ static struct gen_pool *coherent_pool;
 
 static void *hexagon_dma_alloc_coherent(struct device *dev, size_t size,
 				 dma_addr_t *dma_addr, gfp_t flag,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
 	void *ret;
 
@@ -84,7 +84,7 @@ static void *hexagon_dma_alloc_coherent(struct device *dev, size_t size,
 }
 
 static void hexagon_free_coherent(struct device *dev, size_t size, void *vaddr,
-				  dma_addr_t dma_addr, struct dma_attrs *attrs)
+				  dma_addr_t dma_addr, unsigned long attrs)
 {
 	gen_pool_free(coherent_pool, (unsigned long) vaddr, size);
 }
@@ -105,7 +105,7 @@ static int check_addr(const char *name, struct device *hwdev,
 
 static int hexagon_map_sg(struct device *hwdev, struct scatterlist *sg,
 			  int nents, enum dma_data_direction dir,
-			  struct dma_attrs *attrs)
+			  unsigned long attrs)
 {
 	struct scatterlist *s;
 	int i;
@@ -172,7 +172,7 @@ static inline void dma_sync(void *addr, size_t size,
 static dma_addr_t hexagon_map_page(struct device *dev, struct page *page,
 				   unsigned long offset, size_t size,
 				   enum dma_data_direction dir,
-				   struct dma_attrs *attrs)
+				   unsigned long attrs)
 {
 	dma_addr_t bus = page_to_phys(page) + offset;
 	WARN_ON(size == 0);
diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c
index 88977e42af0a..192584d5ac2f 100644
--- a/arch/hexagon/mm/init.c
+++ b/arch/hexagon/mm/init.c
@@ -93,7 +93,7 @@ void __init mem_init(void)
  * Todo:  free pages between __init_begin and __init_end; possibly
  * some devtree related stuff as well.
  */
-void __init_refok free_initmem(void)
+void __ref free_initmem(void)
 {
 }
 
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 6a15083cc366..18ca6a9ce566 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -52,6 +52,7 @@ config IA64
 	select MODULES_USE_ELF_RELA
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_HARDENED_USERCOPY
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index a6d6190c9d24..630ee8073899 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -919,7 +919,7 @@ sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
 static dma_addr_t sba_map_page(struct device *dev, struct page *page,
 			       unsigned long poff, size_t size,
 			       enum dma_data_direction dir,
-			       struct dma_attrs *attrs)
+			       unsigned long attrs)
 {
 	struct ioc *ioc;
 	void *addr = page_address(page) + poff;
@@ -1005,7 +1005,7 @@ static dma_addr_t sba_map_page(struct device *dev, struct page *page,
 
 static dma_addr_t sba_map_single_attrs(struct device *dev, void *addr,
 				       size_t size, enum dma_data_direction dir,
-				       struct dma_attrs *attrs)
+				       unsigned long attrs)
 {
 	return sba_map_page(dev, virt_to_page(addr),
 			    (unsigned long)addr & ~PAGE_MASK, size, dir, attrs);
@@ -1046,7 +1046,7 @@ sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size)
  * See Documentation/DMA-API-HOWTO.txt
  */
 static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size,
-			   enum dma_data_direction dir, struct dma_attrs *attrs)
+			   enum dma_data_direction dir, unsigned long attrs)
 {
 	struct ioc *ioc;
 #if DELAYED_RESOURCE_CNT > 0
@@ -1115,7 +1115,7 @@ static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size,
 }
 
 void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size,
-			    enum dma_data_direction dir, struct dma_attrs *attrs)
+			    enum dma_data_direction dir, unsigned long attrs)
 {
 	sba_unmap_page(dev, iova, size, dir, attrs);
 }
@@ -1130,7 +1130,7 @@ void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size,
  */
 static void *
 sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		   gfp_t flags, struct dma_attrs *attrs)
+		   gfp_t flags, unsigned long attrs)
 {
 	struct ioc *ioc;
 	void *addr;
@@ -1175,7 +1175,7 @@ sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	 * device to map single to get an iova mapping.
 	 */
 	*dma_handle = sba_map_single_attrs(&ioc->sac_only_dev->dev, addr,
-					   size, 0, NULL);
+					   size, 0, 0);
 
 	return addr;
 }
@@ -1191,9 +1191,9 @@ sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
  * See Documentation/DMA-API-HOWTO.txt
  */
 static void sba_free_coherent(struct device *dev, size_t size, void *vaddr,
-			      dma_addr_t dma_handle, struct dma_attrs *attrs)
+			      dma_addr_t dma_handle, unsigned long attrs)
 {
-	sba_unmap_single_attrs(dev, dma_handle, size, 0, NULL);
+	sba_unmap_single_attrs(dev, dma_handle, size, 0, 0);
 	free_pages((unsigned long) vaddr, get_order(size));
 }
 
@@ -1442,7 +1442,7 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev,
 
 static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
 			       int nents, enum dma_data_direction dir,
-			       struct dma_attrs *attrs);
+			       unsigned long attrs);
 /**
  * sba_map_sg - map Scatter/Gather list
  * @dev: instance of PCI owned by the driver that's asking.
@@ -1455,7 +1455,7 @@ static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
  */
 static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist,
 			    int nents, enum dma_data_direction dir,
-			    struct dma_attrs *attrs)
+			    unsigned long attrs)
 {
 	struct ioc *ioc;
 	int coalesced, filled = 0;
@@ -1551,7 +1551,7 @@ static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist,
  */
 static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
 			       int nents, enum dma_data_direction dir,
-			       struct dma_attrs *attrs)
+			       unsigned long attrs)
 {
 #ifdef ASSERT_PDIR_SANITY
 	struct ioc *ioc;
diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h
index 9c39bdfc2da8..ed7f09089f12 100644
--- a/arch/ia64/include/asm/machvec.h
+++ b/arch/ia64/include/asm/machvec.h
@@ -22,7 +22,6 @@ struct pci_bus;
 struct task_struct;
 struct pci_dev;
 struct msi_desc;
-struct dma_attrs;
 
 typedef void ia64_mv_setup_t (char **);
 typedef void ia64_mv_cpu_init_t (void);
diff --git a/arch/ia64/include/asm/mc146818rtc.h b/arch/ia64/include/asm/mc146818rtc.h
deleted file mode 100644
index 407787a237ba..000000000000
--- a/arch/ia64/include/asm/mc146818rtc.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _ASM_IA64_MC146818RTC_H
-#define _ASM_IA64_MC146818RTC_H
-
-/*
- * Machine dependent access functions for RTC registers.
- */
-
-/* empty include file to satisfy the include in genrtc.c */
-
-#endif /* _ASM_IA64_MC146818RTC_H */
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index d1212b84fb83..29bd59790d6c 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -121,32 +121,4 @@ struct thread_info {
 /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */
 #define TIF_WORK_MASK		(TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT))
 
-#define TS_RESTORE_SIGMASK	2	/* restore signal mask in do_signal() */
-
-#ifndef __ASSEMBLY__
-#define HAVE_SET_RESTORE_SIGMASK	1
-static inline void set_restore_sigmask(void)
-{
-	struct thread_info *ti = current_thread_info();
-	ti->status |= TS_RESTORE_SIGMASK;
-	WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags));
-}
-static inline void clear_restore_sigmask(void)
-{
-	current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-}
-static inline bool test_restore_sigmask(void)
-{
-	return current_thread_info()->status & TS_RESTORE_SIGMASK;
-}
-static inline bool test_and_clear_restore_sigmask(void)
-{
-	struct thread_info *ti = current_thread_info();
-	if (!(ti->status & TS_RESTORE_SIGMASK))
-		return false;
-	ti->status &= ~TS_RESTORE_SIGMASK;
-	return true;
-}
-#endif	/* !__ASSEMBLY__ */
-
 #endif /* _ASM_IA64_THREAD_INFO_H */
diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h
index 2189d5ddc1ee..465c70982f40 100644
--- a/arch/ia64/include/asm/uaccess.h
+++ b/arch/ia64/include/asm/uaccess.h
@@ -241,12 +241,18 @@ extern unsigned long __must_check __copy_user (void __user *to, const void __use
 static inline unsigned long
 __copy_to_user (void __user *to, const void *from, unsigned long count)
 {
+	if (!__builtin_constant_p(count))
+		check_object_size(from, count, true);
+
 	return __copy_user(to, (__force void __user *) from, count);
 }
 
 static inline unsigned long
 __copy_from_user (void *to, const void __user *from, unsigned long count)
 {
+	if (!__builtin_constant_p(count))
+		check_object_size(to, count, false);
+
 	return __copy_user((__force void __user *) to, from, count);
 }
 
@@ -258,8 +264,11 @@ __copy_from_user (void *to, const void __user *from, unsigned long count)
 	const void *__cu_from = (from);							\
 	long __cu_len = (n);								\
 											\
-	if (__access_ok(__cu_to, __cu_len, get_fs()))					\
-		__cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len);	\
+	if (__access_ok(__cu_to, __cu_len, get_fs())) {					\
+		if (!__builtin_constant_p(n))						\
+			check_object_size(__cu_from, __cu_len, true);			\
+		__cu_len = __copy_user(__cu_to, (__force void __user *)  __cu_from, __cu_len);	\
+	}										\
 	__cu_len;									\
 })
 
@@ -270,8 +279,11 @@ __copy_from_user (void *to, const void __user *from, unsigned long count)
 	long __cu_len = (n);								\
 											\
 	__chk_user_ptr(__cu_from);							\
-	if (__access_ok(__cu_from, __cu_len, get_fs()))					\
+	if (__access_ok(__cu_from, __cu_len, get_fs())) {				\
+		if (!__builtin_constant_p(n))						\
+			check_object_size(__cu_to, __cu_len, false);			\
 		__cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len);	\
+	}										\
 	__cu_len;									\
 })
 
diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
index b72cd7a07222..599507bcec91 100644
--- a/arch/ia64/kernel/machine_kexec.c
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -163,7 +163,7 @@ void arch_crash_save_vmcoreinfo(void)
 #endif
 }
 
-unsigned long paddr_vmcoreinfo_note(void)
+phys_addr_t paddr_vmcoreinfo_note(void)
 {
 	return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note);
 }
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 07a4e32ae96a..eb9220cde76c 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1831,7 +1831,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
 }
 
 /* Caller prevents this from being called after init */
-static void * __init_refok mca_bootmem(void)
+static void * __ref mca_bootmem(void)
 {
 	return __alloc_bootmem(sizeof(struct ia64_mca_cpu),
 	                    KERNEL_STACK_SIZE, 0);
diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c
index 939260aeac98..2933208c0285 100644
--- a/arch/ia64/kernel/pci-swiotlb.c
+++ b/arch/ia64/kernel/pci-swiotlb.c
@@ -16,7 +16,7 @@ EXPORT_SYMBOL(swiotlb);
 
 static void *ia64_swiotlb_alloc_coherent(struct device *dev, size_t size,
 					 dma_addr_t *dma_handle, gfp_t gfp,
-					 struct dma_attrs *attrs)
+					 unsigned long attrs)
 {
 	if (dev->coherent_dma_mask != DMA_BIT_MASK(64))
 		gfp |= GFP_DMA;
@@ -25,7 +25,7 @@ static void *ia64_swiotlb_alloc_coherent(struct device *dev, size_t size,
 
 static void ia64_swiotlb_free_coherent(struct device *dev, size_t size,
 				       void *vaddr, dma_addr_t dma_addr,
-				       struct dma_attrs *attrs)
+				       unsigned long attrs)
 {
 	swiotlb_free_coherent(dev, size, vaddr, dma_addr);
 }
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index 8f59907007cb..74c934a997bb 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -77,7 +77,7 @@ EXPORT_SYMBOL(sn_dma_set_mask);
  */
 static void *sn_dma_alloc_coherent(struct device *dev, size_t size,
 				   dma_addr_t * dma_handle, gfp_t flags,
-				   struct dma_attrs *attrs)
+				   unsigned long attrs)
 {
 	void *cpuaddr;
 	unsigned long phys_addr;
@@ -138,7 +138,7 @@ static void *sn_dma_alloc_coherent(struct device *dev, size_t size,
  * any associated IOMMU mappings.
  */
 static void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
-				 dma_addr_t dma_handle, struct dma_attrs *attrs)
+				 dma_addr_t dma_handle, unsigned long attrs)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
@@ -176,21 +176,18 @@ static void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr
 static dma_addr_t sn_dma_map_page(struct device *dev, struct page *page,
 				  unsigned long offset, size_t size,
 				  enum dma_data_direction dir,
-				  struct dma_attrs *attrs)
+				  unsigned long attrs)
 {
 	void *cpu_addr = page_address(page) + offset;
 	dma_addr_t dma_addr;
 	unsigned long phys_addr;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
-	int dmabarr;
-
-	dmabarr = dma_get_attr(DMA_ATTR_WRITE_BARRIER, attrs);
 
 	BUG_ON(!dev_is_pci(dev));
 
 	phys_addr = __pa(cpu_addr);
-	if (dmabarr)
+	if (attrs & DMA_ATTR_WRITE_BARRIER)
 		dma_addr = provider->dma_map_consistent(pdev, phys_addr,
 							size, SN_DMA_ADDR_PHYS);
 	else
@@ -218,7 +215,7 @@ static dma_addr_t sn_dma_map_page(struct device *dev, struct page *page,
  */
 static void sn_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
 			      size_t size, enum dma_data_direction dir,
-			      struct dma_attrs *attrs)
+			      unsigned long attrs)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
@@ -240,7 +237,7 @@ static void sn_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
  */
 static void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl,
 			    int nhwentries, enum dma_data_direction dir,
-			    struct dma_attrs *attrs)
+			    unsigned long attrs)
 {
 	int i;
 	struct pci_dev *pdev = to_pci_dev(dev);
@@ -273,16 +270,13 @@ static void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl,
  */
 static int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl,
 			 int nhwentries, enum dma_data_direction dir,
-			 struct dma_attrs *attrs)
+			 unsigned long attrs)
 {
 	unsigned long phys_addr;
 	struct scatterlist *saved_sg = sgl, *sg;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 	int i;
-	int dmabarr;
-
-	dmabarr = dma_get_attr(DMA_ATTR_WRITE_BARRIER, attrs);
 
 	BUG_ON(!dev_is_pci(dev));
 
@@ -292,7 +286,7 @@ static int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl,
 	for_each_sg(sgl, sg, nhwentries, i) {
 		dma_addr_t dma_addr;
 		phys_addr = SG_ENT_PHYS_ADDRESS(sg);
-		if (dmabarr)
+		if (attrs & DMA_ATTR_WRITE_BARRIER)
 			dma_addr = provider->dma_map_consistent(pdev,
 								phys_addr,
 								sg->length,
diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index 01693df7f2f6..ec9cc1fdd237 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -35,7 +35,6 @@
 #include <asm/amigahw.h>
 #include <asm/amigaints.h>
 #include <asm/irq.h>
-#include <asm/rtc.h>
 #include <asm/machdep.h>
 #include <asm/io.h>
 
diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c
index 6e62d66c396e..432bc8bacfc2 100644
--- a/arch/m68k/apollo/config.c
+++ b/arch/m68k/apollo/config.c
@@ -15,7 +15,6 @@
 #include <asm/pgtable.h>
 #include <asm/apollohw.h>
 #include <asm/irq.h>
-#include <asm/rtc.h>
 #include <asm/machdep.h>
 
 u_long sio01_physaddr;
diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c
index 478623dbb209..611d4d9ea2bd 100644
--- a/arch/m68k/bvme6000/config.c
+++ b/arch/m68k/bvme6000/config.c
@@ -34,7 +34,6 @@
 #include <asm/setup.h>
 #include <asm/irq.h>
 #include <asm/traps.h>
-#include <asm/rtc.h>
 #include <asm/machdep.h>
 #include <asm/bvme6000hw.h>
 
diff --git a/arch/m68k/hp300/config.c b/arch/m68k/hp300/config.c
index a9befe65adc4..7cfab158fb61 100644
--- a/arch/m68k/hp300/config.c
+++ b/arch/m68k/hp300/config.c
@@ -12,6 +12,7 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/console.h>
+#include <linux/rtc.h>
 
 #include <asm/bootinfo.h>
 #include <asm/bootinfo-hp300.h>
@@ -20,7 +21,6 @@
 #include <asm/blinken.h>
 #include <asm/io.h>                               /* readb() and writeb() */
 #include <asm/hp300hw.h>
-#include <asm/rtc.h>
 
 #include "time.h"
 
diff --git a/arch/m68k/include/asm/flat.h b/arch/m68k/include/asm/flat.h
index f9454b89a51b..00c392b0cabd 100644
--- a/arch/m68k/include/asm/flat.h
+++ b/arch/m68k/include/asm/flat.h
@@ -1,5 +1,5 @@
 /*
- * include/asm-m68knommu/flat.h -- uClinux flat-format executables
+ * flat.h -- uClinux flat-format executables
  */
 
 #ifndef __M68KNOMMU_FLAT_H__
@@ -8,8 +8,9 @@
 #define	flat_argvp_envp_on_stack()		1
 #define	flat_old_ram_flag(flags)		(flags)
 #define	flat_reloc_valid(reloc, size)		((reloc) <= (size))
-#define	flat_get_addr_from_rp(rp, relval, flags, p)	get_unaligned(rp)
-#define	flat_put_addr_at_rp(rp, val, relval)	put_unaligned(val,rp)
+#define	flat_get_addr_from_rp(rp, relval, flags, p) \
+	({ unsigned long __val; __get_user_unaligned(__val, rp); __val; })
+#define	flat_put_addr_at_rp(rp, val, relval)	__put_user_unaligned(val, rp)
 #define	flat_get_relocate_addr(rel)		(rel)
 
 static inline int flat_set_persistent(unsigned long relval,
@@ -18,4 +19,10 @@ static inline int flat_set_persistent(unsigned long relval,
 	return 0;
 }
 
+#define FLAT_PLAT_INIT(regs) \
+	do { \
+		if (current->mm) \
+			(regs)->d5 = current->mm->start_data; \
+	} while (0)
+
 #endif /* __M68KNOMMU_FLAT_H__ */
diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h
index a6ce2ec8d693..c84a2183b3f0 100644
--- a/arch/m68k/include/asm/processor.h
+++ b/arch/m68k/include/asm/processor.h
@@ -110,7 +110,6 @@ struct thread_struct {
 #define setframeformat(_regs)	do { } while (0)
 #endif
 
-#ifdef CONFIG_MMU
 /*
  * Do necessary setup to start up a newly executed thread.
  */
@@ -123,26 +122,14 @@ static inline void start_thread(struct pt_regs * regs, unsigned long pc,
 	wrusp(usp);
 }
 
+#ifdef CONFIG_MMU
 extern int handle_kernel_fault(struct pt_regs *regs);
-
 #else
-
-#define start_thread(_regs, _pc, _usp)                  \
-do {                                                    \
-	(_regs)->pc = (_pc);                            \
-	setframeformat(_regs);                          \
-	if (current->mm)                                \
-		(_regs)->d5 = current->mm->start_data;  \
-	(_regs)->sr &= ~0x2000;                         \
-	wrusp(_usp);                                    \
-} while(0)
-
 static inline  int handle_kernel_fault(struct pt_regs *regs)
 {
 	/* Any fault in kernel is fatal on non-mmu */
 	return 0;
 }
-
 #endif
 
 /* Forward declaration, a strange C thing */
diff --git a/arch/m68k/include/asm/rtc.h b/arch/m68k/include/asm/rtc.h
deleted file mode 100644
index a4d08ea122ee..000000000000
--- a/arch/m68k/include/asm/rtc.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/* include/asm-m68k/rtc.h
- *
- * Copyright Richard Zidlicky
- * implementation details for genrtc/q40rtc driver
- */
-/* permission is hereby granted to copy, modify and redistribute this code
- * in terms of the GNU Library General Public License, Version 2 or later,
- * at your option.
- */
-
-#ifndef _ASM_RTC_H
-#define _ASM_RTC_H
-
-#ifdef __KERNEL__
-
-#include <linux/rtc.h>
-#include <asm/errno.h>
-#include <asm/machdep.h>
-
-#define RTC_PIE 0x40		/* periodic interrupt enable */
-#define RTC_AIE 0x20		/* alarm interrupt enable */
-#define RTC_UIE 0x10		/* update-finished interrupt enable */
-
-/* some dummy definitions */
-#define RTC_BATT_BAD 0x100	/* battery bad */
-#define RTC_SQWE 0x08		/* enable square-wave output */
-#define RTC_DM_BINARY 0x04	/* all time/date values are BCD if clear */
-#define RTC_24H 0x02		/* 24 hour mode - else hours bit 7 means pm */
-#define RTC_DST_EN 0x01	        /* auto switch DST - works f. USA only */
-
-static inline unsigned int get_rtc_time(struct rtc_time *time)
-{
-	/*
-	 * Only the values that we read from the RTC are set. We leave
-	 * tm_wday, tm_yday and tm_isdst untouched. Even though the
-	 * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated
-	 * by the RTC when initially set to a non-zero value.
-	 */
-	if (mach_hwclk)
-		mach_hwclk(0, time);
-	return RTC_24H;
-}
-
-static inline int set_rtc_time(struct rtc_time *time)
-{
-	if (mach_hwclk)
-		return mach_hwclk(1, time);
-	return -EINVAL;
-}
-
-static inline unsigned int get_rtc_ss(void)
-{
-	if (mach_get_ss)
-		return mach_get_ss();
-	else{
-		struct rtc_time h;
-
-		get_rtc_time(&h);
-		return h.tm_sec;
-	}
-}
-
-static inline int get_rtc_pll(struct rtc_pll_info *pll)
-{
-	if (mach_get_rtc_pll)
-		return mach_get_rtc_pll(pll);
-	else
-		return -EINVAL;
-}
-static inline int set_rtc_pll(struct rtc_pll_info *pll)
-{
-	if (mach_set_rtc_pll)
-		return mach_set_rtc_pll(pll);
-	else
-		return -EINVAL;
-}
-#endif /* __KERNEL__ */
-
-#endif /* _ASM__RTC_H */
diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c
index cbc78b4117b5..8cf97cbadc91 100644
--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c
@@ -19,7 +19,7 @@
 #if defined(CONFIG_MMU) && !defined(CONFIG_COLDFIRE)
 
 static void *m68k_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
-		gfp_t flag, struct dma_attrs *attrs)
+		gfp_t flag, unsigned long attrs)
 {
 	struct page *page, **map;
 	pgprot_t pgprot;
@@ -62,7 +62,7 @@ static void *m68k_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 }
 
 static void m68k_dma_free(struct device *dev, size_t size, void *addr,
-		dma_addr_t handle, struct dma_attrs *attrs)
+		dma_addr_t handle, unsigned long attrs)
 {
 	pr_debug("dma_free_coherent: %p, %x\n", addr, handle);
 	vfree(addr);
@@ -73,7 +73,7 @@ static void m68k_dma_free(struct device *dev, size_t size, void *addr,
 #include <asm/cacheflush.h>
 
 static void *m68k_dma_alloc(struct device *dev, size_t size,
-		dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs)
+		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
 	void *ret;
 	/* ignore region specifiers */
@@ -91,7 +91,7 @@ static void *m68k_dma_alloc(struct device *dev, size_t size,
 }
 
 static void m68k_dma_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_handle, struct dma_attrs *attrs)
+		dma_addr_t dma_handle, unsigned long attrs)
 {
 	free_pages((unsigned long)vaddr, get_order(size));
 }
@@ -130,7 +130,7 @@ static void m68k_dma_sync_sg_for_device(struct device *dev,
 
 static dma_addr_t m68k_dma_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size, enum dma_data_direction dir,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	dma_addr_t handle = page_to_phys(page) + offset;
 
@@ -139,7 +139,7 @@ static dma_addr_t m68k_dma_map_page(struct device *dev, struct page *page,
 }
 
 static int m68k_dma_map_sg(struct device *dev, struct scatterlist *sglist,
-		int nents, enum dma_data_direction dir, struct dma_attrs *attrs)
+		int nents, enum dma_data_direction dir, unsigned long attrs)
 {
 	int i;
 	struct scatterlist *sg;
diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
index 3857737e3958..4e5aa2f4f522 100644
--- a/arch/m68k/kernel/time.c
+++ b/arch/m68k/kernel/time.c
@@ -86,7 +86,49 @@ void read_persistent_clock(struct timespec *ts)
 	}
 }
 
-#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
+#if defined(CONFIG_ARCH_USES_GETTIMEOFFSET) && IS_ENABLED(CONFIG_RTC_DRV_GENERIC)
+static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
+{
+	mach_hwclk(0, tm);
+	return rtc_valid_tm(tm);
+}
+
+static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
+{
+	if (mach_hwclk(1, tm) < 0)
+		return -EOPNOTSUPP;
+	return 0;
+}
+
+static int rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+	struct rtc_pll_info pll;
+	struct rtc_pll_info __user *argp = (void __user *)arg;
+
+	switch (cmd) {
+	case RTC_PLL_GET:
+		if (!mach_get_rtc_pll || mach_get_rtc_pll(&pll))
+			return -EINVAL;
+		return copy_to_user(argp, &pll, sizeof pll) ? -EFAULT : 0;
+
+	case RTC_PLL_SET:
+		if (!mach_set_rtc_pll)
+			return -EINVAL;
+		if (!capable(CAP_SYS_TIME))
+			return -EACCES;
+		if (copy_from_user(&pll, argp, sizeof(pll)))
+			return -EFAULT;
+		return mach_set_rtc_pll(&pll);
+	}
+
+	return -ENOIOCTLCMD;
+}
+
+static const struct rtc_class_ops generic_rtc_ops = {
+	.ioctl = rtc_ioctl,
+	.read_time = rtc_generic_get_time,
+	.set_time = rtc_generic_set_time,
+};
 
 static int __init rtc_init(void)
 {
@@ -95,7 +137,9 @@ static int __init rtc_init(void)
 	if (!mach_hwclk)
 		return -ENODEV;
 
-	pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
+	pdev = platform_device_register_data(NULL, "rtc-generic", -1,
+					     &generic_rtc_ops,
+					     sizeof(generic_rtc_ops));
 	return PTR_ERR_OR_ZERO(pdev);
 }
 
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index 689b47d292ac..2f33a33001e5 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -10,6 +10,7 @@
  * Miscellaneous linux stuff
  */
 
+#include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/mm.h>
@@ -25,6 +26,7 @@
 #include <linux/platform_device.h>
 #include <linux/adb.h>
 #include <linux/cuda.h>
+#include <linux/rtc.h>
 
 #include <asm/setup.h>
 #include <asm/bootinfo.h>
@@ -34,7 +36,6 @@
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/pgtable.h>
-#include <asm/rtc.h>
 #include <asm/machdep.h>
 
 #include <asm/macintosh.h>
diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c
index 707b61aea203..0fb54a90eac2 100644
--- a/arch/m68k/mac/misc.c
+++ b/arch/m68k/mac/misc.c
@@ -18,7 +18,6 @@
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
-#include <asm/rtc.h>
 #include <asm/segment.h>
 #include <asm/setup.h>
 #include <asm/macintosh.h>
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index e6a3b56c6481..c11d38dfad08 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -32,7 +32,6 @@
 #include <asm/setup.h>
 #include <asm/irq.h>
 #include <asm/traps.h>
-#include <asm/rtc.h>
 #include <asm/machdep.h>
 #include <asm/mvme147hw.h>
 
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index a53803cc66cd..58e240939d26 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -35,7 +35,6 @@
 #include <asm/setup.h>
 #include <asm/irq.h>
 #include <asm/traps.h>
-#include <asm/rtc.h>
 #include <asm/machdep.h>
 #include <asm/mvme16xhw.h>
 
diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c
index e90fe903613e..fcb7f05b60b6 100644
--- a/arch/m68k/q40/config.c
+++ b/arch/m68k/q40/config.c
@@ -12,6 +12,7 @@
  * for more details.
  */
 
+#include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
@@ -27,7 +28,6 @@
 #include <linux/platform_device.h>
 
 #include <asm/io.h>
-#include <asm/rtc.h>
 #include <asm/bootinfo.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c
index 71884bf01d72..3af34fa3a344 100644
--- a/arch/m68k/sun3/config.c
+++ b/arch/m68k/sun3/config.c
@@ -26,7 +26,6 @@
 #include <asm/pgalloc.h>
 #include <asm/sun3-head.h>
 #include <asm/sun3mmu.h>
-#include <asm/rtc.h>
 #include <asm/machdep.h>
 #include <asm/machines.h>
 #include <asm/idprom.h>
diff --git a/arch/m68k/sun3/intersil.c b/arch/m68k/sun3/intersil.c
index 889829e11f1d..2cd0bcbe6f30 100644
--- a/arch/m68k/sun3/intersil.c
+++ b/arch/m68k/sun3/intersil.c
@@ -14,8 +14,8 @@
 #include <linux/rtc.h>
 
 #include <asm/errno.h>
-#include <asm/rtc.h>
 #include <asm/intersil.h>
+#include <asm/machdep.h>
 
 
 /* bits to set for start/run of the intersil */
diff --git a/arch/m68k/sun3x/time.c b/arch/m68k/sun3x/time.c
index c8eb08add6b0..431d3c4306dd 100644
--- a/arch/m68k/sun3x/time.c
+++ b/arch/m68k/sun3x/time.c
@@ -15,10 +15,10 @@
 
 #include <asm/irq.h>
 #include <asm/io.h>
+#include <asm/machdep.h>
 #include <asm/traps.h>
 #include <asm/sun3x.h>
 #include <asm/sun3ints.h>
-#include <asm/rtc.h>
 
 #include "time.h"
 
diff --git a/arch/metag/include/asm/cmpxchg_lnkget.h b/arch/metag/include/asm/cmpxchg_lnkget.h
index 0154e2807ebb..2369ad394876 100644
--- a/arch/metag/include/asm/cmpxchg_lnkget.h
+++ b/arch/metag/include/asm/cmpxchg_lnkget.h
@@ -73,7 +73,7 @@ static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
 		      "	DCACHE	[%2], %0\n"
 #endif
 		      "2:\n"
-		      : "=&d" (temp), "=&da" (retval)
+		      : "=&d" (temp), "=&d" (retval)
 		      : "da" (m), "bd" (old), "da" (new)
 		      : "cc"
 		      );
diff --git a/arch/metag/include/asm/metag_mem.h b/arch/metag/include/asm/metag_mem.h
index aa5a076df439..7848bc6d3b61 100644
--- a/arch/metag/include/asm/metag_mem.h
+++ b/arch/metag/include/asm/metag_mem.h
@@ -881,7 +881,7 @@
 #define    PERFCTRL_DCSTALL 11 /* Dcache+TLB o/p delayed (per-thread) */
 #define    PERFCTRL_ICSTALL 12 /* Icache+TLB o/p delayed (per-thread) */
 
-#define    PERFCTRL_INT     13 /* Internal core delailed events (see next) */
+#define    PERFCTRL_INT     13 /* Internal core detailed events (see next) */
 #define    PERFCTRL_EXT     15 /* External source in core periphery */
 #endif /* METAC_2_1 */
 
diff --git a/arch/metag/include/asm/metag_regs.h b/arch/metag/include/asm/metag_regs.h
index 40c3f679c5b8..60b750971d8a 100644
--- a/arch/metag/include/asm/metag_regs.h
+++ b/arch/metag/include/asm/metag_regs.h
@@ -179,7 +179,7 @@
 ;   is best to dump these registers immediately at the start of a routine
 ;   using a MSETL or SETL instruction-
 ;
-;   MSETL   [A0StP],D0Ar6,D0Ar4,D0Ar2; Only dump argments expected
+;   MSETL   [A0StP],D0Ar6,D0Ar4,D0Ar2; Only dump arguments expected
 ;or SETL    [A0StP+#8++],D0Ar2       ; Up to two 32-bit args expected
 ;
 ; For non-leaf routines it is always necessary to save and restore at least
diff --git a/arch/metag/kernel/cachepart.c b/arch/metag/kernel/cachepart.c
index 04b7d4f8429a..db944c2e7d88 100644
--- a/arch/metag/kernel/cachepart.c
+++ b/arch/metag/kernel/cachepart.c
@@ -15,7 +15,7 @@
 #define SYSC_DCPART(n)	(SYSC_DCPART0 + SYSC_xCPARTn_STRIDE * (n))
 #define SYSC_ICPART(n)	(SYSC_ICPART0 + SYSC_xCPARTn_STRIDE * (n))
 
-#define CACHE_ASSOCIATIVITY 4 /* 4 way set-assosiative */
+#define CACHE_ASSOCIATIVITY 4 /* 4 way set-associative */
 #define ICACHE 0
 #define DCACHE 1
 
diff --git a/arch/metag/kernel/dma.c b/arch/metag/kernel/dma.c
index e12368d02155..0db31e24c541 100644
--- a/arch/metag/kernel/dma.c
+++ b/arch/metag/kernel/dma.c
@@ -172,7 +172,7 @@ out:
  * virtual and bus address for that space.
  */
 static void *metag_dma_alloc(struct device *dev, size_t size,
-		dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
+		dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
 {
 	struct page *page;
 	struct metag_vm_region *c;
@@ -268,7 +268,7 @@ no_page:
  * free a page as defined by the above mapping.
  */
 static void metag_dma_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_handle, struct dma_attrs *attrs)
+		dma_addr_t dma_handle, unsigned long attrs)
 {
 	struct metag_vm_region *c;
 	unsigned long flags, addr;
@@ -331,13 +331,13 @@ no_area:
 
 static int metag_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 		void *cpu_addr, dma_addr_t dma_addr, size_t size,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	unsigned long flags, user_size, kern_size;
 	struct metag_vm_region *c;
 	int ret = -ENXIO;
 
-	if (dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
+	if (attrs & DMA_ATTR_WRITE_COMBINE)
 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 	else
 		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
@@ -482,7 +482,7 @@ static void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction)
 
 static dma_addr_t metag_dma_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size,
-		enum dma_data_direction direction, struct dma_attrs *attrs)
+		enum dma_data_direction direction, unsigned long attrs)
 {
 	dma_sync_for_device((void *)(page_to_phys(page) + offset), size,
 			    direction);
@@ -491,14 +491,14 @@ static dma_addr_t metag_dma_map_page(struct device *dev, struct page *page,
 
 static void metag_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
 		size_t size, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	dma_sync_for_cpu(phys_to_virt(dma_address), size, direction);
 }
 
 static int metag_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 		int nents, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -516,7 +516,7 @@ static int metag_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 
 static void metag_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		int nhwentries, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
diff --git a/arch/metag/lib/divsi3.S b/arch/metag/lib/divsi3.S
index 7c8a8ae9a0a1..11124cc93dee 100644
--- a/arch/metag/lib/divsi3.S
+++ b/arch/metag/lib/divsi3.S
@@ -50,7 +50,7 @@ $LIDMCQuick:
 	ADDCC   D0Re0,D0Re0,#1          ! If yes result += 1
 	SUBCC   D1Ar1,D1Ar1,D1Re0       !        and A -= Bu
 	ORS     D0Ar4,D0Ar4,D0Ar4       ! Return neg result?
-	NEG     D0Ar2,D0Re0             ! Calulate neg result
+	NEG     D0Ar2,D0Re0             ! Calculate neg result
 	MOVMI   D0Re0,D0Ar2             ! Yes: Take neg result
 $LIDMCRet:
 	MOV     PC,D1RtP
@@ -94,7 +94,7 @@ $LIDMCLoop:
 	LSR     D1Re0, D1Re0, #1        ! Shift down B
 	BNZ     $LIDMCLoop               ! Was single bit in curbit lost?
 	ORS     D0Ar4,D0Ar4,D0Ar4       ! Return neg result?
-	NEG     D0Ar2,D0Re0             ! Calulate neg result
+	NEG     D0Ar2,D0Re0             ! Calculate neg result
 	MOVMI   D0Re0,D0Ar2             ! Yes: Take neg result
 	MOV     PC,D1RtP
 	.size   ___divsi3,.-___divsi3
diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c
index 372783a67dda..c765b3621b9b 100644
--- a/arch/metag/mm/fault.c
+++ b/arch/metag/mm/fault.c
@@ -187,7 +187,7 @@ bad_area_nosemaphore:
 
 		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
 		    printk_ratelimit()) {
-			pr_info("%s%s[%d]: segfault at %lx pc %08x sp %08x write %d trap %#x (%s)",
+			printk("%s%s[%d]: segfault at %lx pc %08x sp %08x write %d trap %#x (%s)",
 			       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
 			       tsk->comm, task_pid_nr(tsk), address,
 			       regs->ctx.CurrPC, regs->ctx.AX[0].U0,
diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c
index 11fa51c89617..c0ec116b3993 100644
--- a/arch/metag/mm/init.c
+++ b/arch/metag/mm/init.c
@@ -390,7 +390,6 @@ void __init mem_init(void)
 
 	free_all_bootmem();
 	mem_init_print_info(NULL);
-	show_mem(0);
 }
 
 void free_initmem(void)
diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h
index 1884783d15c0..1768d4bdc8d3 100644
--- a/arch/microblaze/include/asm/dma-mapping.h
+++ b/arch/microblaze/include/asm/dma-mapping.h
@@ -25,7 +25,6 @@
 #include <linux/mm.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-debug.h>
-#include <linux/dma-attrs.h>
 #include <asm/io.h>
 #include <asm/cacheflush.h>
 
diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h
index fc3ecb55f1b2..2a120bb70e54 100644
--- a/arch/microblaze/include/asm/pci.h
+++ b/arch/microblaze/include/asm/pci.h
@@ -82,9 +82,6 @@ extern pgprot_t	pci_phys_mem_access_prot(struct file *file,
 					 pgprot_t prot);
 
 #define HAVE_ARCH_PCI_RESOURCE_TO_USER
-extern void pci_resource_to_user(const struct pci_dev *dev, int bar,
-				 const struct resource *rsrc,
-				 resource_size_t *start, resource_size_t *end);
 
 extern void pcibios_setup_bus_devices(struct pci_bus *bus);
 extern void pcibios_setup_bus_self(struct pci_bus *bus);
diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h
index 383f387b4eee..e7e8954e9815 100644
--- a/arch/microblaze/include/asm/thread_info.h
+++ b/arch/microblaze/include/asm/thread_info.h
@@ -148,33 +148,6 @@ static inline struct thread_info *current_thread_info(void)
  */
 /* FPU was used by this task this quantum (SMP) */
 #define TS_USEDFPU		0x0001
-#define TS_RESTORE_SIGMASK	0x0002
-
-#ifndef __ASSEMBLY__
-#define HAVE_SET_RESTORE_SIGMASK 1
-static inline void set_restore_sigmask(void)
-{
-	struct thread_info *ti = current_thread_info();
-	ti->status |= TS_RESTORE_SIGMASK;
-	WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags));
-}
-static inline void clear_restore_sigmask(void)
-{
-	current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-}
-static inline bool test_restore_sigmask(void)
-{
-	return current_thread_info()->status & TS_RESTORE_SIGMASK;
-}
-static inline bool test_and_clear_restore_sigmask(void)
-{
-	struct thread_info *ti = current_thread_info();
-	if (!(ti->status & TS_RESTORE_SIGMASK))
-		return false;
-	ti->status &= ~TS_RESTORE_SIGMASK;
-	return true;
-}
-#endif
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_MICROBLAZE_THREAD_INFO_H */
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index bf4dec229437..ec04dc1e2527 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -17,7 +17,7 @@
 
 static void *dma_direct_alloc_coherent(struct device *dev, size_t size,
 				       dma_addr_t *dma_handle, gfp_t flag,
-				       struct dma_attrs *attrs)
+				       unsigned long attrs)
 {
 #ifdef NOT_COHERENT_CACHE
 	return consistent_alloc(flag, size, dma_handle);
@@ -42,7 +42,7 @@ static void *dma_direct_alloc_coherent(struct device *dev, size_t size,
 
 static void dma_direct_free_coherent(struct device *dev, size_t size,
 				     void *vaddr, dma_addr_t dma_handle,
-				     struct dma_attrs *attrs)
+				     unsigned long attrs)
 {
 #ifdef NOT_COHERENT_CACHE
 	consistent_free(size, vaddr);
@@ -53,7 +53,7 @@ static void dma_direct_free_coherent(struct device *dev, size_t size,
 
 static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
 			     int nents, enum dma_data_direction direction,
-			     struct dma_attrs *attrs)
+			     unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -78,7 +78,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
 					     unsigned long offset,
 					     size_t size,
 					     enum dma_data_direction direction,
-					     struct dma_attrs *attrs)
+					     unsigned long attrs)
 {
 	__dma_sync(page_to_phys(page) + offset, size, direction);
 	return page_to_phys(page) + offset;
@@ -88,7 +88,7 @@ static inline void dma_direct_unmap_page(struct device *dev,
 					 dma_addr_t dma_address,
 					 size_t size,
 					 enum dma_data_direction direction,
-					 struct dma_attrs *attrs)
+					 unsigned long attrs)
 {
 /* There is not necessary to do cache cleanup
  *
@@ -157,7 +157,7 @@ dma_direct_sync_sg_for_device(struct device *dev,
 static
 int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
 			     void *cpu_addr, dma_addr_t handle, size_t size,
-			     struct dma_attrs *attrs)
+			     unsigned long attrs)
 {
 #ifdef CONFIG_MMU
 	unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 77bc7c7e6522..434639f9a3a6 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -414,7 +414,7 @@ void __init *early_get_page(void)
 
 #endif /* CONFIG_MMU */
 
-void * __init_refok alloc_maybe_bootmem(size_t size, gfp_t mask)
+void * __ref alloc_maybe_bootmem(size_t size, gfp_t mask)
 {
 	if (mem_init_done)
 		return kmalloc(size, mask);
@@ -422,7 +422,7 @@ void * __init_refok alloc_maybe_bootmem(size_t size, gfp_t mask)
 		return alloc_bootmem(size);
 }
 
-void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask)
+void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)
 {
 	void *p;
 
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index eb99fcc76088..cc732fe357ad 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -234,7 +234,7 @@ unsigned long iopa(unsigned long addr)
 	return pa;
 }
 
-__init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 		unsigned long address)
 {
 	pte_t *pte;
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index 14cba600da7a..81556b843a8e 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -219,33 +219,6 @@ static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
 }
 
 /*
- * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
- * device mapping.
- */
-static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
-				      pgprot_t protection,
-				      enum pci_mmap_state mmap_state,
-				      int write_combine)
-{
-	pgprot_t prot = protection;
-
-	/* Write combine is always 0 on non-memory space mappings. On
-	 * memory space, if the user didn't pass 1, we check for a
-	 * "prefetchable" resource. This is a bit hackish, but we use
-	 * this to workaround the inability of /sysfs to provide a write
-	 * combine bit
-	 */
-	if (mmap_state != pci_mmap_mem)
-		write_combine = 0;
-	else if (write_combine == 0) {
-		if (rp->flags & IORESOURCE_PREFETCH)
-			write_combine = 1;
-	}
-
-	return pgprot_noncached(prot);
-}
-
-/*
  * This one is used by /dev/mem and fbdev who have no clue about the
  * PCI device, it tries to find the PCI device first and calls the
  * above routine
@@ -317,9 +290,7 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 		return -EINVAL;
 
 	vma->vm_pgoff = offset >> PAGE_SHIFT;
-	vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp,
-						  vma->vm_page_prot,
-						  mmap_state, write_combine);
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
 	ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
 			       vma->vm_end - vma->vm_start, vma->vm_page_prot);
@@ -473,39 +444,25 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar,
 			  const struct resource *rsrc,
 			  resource_size_t *start, resource_size_t *end)
 {
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-	resource_size_t offset = 0;
+	struct pci_bus_region region;
 
-	if (hose == NULL)
+	if (rsrc->flags & IORESOURCE_IO) {
+		pcibios_resource_to_bus(dev->bus, &region,
+					(struct resource *) rsrc);
+		*start = region.start;
+		*end = region.end;
 		return;
+	}
 
-	if (rsrc->flags & IORESOURCE_IO)
-		offset = (unsigned long)hose->io_base_virt - _IO_BASE;
-
-	/* We pass a fully fixed up address to userland for MMIO instead of
-	 * a BAR value because X is lame and expects to be able to use that
-	 * to pass to /dev/mem !
+	/* We pass a CPU physical address to userland for MMIO instead of a
+	 * BAR value because X is lame and expects to be able to use that
+	 * to pass to /dev/mem!
 	 *
-	 * That means that we'll have potentially 64 bits values where some
-	 * userland apps only expect 32 (like X itself since it thinks only
-	 * Sparc has 64 bits MMIO) but if we don't do that, we break it on
-	 * 32 bits CHRPs :-(
-	 *
-	 * Hopefully, the sysfs insterface is immune to that gunk. Once X
-	 * has been fixed (and the fix spread enough), we can re-enable the
-	 * 2 lines below and pass down a BAR value to userland. In that case
-	 * we'll also have to re-enable the matching code in
-	 * __pci_mmap_make_offset().
-	 *
-	 * BenH.
+	 * That means we may have 64-bit values where some apps only expect
+	 * 32 (like X itself since it thinks only Sparc has 64-bit MMIO).
 	 */
-#if 0
-	else if (rsrc->flags & IORESOURCE_MEM)
-		offset = hose->pci_mem_offset;
-#endif
-
-	*start = rsrc->start - offset;
-	*end = rsrc->end - offset;
+	*start = rsrc->start;
+	*end = rsrc->end;
 }
 
 /**
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index ac91939b9b75..26388562e300 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -64,6 +64,7 @@ config MIPS
 	select GENERIC_TIME_VSYSCALL
 	select ARCH_CLOCKSOURCE_DATA
 	select HANDLE_DOMAIN_IRQ
+	select HAVE_EXIT_THREAD
 
 menu "Machine selection"
 
@@ -384,7 +385,7 @@ config MACH_PISTACHIO
 	select CLKSRC_MIPS_GIC
 	select COMMON_CLK
 	select CSRC_R4K
-	select DMA_MAYBE_COHERENT
+	select DMA_NONCOHERENT
 	select GPIOLIB
 	select IRQ_MIPS_CPU
 	select LIBFDT
@@ -880,7 +881,6 @@ config CAVIUM_OCTEON_SOC
 	select SYS_SUPPORTS_HOTPLUG_CPU if CPU_BIG_ENDIAN
 	select SYS_HAS_EARLY_PRINTK
 	select SYS_HAS_CPU_CAVIUM_OCTEON
-	select SWAP_IO_SPACE
 	select HW_HAS_PCI
 	select ZONE_DMA32
 	select HOLES_IN_ZONE
@@ -1111,16 +1111,6 @@ config NEED_DMA_MAP_STATE
 config SYS_HAS_EARLY_PRINTK
 	bool
 
-config HOTPLUG_CPU
-	bool "Support for hot-pluggable CPUs"
-	depends on SMP && SYS_SUPPORTS_HOTPLUG_CPU
-	help
-	  Say Y here to allow turning CPUs off and on. CPUs can be
-	  controlled through /sys/devices/system/cpu.
-	  (Note: power management support will enable this option
-	    automatically on SMP systems. )
-	  Say N if you want to disable CPU hotplug.
-
 config SYS_SUPPORTS_HOTPLUG_CPU
 	bool
 
@@ -1406,7 +1396,6 @@ config CPU_LOONGSON1B
 	bool "Loongson 1B"
 	depends on SYS_HAS_CPU_LOONGSON1B
 	select CPU_LOONGSON1
-	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select LEDS_GPIO_REGISTER
 	help
 	  The Loongson 1B is a 32-bit SoC, which implements the MIPS32
@@ -1488,6 +1477,7 @@ config CPU_MIPS64_R2
 	select CPU_SUPPORTS_HIGHMEM
 	select CPU_SUPPORTS_HUGEPAGES
 	select CPU_SUPPORTS_MSA
+	select HAVE_KVM
 	help
 	  Choose this option to build a kernel for release 2 or later of the
 	  MIPS64 architecture.  Many modern embedded systems with a 64-bit
@@ -1505,6 +1495,7 @@ config CPU_MIPS64_R6
 	select CPU_SUPPORTS_MSA
 	select GENERIC_CSUM
 	select MIPS_O32_FP64_SUPPORT if MIPS32_O32
+	select HAVE_KVM
 	help
 	  Choose this option to build a kernel for release 6 or later of the
 	  MIPS64 architecture.  New MIPS processors, starting with the Warrior
@@ -2634,6 +2625,16 @@ config SMP
 
 	  If you don't know what to do here, say N.
 
+config HOTPLUG_CPU
+	bool "Support for hot-pluggable CPUs"
+	depends on SMP && SYS_SUPPORTS_HOTPLUG_CPU
+	help
+	  Say Y here to allow turning CPUs off and on. CPUs can be
+	  controlled through /sys/devices/system/cpu.
+	  (Note: power management support will enable this option
+	    automatically on SMP systems. )
+	  Say N if you want to disable CPU hotplug.
+
 config SMP_UP
 	bool
 
@@ -2885,10 +2886,10 @@ choice
 		  the documented boot protocol using a device tree.
 
 	config MIPS_RAW_APPENDED_DTB
-		bool "vmlinux.bin"
+		bool "vmlinux.bin or vmlinuz.bin"
 		help
 		  With this option, the boot code will look for a device tree binary
-		  DTB) appended to raw vmlinux.bin (without decompressor).
+		  DTB) appended to raw vmlinux.bin or vmlinuz.bin.
 		  (e.g. cat vmlinux.bin <filename>.dtb > vmlinux_w_dtb).
 
 		  This is meant as a backward compatibility convenience for those
@@ -2900,24 +2901,6 @@ choice
 		  look like a DTB header after a reboot if no actual DTB is appended
 		  to vmlinux.bin.  Do not leave this option active in a production kernel
 		  if you don't intend to always append a DTB.
-
-	config MIPS_ZBOOT_APPENDED_DTB
-		bool "vmlinuz.bin"
-		depends on SYS_SUPPORTS_ZBOOT
-		help
-		  With this option, the boot code will look for a device tree binary
-		  DTB) appended to raw vmlinuz.bin (with decompressor).
-		  (e.g. cat vmlinuz.bin <filename>.dtb > vmlinuz_w_dtb).
-
-		  This is meant as a backward compatibility convenience for those
-		  systems with a bootloader that can't be upgraded to accommodate
-		  the documented boot protocol using a device tree.
-
-		  Beware that there is very little in terms of protection against
-		  this option being confused by leftover garbage in memory that might
-		  look like a DTB header after a reboot if no actual DTB is appended
-		  to vmlinuz.bin.  Do not leave this option active in a production kernel
-		  if you don't intend to always append a DTB.
 endchoice
 
 choice
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c
index 3a0019deb7f7..f206dafbb0a3 100644
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -203,8 +203,8 @@ void __init plat_mem_setup(void)
 	fdt_start = fw_getenvl("fdt_start");
 	if (fdt_start)
 		__dt_setup_arch((void *)KSEG0ADDR(fdt_start));
-	else if (fw_arg0 == -2)
-		__dt_setup_arch((void *)KSEG0ADDR(fw_arg1));
+	else if (fw_passed_dtb)
+		__dt_setup_arch((void *)KSEG0ADDR(fw_passed_dtb));
 
 	if (mips_machtype != ATH79_MACH_GENERIC_OF) {
 		ath79_reset_base = ioremap_nocache(AR71XX_RESET_BASE,
diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c
index f146d1219bde..6776042679dd 100644
--- a/arch/mips/bmips/setup.c
+++ b/arch/mips/bmips/setup.c
@@ -162,8 +162,8 @@ void __init plat_mem_setup(void)
 	/* intended to somewhat resemble ARM; see Documentation/arm/Booting */
 	if (fw_arg0 == 0 && fw_arg1 == 0xffffffff)
 		dtb = phys_to_virt(fw_arg2);
-	else if (fw_arg0 == -2) /* UHI interface */
-		dtb = (void *)fw_arg1;
+	else if (fw_passed_dtb) /* UHI interface */
+		dtb = (void *)fw_passed_dtb;
 	else if (__dtb_start != __dtb_end)
 		dtb = (void *)__dtb_start;
 	else
diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c
index 080cd53bac36..fdf99e9dd4c3 100644
--- a/arch/mips/boot/compressed/decompress.c
+++ b/arch/mips/boot/compressed/decompress.c
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
+#include <linux/libfdt.h>
 
 #include <asm/addrspace.h>
 
@@ -36,6 +37,8 @@ extern void puthex(unsigned long long val);
 #define puthex(val) do {} while (0)
 #endif
 
+extern char __appended_dtb[];
+
 void error(char *x)
 {
 	puts("\n\n");
@@ -114,6 +117,20 @@ void decompress_kernel(unsigned long boot_heap_start)
 	__decompress((char *)zimage_start, zimage_size, 0, 0,
 		   (void *)VMLINUX_LOAD_ADDRESS_ULL, 0, 0, error);
 
+	if (IS_ENABLED(CONFIG_MIPS_RAW_APPENDED_DTB) &&
+	    fdt_magic((void *)&__appended_dtb) == FDT_MAGIC) {
+		unsigned int image_size, dtb_size;
+
+		dtb_size = fdt_totalsize((void *)&__appended_dtb);
+
+		/* last four bytes is always image size in little endian */
+		image_size = le32_to_cpup((void *)&__image_end - 4);
+
+		/* copy dtb to where the booted kernel will expect it */
+		memcpy((void *)VMLINUX_LOAD_ADDRESS_ULL + image_size,
+		       __appended_dtb, dtb_size);
+	}
+
 	/* FIXME: should we flush cache here? */
 	puts("Now, booting the kernel...\n");
 }
diff --git a/arch/mips/boot/compressed/head.S b/arch/mips/boot/compressed/head.S
index c580e853b9fb..409cb483a9ff 100644
--- a/arch/mips/boot/compressed/head.S
+++ b/arch/mips/boot/compressed/head.S
@@ -25,22 +25,6 @@ start:
 	move	s2, a2
 	move	s3, a3
 
-#ifdef CONFIG_MIPS_ZBOOT_APPENDED_DTB
-	PTR_LA	t0, __appended_dtb
-#ifdef CONFIG_CPU_BIG_ENDIAN
-	li	t1, 0xd00dfeed
-#else
-	li	t1, 0xedfe0dd0
-#endif
-	lw	t2, (t0)
-	bne	t1, t2, not_found
-	 nop
-
-	move	s1, t0
-	PTR_LI	s0, -2
-not_found:
-#endif
-
 	/* Clear BSS */
 	PTR_LA	a0, _edata
 	PTR_LA	a2, _end
diff --git a/arch/mips/boot/dts/cavium-octeon/dlink_dsr-1000n.dts b/arch/mips/boot/dts/cavium-octeon/dlink_dsr-1000n.dts
index d6bc994f736f..b134798a0fd7 100644
--- a/arch/mips/boot/dts/cavium-octeon/dlink_dsr-1000n.dts
+++ b/arch/mips/boot/dts/cavium-octeon/dlink_dsr-1000n.dts
@@ -9,6 +9,7 @@
  */
 
 /include/ "octeon_3xxx.dtsi"
+#include <dt-bindings/gpio/gpio.h>
 
 / {
 	model = "dlink,dsr-1000n";
@@ -63,12 +64,27 @@
 
 		usb1 {
 			label = "usb1";
-			gpios = <&gpio 9 1>; /* Active low */
+			gpios = <&gpio 9 GPIO_ACTIVE_LOW>;
 		};
 
 		usb2 {
 			label = "usb2";
-			gpios = <&gpio 10 1>; /* Active low */
+			gpios = <&gpio 10 GPIO_ACTIVE_LOW>;
+		};
+
+		wps {
+			label = "wps";
+			gpios = <&gpio 11 GPIO_ACTIVE_LOW>;
+		};
+
+		wireless1 {
+			label = "5g";
+			gpios = <&gpio 17 GPIO_ACTIVE_LOW>;
+		};
+
+		wireless2 {
+			label = "2.4g";
+			gpios = <&gpio 18 GPIO_ACTIVE_LOW>;
 		};
 	};
 
diff --git a/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts b/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts
index de61f02d3ef6..ca6b4467bcd3 100644
--- a/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts
+++ b/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts
@@ -388,16 +388,4 @@
 		usbn = &usbn;
 		led0 = &led0;
 	};
-
-	dsr1000n-leds {
-		compatible = "gpio-leds";
-		usb1 {
-			label = "usb1";
-			gpios = <&gpio 9 1>; /* Active low */
-		};
-		usb2 {
-			label = "usb2";
-			gpios = <&gpio 10 1>; /* Active low */
-		};
-	};
  };
diff --git a/arch/mips/boot/tools/relocs_64.c b/arch/mips/boot/tools/relocs_64.c
index b671b5e2dcd8..06066e6ac2f9 100644
--- a/arch/mips/boot/tools/relocs_64.c
+++ b/arch/mips/boot/tools/relocs_64.c
@@ -9,17 +9,20 @@
 
 typedef uint8_t Elf64_Byte;
 
-typedef struct {
-	Elf64_Word r_sym;	/* Symbol index.  */
-	Elf64_Byte r_ssym;	/* Special symbol.  */
-	Elf64_Byte r_type3;	/* Third relocation.  */
-	Elf64_Byte r_type2;	/* Second relocation.  */
-	Elf64_Byte r_type;	/* First relocation.  */
+typedef union {
+	struct {
+		Elf64_Word r_sym;	/* Symbol index.  */
+		Elf64_Byte r_ssym;	/* Special symbol.  */
+		Elf64_Byte r_type3;	/* Third relocation.  */
+		Elf64_Byte r_type2;	/* Second relocation.  */
+		Elf64_Byte r_type;	/* First relocation.  */
+	} fields;
+	Elf64_Xword unused;
 } Elf64_Mips_Rela;
 
 #define ELF_CLASS               ELFCLASS64
-#define ELF_R_SYM(val)          (((Elf64_Mips_Rela *)(&val))->r_sym)
-#define ELF_R_TYPE(val)         (((Elf64_Mips_Rela *)(&val))->r_type)
+#define ELF_R_SYM(val)          (((Elf64_Mips_Rela *)(&val))->fields.r_sym)
+#define ELF_R_TYPE(val)         (((Elf64_Mips_Rela *)(&val))->fields.r_type)
 #define ELF_ST_TYPE(o)          ELF64_ST_TYPE(o)
 #define ELF_ST_BIND(o)          ELF64_ST_BIND(o)
 #define ELF_ST_VISIBILITY(o)    ELF64_ST_VISIBILITY(o)
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index 2cd45f5f9481..fd69528b24fb 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -125,7 +125,7 @@ static phys_addr_t octeon_small_dma_to_phys(struct device *dev,
 
 static dma_addr_t octeon_dma_map_page(struct device *dev, struct page *page,
 	unsigned long offset, size_t size, enum dma_data_direction direction,
-	struct dma_attrs *attrs)
+	unsigned long attrs)
 {
 	dma_addr_t daddr = swiotlb_map_page(dev, page, offset, size,
 					    direction, attrs);
@@ -135,7 +135,7 @@ static dma_addr_t octeon_dma_map_page(struct device *dev, struct page *page,
 }
 
 static int octeon_dma_map_sg(struct device *dev, struct scatterlist *sg,
-	int nents, enum dma_data_direction direction, struct dma_attrs *attrs)
+	int nents, enum dma_data_direction direction, unsigned long attrs)
 {
 	int r = swiotlb_map_sg_attrs(dev, sg, nents, direction, attrs);
 	mb();
@@ -157,7 +157,7 @@ static void octeon_dma_sync_sg_for_device(struct device *dev,
 }
 
 static void *octeon_dma_alloc_coherent(struct device *dev, size_t size,
-	dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs)
+	dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
 	void *ret;
 
@@ -189,7 +189,7 @@ static void *octeon_dma_alloc_coherent(struct device *dev, size_t size,
 }
 
 static void octeon_dma_free_coherent(struct device *dev, size_t size,
-	void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs)
+	void *vaddr, dma_addr_t dma_handle, unsigned long attrs)
 {
 	swiotlb_free_coherent(dev, size, vaddr, dma_handle);
 }
diff --git a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
index 504ed61a47cd..b65a6c1ac016 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
@@ -668,7 +668,7 @@ int64_t cvmx_bootmem_phy_named_block_alloc(uint64_t size, uint64_t min_addr,
 	/*
 	 * Round size up to mult of minimum alignment bytes We need
 	 * the actual size allocated to allow for blocks to be
-	 * coallesced when they are freed.  The alloc routine does the
+	 * coalesced when they are freed. The alloc routine does the
 	 * same rounding up on all allocations.
 	 */
 	size = ALIGN(size, CVMX_BOOTMEM_ALIGNMENT_SIZE);
diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c
index 36e30d65ba05..ff49fc04500c 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c
@@ -186,15 +186,6 @@ int cvmx_helper_board_get_mii_address(int ipd_port)
 			return 7 - ipd_port;
 		else
 			return -1;
-	case CVMX_BOARD_TYPE_CUST_DSR1000N:
-		/*
-		 * Port 2 connects to Broadcom PHY (B5081). Other ports (0-1)
-		 * connect to a switch (BCM53115).
-		 */
-		if (ipd_port == 2)
-			return 8;
-		else
-			return -1;
 	case CVMX_BOARD_TYPE_KONTRON_S1901:
 		if (ipd_port == CVMX_HELPER_BOARD_MGMT_IPD_PORT)
 			return 1;
@@ -289,18 +280,6 @@ cvmx_helper_link_info_t __cvmx_helper_board_link_get(int ipd_port)
 			return result;
 		}
 		break;
-	case CVMX_BOARD_TYPE_CUST_DSR1000N:
-		if (ipd_port == 0 || ipd_port == 1) {
-			/* Ports 0 and 1 connect to a switch (BCM53115). */
-			result.s.link_up = 1;
-			result.s.full_duplex = 1;
-			result.s.speed = 1000;
-			return result;
-		} else {
-			/* Port 2 uses a Broadcom PHY (B5081). */
-			is_broadcom_phy = 1;
-		}
-		break;
 	}
 
 	phy_addr = cvmx_helper_board_get_mii_address(ipd_port);
@@ -765,7 +744,6 @@ enum cvmx_helper_board_usb_clock_types __cvmx_helper_board_usb_get_clock_type(vo
 	case CVMX_BOARD_TYPE_LANAI2_G:
 	case CVMX_BOARD_TYPE_NIC10E_66:
 	case CVMX_BOARD_TYPE_UBNT_E100:
-	case CVMX_BOARD_TYPE_CUST_DSR1000N:
 		return USB_CLOCK_TYPE_CRYSTAL_12;
 	case CVMX_BOARD_TYPE_NIC10E:
 		return USB_CLOCK_TYPE_REF_12;
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 368eb490354c..5a9b87b7993e 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -1260,7 +1260,7 @@ static int octeon_irq_gpio_map(struct irq_domain *d,
 
 	line = (hw + gpiod->base_hwirq) >> 6;
 	bit = (hw + gpiod->base_hwirq) & 63;
-	if (line > ARRAY_SIZE(octeon_irq_ciu_to_irq) ||
+	if (line >= ARRAY_SIZE(octeon_irq_ciu_to_irq) ||
 		octeon_irq_ciu_to_irq[line][bit] != 0)
 		return -EINVAL;
 
@@ -1542,10 +1542,6 @@ static int __init octeon_irq_init_ciu(
 			goto err;
 	}
 
-	r = octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_USB0, 0, 56);
-	if (r)
-		goto err;
-
 	r = octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_TWSI2, 0, 59);
 	if (r)
 		goto err;
@@ -1559,10 +1555,6 @@ static int __init octeon_irq_init_ciu(
 			goto err;
 	}
 
-	r = octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_USB1, 1, 17);
-	if (r)
-		goto err;
-
 	/* Enable the CIU lines */
 	set_c0_status(STATUSF_IP3 | STATUSF_IP2);
 	if (octeon_irq_use_ip4)
@@ -2077,10 +2069,6 @@ static int __init octeon_irq_init_ciu2(
 			goto err;
 	}
 
-	r = octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_USB0, 3, 44);
-	if (r)
-		goto err;
-
 	for (i = 0; i < 4; i++) {
 		r = octeon_irq_force_ciu_mapping(
 			ciu_domain, i + OCTEON_IRQ_PCI_INT0, 4, i);
diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
index 7aeafedff94e..b31fbc9d6eae 100644
--- a/arch/mips/cavium-octeon/octeon-platform.c
+++ b/arch/mips/cavium-octeon/octeon-platform.c
@@ -3,33 +3,27 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2004-2011 Cavium Networks
+ * Copyright (C) 2004-2016 Cavium Networks
  * Copyright (C) 2008 Wind River Systems
  */
 
-#include <linux/delay.h>
 #include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/i2c.h>
-#include <linux/usb.h>
-#include <linux/dma-mapping.h>
+#include <linux/delay.h>
 #include <linux/etherdevice.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-#include <linux/platform_device.h>
 #include <linux/of_platform.h>
 #include <linux/of_fdt.h>
 #include <linux/libfdt.h>
+#include <linux/usb/ehci_def.h>
 #include <linux/usb/ehci_pdriver.h>
 #include <linux/usb/ohci_pdriver.h>
 
 #include <asm/octeon/octeon.h>
-#include <asm/octeon/cvmx-rnm-defs.h>
-#include <asm/octeon/cvmx-helper.h>
 #include <asm/octeon/cvmx-helper-board.h>
 #include <asm/octeon/cvmx-uctlx-defs.h>
 
+#define CVMX_UAHCX_EHCI_USBCMD	(CVMX_ADD_IO_SEG(0x00016F0000000010ull))
+#define CVMX_UAHCX_OHCI_USBCMD	(CVMX_ADD_IO_SEG(0x00016F0000000408ull))
+
 /* Octeon Random Number Generator.  */
 static int __init octeon_rng_device_init(void)
 {
@@ -78,12 +72,36 @@ static DEFINE_MUTEX(octeon2_usb_clocks_mutex);
 
 static int octeon2_usb_clock_start_cnt;
 
+static int __init octeon2_usb_reset(void)
+{
+	union cvmx_uctlx_clk_rst_ctl clk_rst_ctl;
+	u32 ucmd;
+
+	if (!OCTEON_IS_OCTEON2())
+		return 0;
+
+	clk_rst_ctl.u64 = cvmx_read_csr(CVMX_UCTLX_CLK_RST_CTL(0));
+	if (clk_rst_ctl.s.hrst) {
+		ucmd = cvmx_read64_uint32(CVMX_UAHCX_EHCI_USBCMD);
+		ucmd &= ~CMD_RUN;
+		cvmx_write64_uint32(CVMX_UAHCX_EHCI_USBCMD, ucmd);
+		mdelay(2);
+		ucmd |= CMD_RESET;
+		cvmx_write64_uint32(CVMX_UAHCX_EHCI_USBCMD, ucmd);
+		ucmd = cvmx_read64_uint32(CVMX_UAHCX_OHCI_USBCMD);
+		ucmd |= CMD_RUN;
+		cvmx_write64_uint32(CVMX_UAHCX_OHCI_USBCMD, ucmd);
+	}
+
+	return 0;
+}
+arch_initcall(octeon2_usb_reset);
+
 static void octeon2_usb_clocks_start(struct device *dev)
 {
 	u64 div;
 	union cvmx_uctlx_if_ena if_ena;
 	union cvmx_uctlx_clk_rst_ctl clk_rst_ctl;
-	union cvmx_uctlx_uphy_ctl_status uphy_ctl_status;
 	union cvmx_uctlx_uphy_portx_ctl_status port_ctl_status;
 	int i;
 	unsigned long io_clk_64_to_ns;
@@ -131,6 +149,17 @@ static void octeon2_usb_clocks_start(struct device *dev)
 	if_ena.s.en = 1;
 	cvmx_write_csr(CVMX_UCTLX_IF_ENA(0), if_ena.u64);
 
+	for (i = 0; i <= 1; i++) {
+		port_ctl_status.u64 =
+			cvmx_read_csr(CVMX_UCTLX_UPHY_PORTX_CTL_STATUS(i, 0));
+		/* Set txvreftune to 15 to obtain compliant 'eye' diagram. */
+		port_ctl_status.s.txvreftune = 15;
+		port_ctl_status.s.txrisetune = 1;
+		port_ctl_status.s.txpreemphasistune = 1;
+		cvmx_write_csr(CVMX_UCTLX_UPHY_PORTX_CTL_STATUS(i, 0),
+			       port_ctl_status.u64);
+	}
+
 	/* Step 3: Configure the reference clock, PHY, and HCLK */
 	clk_rst_ctl.u64 = cvmx_read_csr(CVMX_UCTLX_CLK_RST_CTL(0));
 
@@ -218,29 +247,10 @@ static void octeon2_usb_clocks_start(struct device *dev)
 	clk_rst_ctl.s.p_por = 0;
 	cvmx_write_csr(CVMX_UCTLX_CLK_RST_CTL(0), clk_rst_ctl.u64);
 
-	/* Step 5:    Wait 1 ms for the PHY clock to start. */
-	mdelay(1);
+	/* Step 5:    Wait 3 ms for the PHY clock to start. */
+	mdelay(3);
 
-	/*
-	 * Step 6: Program the reset input from automatic test
-	 * equipment field in the UPHY CSR
-	 */
-	uphy_ctl_status.u64 = cvmx_read_csr(CVMX_UCTLX_UPHY_CTL_STATUS(0));
-	uphy_ctl_status.s.ate_reset = 1;
-	cvmx_write_csr(CVMX_UCTLX_UPHY_CTL_STATUS(0), uphy_ctl_status.u64);
-
-	/* Step 7: Wait for at least 10ns. */
-	ndelay(10);
-
-	/* Step 8: Clear the ATE_RESET field in the UPHY CSR. */
-	uphy_ctl_status.s.ate_reset = 0;
-	cvmx_write_csr(CVMX_UCTLX_UPHY_CTL_STATUS(0), uphy_ctl_status.u64);
-
-	/*
-	 * Step 9: Wait for at least 20ns for UPHY to output PHY clock
-	 * signals and OHCI_CLK48
-	 */
-	ndelay(20);
+	/* Steps 6..9 for ATE only, are skipped. */
 
 	/* Step 10: Configure the OHCI_CLK48 and OHCI_CLK12 clocks. */
 	/* 10a */
@@ -261,6 +271,20 @@ static void octeon2_usb_clocks_start(struct device *dev)
 	clk_rst_ctl.s.p_prst = 1;
 	cvmx_write_csr(CVMX_UCTLX_CLK_RST_CTL(0), clk_rst_ctl.u64);
 
+	/* Step 11b */
+	udelay(1);
+
+	/* Step 11c */
+	clk_rst_ctl.s.p_prst = 0;
+	cvmx_write_csr(CVMX_UCTLX_CLK_RST_CTL(0), clk_rst_ctl.u64);
+
+	/* Step 11d */
+	mdelay(1);
+
+	/* Step 11e */
+	clk_rst_ctl.s.p_prst = 1;
+	cvmx_write_csr(CVMX_UCTLX_CLK_RST_CTL(0), clk_rst_ctl.u64);
+
 	/* Step 12: Wait 1 uS. */
 	udelay(1);
 
@@ -269,21 +293,9 @@ static void octeon2_usb_clocks_start(struct device *dev)
 	cvmx_write_csr(CVMX_UCTLX_CLK_RST_CTL(0), clk_rst_ctl.u64);
 
 end_clock:
-	/* Now we can set some other registers.  */
-
-	for (i = 0; i <= 1; i++) {
-		port_ctl_status.u64 =
-			cvmx_read_csr(CVMX_UCTLX_UPHY_PORTX_CTL_STATUS(i, 0));
-		/* Set txvreftune to 15 to obtain compliant 'eye' diagram. */
-		port_ctl_status.s.txvreftune = 15;
-		port_ctl_status.s.txrisetune = 1;
-		port_ctl_status.s.txpreemphasistune = 1;
-		cvmx_write_csr(CVMX_UCTLX_UPHY_PORTX_CTL_STATUS(i, 0),
-			       port_ctl_status.u64);
-	}
-
 	/* Set uSOF cycle period to 60,000 bits. */
 	cvmx_write_csr(CVMX_UCTLX_EHCI_FLA(0), 0x20ull);
+
 exit:
 	mutex_unlock(&octeon2_usb_clocks_mutex);
 }
@@ -311,7 +323,11 @@ static struct usb_ehci_pdata octeon_ehci_pdata = {
 #ifdef __BIG_ENDIAN
 	.big_endian_mmio	= 1,
 #endif
-	.dma_mask_64	= 1,
+	/*
+	 * We can DMA from anywhere. But the descriptors must be in
+	 * the lower 4GB.
+	 */
+	.dma_mask_64	= 0,
 	.power_on	= octeon_ehci_power_on,
 	.power_off	= octeon_ehci_power_off,
 };
@@ -689,6 +705,10 @@ int __init octeon_prune_device_tree(void)
 	if (fdt_check_header(initial_boot_params))
 		panic("Corrupt Device Tree.");
 
+	WARN(octeon_bootinfo->board_type == CVMX_BOARD_TYPE_CUST_DSR1000N,
+	     "Built-in DTB booting is deprecated on %s. Please switch to use appended DTB.",
+	     cvmx_board_type_to_string(octeon_bootinfo->board_type));
+
 	aliases = fdt_path_offset(initial_boot_params, "/aliases");
 	if (aliases < 0) {
 		pr_err("Error: No /aliases node in device tree.");
@@ -1032,13 +1052,6 @@ end_led:
 		}
 	}
 
-	if (octeon_bootinfo->board_type != CVMX_BOARD_TYPE_CUST_DSR1000N) {
-		int dsr1000n_leds = fdt_path_offset(initial_boot_params,
-						    "/dsr1000n-leds");
-		if (dsr1000n_leds >= 0)
-			fdt_nop_node(initial_boot_params, dsr1000n_leds);
-	}
-
 	return 0;
 }
 
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index 64f852b063a8..cb16fcc5f8f0 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -40,9 +40,27 @@
 
 #include <asm/octeon/octeon.h>
 #include <asm/octeon/pci-octeon.h>
-#include <asm/octeon/cvmx-mio-defs.h>
 #include <asm/octeon/cvmx-rst-defs.h>
 
+/*
+ * TRUE for devices having registers with little-endian byte
+ * order, FALSE for registers with native-endian byte order.
+ * PCI mandates little-endian, USB and SATA are configuraable,
+ * but we chose little-endian for these.
+ */
+const bool octeon_should_swizzle_table[256] = {
+	[0x00] = true,	/* bootbus/CF */
+	[0x1b] = true,	/* PCI mmio window */
+	[0x1c] = true,	/* PCI mmio window */
+	[0x1d] = true,	/* PCI mmio window */
+	[0x1e] = true,	/* PCI mmio window */
+	[0x68] = true,	/* OCTEON III USB */
+	[0x69] = true,	/* OCTEON III USB */
+	[0x6c] = true,	/* OCTEON III SATA */
+	[0x6f] = true,	/* OCTEON II USB */
+};
+EXPORT_SYMBOL(octeon_should_swizzle_table);
+
 #ifdef CONFIG_PCI
 extern void pci_console_init(const char *arg);
 #endif
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 33aab89259f3..4d457d602d3b 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -271,6 +271,7 @@ static int octeon_cpu_disable(void)
 		return -ENOTSUPP;
 
 	set_cpu_online(cpu, false);
+	calculate_cpu_foreign_map();
 	cpumask_clear_cpu(cpu, &cpu_callin_map);
 	octeon_fixup_irqs();
 
diff --git a/arch/mips/cobalt/setup.c b/arch/mips/cobalt/setup.c
index 9a8c2fe8d334..c136a18c7221 100644
--- a/arch/mips/cobalt/setup.c
+++ b/arch/mips/cobalt/setup.c
@@ -42,8 +42,8 @@ const char *get_system_type(void)
 
 /*
  * Cobalt doesn't have PS/2 keyboard/mouse interfaces,
- * keyboard conntroller is never used.
- * Also PCI-ISA bridge DMA contoroller is never used.
+ * keyboard controller is never used.
+ * Also PCI-ISA bridge DMA controller is never used.
  */
 static struct resource cobalt_reserved_resources[] = {
 	{	/* dma1 */
diff --git a/arch/mips/configs/ath25_defconfig b/arch/mips/configs/ath25_defconfig
new file mode 100644
index 000000000000..2c829950be17
--- /dev/null
+++ b/arch/mips/configs/ath25_defconfig
@@ -0,0 +1,119 @@
+CONFIG_ATH25=y
+# CONFIG_COMPACTION is not set
+CONFIG_HZ_100=y
+# CONFIG_SECCOMP is not set
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+# CONFIG_FHANDLE is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_GZIP is not set
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_XZ is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+# CONFIG_AIO is not set
+CONFIG_EMBEDDED=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_CFQ is not set
+# CONFIG_SUSPEND is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+CONFIG_CFG80211=m
+CONFIG_MAC80211=m
+CONFIG_MAC80211_DEBUGFS=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_MTD=y
+CONFIG_MTD_REDBOOT_PARTS=y
+CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-2
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_ADV_OPTIONS=y
+CONFIG_MTD_CFI_GEOMETRY=y
+# CONFIG_MTD_MAP_BANK_WIDTH_1 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_4 is not set
+# CONFIG_MTD_CFI_I2 is not set
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_COMPLEX_MAPPINGS=y
+CONFIG_MTD_PHYSMAP=y
+CONFIG_NETDEVICES=y
+# CONFIG_ETHERNET is not set
+# CONFIG_WLAN_VENDOR_ADMTEK is not set
+CONFIG_ATH5K=m
+# CONFIG_WLAN_VENDOR_ATMEL is not set
+# CONFIG_WLAN_VENDOR_BROADCOM is not set
+# CONFIG_WLAN_VENDOR_CISCO is not set
+# CONFIG_WLAN_VENDOR_INTEL is not set
+# CONFIG_WLAN_VENDOR_INTERSIL is not set
+# CONFIG_WLAN_VENDOR_MARVELL is not set
+# CONFIG_WLAN_VENDOR_MEDIATEK is not set
+# CONFIG_WLAN_VENDOR_RALINK is not set
+# CONFIG_WLAN_VENDOR_REALTEK is not set
+# CONFIG_WLAN_VENDOR_RSI is not set
+# CONFIG_WLAN_VENDOR_ST is not set
+# CONFIG_WLAN_VENDOR_TI is not set
+# CONFIG_WLAN_VENDOR_ZYDAS is not set
+CONFIG_INPUT=m
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_VGA_ARB is not set
+CONFIG_USB=m
+CONFIG_USB_EHCI_HCD=m
+CONFIG_LEDS_CLASS=y
+# CONFIG_IOMMU_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_PROC_PAGE_MONITOR is not set
+CONFIG_TMPFS=y
+CONFIG_TMPFS_XATTR=y
+CONFIG_JFFS2_FS=y
+CONFIG_JFFS2_SUMMARY=y
+CONFIG_JFFS2_FS_XATTR=y
+# CONFIG_JFFS2_FS_POSIX_ACL is not set
+# CONFIG_JFFS2_FS_SECURITY is not set
+CONFIG_JFFS2_COMPRESSION_OPTIONS=y
+# CONFIG_JFFS2_ZLIB is not set
+CONFIG_SQUASHFS=y
+CONFIG_SQUASHFS_FILE_DIRECT=y
+CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU=y
+# CONFIG_SQUASHFS_ZLIB is not set
+CONFIG_SQUASHFS_XZ=y
+CONFIG_PRINTK_TIME=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_DEBUG_FS=y
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_FTRACE is not set
+# CONFIG_XZ_DEC_X86 is not set
+# CONFIG_XZ_DEC_POWERPC is not set
+# CONFIG_XZ_DEC_IA64 is not set
+# CONFIG_XZ_DEC_ARM is not set
+# CONFIG_XZ_DEC_ARMTHUMB is not set
+# CONFIG_XZ_DEC_SPARC is not set
diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig
index dcac308cec39..d470d08362c0 100644
--- a/arch/mips/configs/cavium_octeon_defconfig
+++ b/arch/mips/configs/cavium_octeon_defconfig
@@ -59,6 +59,8 @@ CONFIG_EEPROM_AT25=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_ATA=y
 CONFIG_SATA_AHCI=y
+CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_AHCI_OCTEON=y
 CONFIG_PATA_OCTEON_CF=y
 CONFIG_SATA_SIL=y
 CONFIG_NETDEVICES=y
diff --git a/arch/mips/include/asm/addrspace.h b/arch/mips/include/asm/addrspace.h
index 3b0e51d5a613..c5b04e752e97 100644
--- a/arch/mips/include/asm/addrspace.h
+++ b/arch/mips/include/asm/addrspace.h
@@ -45,7 +45,7 @@
 /*
  * Returns the kernel segment base of a given address
  */
-#define KSEGX(a)		((_ACAST32_ (a)) & 0xe0000000)
+#define KSEGX(a)		((_ACAST32_(a)) & _ACAST32_(0xe0000000))
 
 /*
  * Returns the physical address of a CKSEGx / XKPHYS address
diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h
index 9f67033961a6..ee9f5f2d18fc 100644
--- a/arch/mips/include/asm/bootinfo.h
+++ b/arch/mips/include/asm/bootinfo.h
@@ -127,6 +127,10 @@ extern char arcs_cmdline[COMMAND_LINE_SIZE];
  */
 extern unsigned long fw_arg0, fw_arg1, fw_arg2, fw_arg3;
 
+#ifdef CONFIG_USE_OF
+extern unsigned long fw_passed_dtb;
+#endif
+
 /*
  * Platform memory detection hook called by setup_arch
  */
diff --git a/arch/mips/include/asm/dsemul.h b/arch/mips/include/asm/dsemul.h
new file mode 100644
index 000000000000..a6e067801f23
--- /dev/null
+++ b/arch/mips/include/asm/dsemul.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2016 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __MIPS_ASM_DSEMUL_H__
+#define __MIPS_ASM_DSEMUL_H__
+
+#include <asm/break.h>
+#include <asm/inst.h>
+
+/* Break instruction with special math emu break code set */
+#define BREAK_MATH(micromips)	(((micromips) ? 0x7 : 0xd) | (BRK_MEMU << 16))
+
+/* When used as a frame index, indicates the lack of a frame */
+#define BD_EMUFRAME_NONE	((int)BIT(31))
+
+struct mm_struct;
+struct pt_regs;
+struct task_struct;
+
+/**
+ * mips_dsemul() - 'Emulate' an instruction from a branch delay slot
+ * @regs:	User thread register context.
+ * @ir:		The instruction to be 'emulated'.
+ * @branch_pc:	The PC of the branch instruction.
+ * @cont_pc:	The PC to continue at following 'emulation'.
+ *
+ * Emulate or execute an arbitrary MIPS instruction within the context of
+ * the current user thread. This is used primarily to handle instructions
+ * in the delay slots of emulated branch instructions, for example FP
+ * branch instructions on systems without an FPU.
+ *
+ * Return: Zero on success, negative if ir is a NOP, signal number on failure.
+ */
+extern int mips_dsemul(struct pt_regs *regs, mips_instruction ir,
+		       unsigned long branch_pc, unsigned long cont_pc);
+
+/**
+ * do_dsemulret() - Return from a delay slot 'emulation' frame
+ * @xcp:	User thread register context.
+ *
+ * Call in response to the BRK_MEMU break instruction used to return to
+ * the kernel from branch delay slot 'emulation' frames following a call
+ * to mips_dsemul(). Restores the user thread PC to the value that was
+ * passed as the cpc parameter to mips_dsemul().
+ *
+ * Return: True if an emulation frame was returned from, else false.
+ */
+extern bool do_dsemulret(struct pt_regs *xcp);
+
+/**
+ * dsemul_thread_cleanup() - Cleanup thread 'emulation' frame
+ * @tsk: The task structure associated with the thread
+ *
+ * If the thread @tsk has a branch delay slot 'emulation' frame
+ * allocated to it then free that frame.
+ *
+ * Return: True if a frame was freed, else false.
+ */
+extern bool dsemul_thread_cleanup(struct task_struct *tsk);
+
+/**
+ * dsemul_thread_rollback() - Rollback from an 'emulation' frame
+ * @regs:	User thread register context.
+ *
+ * If the current thread, whose register context is represented by @regs,
+ * is executing within a delay slot 'emulation' frame then exit that
+ * frame. The PC will be rolled back to the branch if the instruction
+ * that was being 'emulated' has not yet executed, or advanced to the
+ * continuation PC if it has.
+ *
+ * Return: True if a frame was exited, else false.
+ */
+extern bool dsemul_thread_rollback(struct pt_regs *regs);
+
+/**
+ * dsemul_mm_cleanup() - Cleanup per-mm delay slot 'emulation' state
+ * @mm:		The struct mm_struct to cleanup state for.
+ *
+ * Cleanup state for the given @mm, ensuring that any memory allocated
+ * for delay slot 'emulation' book-keeping is freed. This is to be called
+ * before @mm is freed in order to avoid memory leaks.
+ */
+extern void dsemul_mm_cleanup(struct mm_struct *mm);
+
+#endif /* __MIPS_ASM_DSEMUL_H__ */
diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h
index f5f45717968e..2b3dc2973670 100644
--- a/arch/mips/include/asm/elf.h
+++ b/arch/mips/include/asm/elf.h
@@ -458,6 +458,7 @@ extern const char *__elf_platform;
 #define ELF_ET_DYN_BASE		(TASK_SIZE / 3 * 2)
 #endif
 
+/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
 #define ARCH_DLINFO							\
 do {									\
 	NEW_AUX_ENT(AT_SYSINFO_EHDR,					\
@@ -498,4 +499,7 @@ extern int arch_check_elf(void *ehdr, bool has_interpreter, void *interp_ehdr,
 extern void mips_set_personality_nan(struct arch_elf_state *state);
 extern void mips_set_personality_fp(struct arch_elf_state *state);
 
+#define elf_read_implies_exec(ex, stk) mips_elf_read_implies_exec(&(ex), stk)
+extern int mips_elf_read_implies_exec(void *elf_ex, int exstack);
+
 #endif /* _ASM_ELF_H */
diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h
index 3225c3c0724b..355dc25172e7 100644
--- a/arch/mips/include/asm/fpu_emulator.h
+++ b/arch/mips/include/asm/fpu_emulator.h
@@ -24,7 +24,7 @@
 #define _ASM_FPU_EMULATOR_H
 
 #include <linux/sched.h>
-#include <asm/break.h>
+#include <asm/dsemul.h>
 #include <asm/thread_info.h>
 #include <asm/inst.h>
 #include <asm/local.h>
@@ -60,27 +60,16 @@ do {									\
 #define MIPS_FPU_EMU_INC_STATS(M) do { } while (0)
 #endif /* CONFIG_DEBUG_FS */
 
-extern int mips_dsemul(struct pt_regs *regs, mips_instruction ir,
-	unsigned long cpc);
-extern int do_dsemulret(struct pt_regs *xcp);
 extern int fpu_emulator_cop1Handler(struct pt_regs *xcp,
 				    struct mips_fpu_struct *ctx, int has_fpu,
 				    void *__user *fault_addr);
 int process_fpemu_return(int sig, void __user *fault_addr,
 			 unsigned long fcr31);
+int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
+		  unsigned long *contpc);
 int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
 		     unsigned long *contpc);
 
-/*
- * Instruction inserted following the badinst to further tag the sequence
- */
-#define BD_COOKIE 0x0000bd36	/* tne $0, $0 with baggage */
-
-/*
- * Break instruction with special math emu break code set
- */
-#define BREAK_MATH(micromips) (((micromips) ? 0x7 : 0xd) | (BRK_MEMU << 16))
-
 #define SIGNALLING_NAN 0x7ff800007ff80000LL
 
 static inline void fpu_emulator_init_fpu(void)
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 36a391d289aa..b54bcadd8aec 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -19,6 +19,9 @@
 #include <linux/threads.h>
 #include <linux/spinlock.h>
 
+#include <asm/inst.h>
+#include <asm/mipsregs.h>
+
 /* MIPS KVM register ids */
 #define MIPS_CP0_32(_R, _S)					\
 	(KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U32 | (8 * (_R) + (_S)))
@@ -53,6 +56,12 @@
 #define KVM_REG_MIPS_CP0_CONFIG7	MIPS_CP0_32(16, 7)
 #define KVM_REG_MIPS_CP0_XCONTEXT	MIPS_CP0_64(20, 0)
 #define KVM_REG_MIPS_CP0_ERROREPC	MIPS_CP0_64(30, 0)
+#define KVM_REG_MIPS_CP0_KSCRATCH1	MIPS_CP0_64(31, 2)
+#define KVM_REG_MIPS_CP0_KSCRATCH2	MIPS_CP0_64(31, 3)
+#define KVM_REG_MIPS_CP0_KSCRATCH3	MIPS_CP0_64(31, 4)
+#define KVM_REG_MIPS_CP0_KSCRATCH4	MIPS_CP0_64(31, 5)
+#define KVM_REG_MIPS_CP0_KSCRATCH5	MIPS_CP0_64(31, 6)
+#define KVM_REG_MIPS_CP0_KSCRATCH6	MIPS_CP0_64(31, 7)
 
 
 #define KVM_MAX_VCPUS		1
@@ -65,8 +74,14 @@
 
 
 
-/* Special address that contains the comm page, used for reducing # of traps */
-#define KVM_GUEST_COMMPAGE_ADDR		0x0
+/*
+ * Special address that contains the comm page, used for reducing # of traps
+ * This needs to be within 32Kb of 0x0 (so the zero register can be used), but
+ * preferably not at 0x0 so that most kernel NULL pointer dereferences can be
+ * caught.
+ */
+#define KVM_GUEST_COMMPAGE_ADDR		((PAGE_SIZE > 0x8000) ?	0 : \
+					 (0x8000 - PAGE_SIZE))
 
 #define KVM_GUEST_KERNEL_MODE(vcpu)	((kvm_read_c0_guest_status(vcpu->arch.cop0) & (ST0_EXL | ST0_ERL)) || \
 					((kvm_read_c0_guest_status(vcpu->arch.cop0) & KSU_USER) == 0))
@@ -93,9 +108,6 @@
 #define KVM_INVALID_ADDR		0xdeadbeef
 
 extern atomic_t kvm_mips_instance;
-extern kvm_pfn_t (*kvm_mips_gfn_to_pfn)(struct kvm *kvm, gfn_t gfn);
-extern void (*kvm_mips_release_pfn_clean)(kvm_pfn_t pfn);
-extern bool (*kvm_mips_is_error_pfn)(kvm_pfn_t pfn);
 
 struct kvm_vm_stat {
 	u32 remote_tlb_flush;
@@ -126,28 +138,6 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
-enum kvm_mips_exit_types {
-	WAIT_EXITS,
-	CACHE_EXITS,
-	SIGNAL_EXITS,
-	INT_EXITS,
-	COP_UNUSABLE_EXITS,
-	TLBMOD_EXITS,
-	TLBMISS_LD_EXITS,
-	TLBMISS_ST_EXITS,
-	ADDRERR_ST_EXITS,
-	ADDRERR_LD_EXITS,
-	SYSCALL_EXITS,
-	RESVD_INST_EXITS,
-	BREAK_INST_EXITS,
-	TRAP_INST_EXITS,
-	MSA_FPE_EXITS,
-	FPE_EXITS,
-	MSA_DISABLED_EXITS,
-	FLUSH_DCACHE_EXITS,
-	MAX_KVM_MIPS_EXIT_TYPES
-};
-
 struct kvm_arch_memory_slot {
 };
 
@@ -215,73 +205,6 @@ struct mips_coproc {
 #define MIPS_CP0_CONFIG4_SEL	4
 #define MIPS_CP0_CONFIG5_SEL	5
 
-/* Config0 register bits */
-#define CP0C0_M			31
-#define CP0C0_K23		28
-#define CP0C0_KU		25
-#define CP0C0_MDU		20
-#define CP0C0_MM		17
-#define CP0C0_BM		16
-#define CP0C0_BE		15
-#define CP0C0_AT		13
-#define CP0C0_AR		10
-#define CP0C0_MT		7
-#define CP0C0_VI		3
-#define CP0C0_K0		0
-
-/* Config1 register bits */
-#define CP0C1_M			31
-#define CP0C1_MMU		25
-#define CP0C1_IS		22
-#define CP0C1_IL		19
-#define CP0C1_IA		16
-#define CP0C1_DS		13
-#define CP0C1_DL		10
-#define CP0C1_DA		7
-#define CP0C1_C2		6
-#define CP0C1_MD		5
-#define CP0C1_PC		4
-#define CP0C1_WR		3
-#define CP0C1_CA		2
-#define CP0C1_EP		1
-#define CP0C1_FP		0
-
-/* Config2 Register bits */
-#define CP0C2_M			31
-#define CP0C2_TU		28
-#define CP0C2_TS		24
-#define CP0C2_TL		20
-#define CP0C2_TA		16
-#define CP0C2_SU		12
-#define CP0C2_SS		8
-#define CP0C2_SL		4
-#define CP0C2_SA		0
-
-/* Config3 Register bits */
-#define CP0C3_M			31
-#define CP0C3_ISA_ON_EXC	16
-#define CP0C3_ULRI		13
-#define CP0C3_DSPP		10
-#define CP0C3_LPA		7
-#define CP0C3_VEIC		6
-#define CP0C3_VInt		5
-#define CP0C3_SP		4
-#define CP0C3_MT		2
-#define CP0C3_SM		1
-#define CP0C3_TL		0
-
-/* MMU types, the first four entries have the same layout as the
-   CP0C0_MT field.  */
-enum mips_mmu_types {
-	MMU_TYPE_NONE,
-	MMU_TYPE_R4000,
-	MMU_TYPE_RESERVED,
-	MMU_TYPE_FMT,
-	MMU_TYPE_R3000,
-	MMU_TYPE_R6000,
-	MMU_TYPE_R8000
-};
-
 /* Resume Flags */
 #define RESUME_FLAG_DR		(1<<0)	/* Reload guest nonvolatile state? */
 #define RESUME_FLAG_HOST	(1<<1)	/* Resume host? */
@@ -298,11 +221,6 @@ enum emulation_result {
 	EMULATE_PRIV_FAIL,
 };
 
-#define MIPS3_PG_G	0x00000001 /* Global; ignore ASID if in lo0 & lo1 */
-#define MIPS3_PG_V	0x00000002 /* Valid */
-#define MIPS3_PG_NV	0x00000000
-#define MIPS3_PG_D	0x00000004 /* Dirty */
-
 #define mips3_paddr_to_tlbpfn(x) \
 	(((unsigned long)(x) >> MIPS3_PG_SHIFT) & MIPS3_PG_FRAME)
 #define mips3_tlbpfn_to_paddr(x) \
@@ -313,13 +231,11 @@ enum emulation_result {
 
 #define VPN2_MASK		0xffffe000
 #define KVM_ENTRYHI_ASID	MIPS_ENTRYHI_ASID
-#define TLB_IS_GLOBAL(x)	(((x).tlb_lo0 & MIPS3_PG_G) &&		\
-				 ((x).tlb_lo1 & MIPS3_PG_G))
+#define TLB_IS_GLOBAL(x)	((x).tlb_lo[0] & (x).tlb_lo[1] & ENTRYLO_G)
 #define TLB_VPN2(x)		((x).tlb_hi & VPN2_MASK)
 #define TLB_ASID(x)		((x).tlb_hi & KVM_ENTRYHI_ASID)
-#define TLB_IS_VALID(x, va)	(((va) & (1 << PAGE_SHIFT))		\
-				 ? ((x).tlb_lo1 & MIPS3_PG_V)		\
-				 : ((x).tlb_lo0 & MIPS3_PG_V))
+#define TLB_LO_IDX(x, va)	(((va) >> PAGE_SHIFT) & 1)
+#define TLB_IS_VALID(x, va)	((x).tlb_lo[TLB_LO_IDX(x, va)] & ENTRYLO_V)
 #define TLB_HI_VPN2_HIT(x, y)	((TLB_VPN2(x) & ~(x).tlb_mask) ==	\
 				 ((y) & VPN2_MASK & ~(x).tlb_mask))
 #define TLB_HI_ASID_HIT(x, y)	(TLB_IS_GLOBAL(x) ||			\
@@ -328,26 +244,23 @@ enum emulation_result {
 struct kvm_mips_tlb {
 	long tlb_mask;
 	long tlb_hi;
-	long tlb_lo0;
-	long tlb_lo1;
+	long tlb_lo[2];
 };
 
-#define KVM_MIPS_FPU_FPU	0x1
-#define KVM_MIPS_FPU_MSA	0x2
+#define KVM_MIPS_AUX_FPU	0x1
+#define KVM_MIPS_AUX_MSA	0x2
 
 #define KVM_MIPS_GUEST_TLB_SIZE	64
 struct kvm_vcpu_arch {
-	void *host_ebase, *guest_ebase;
+	void *guest_ebase;
 	int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
 	unsigned long host_stack;
 	unsigned long host_gp;
 
 	/* Host CP0 registers used when handling exits from guest */
 	unsigned long host_cp0_badvaddr;
-	unsigned long host_cp0_cause;
 	unsigned long host_cp0_epc;
-	unsigned long host_cp0_entryhi;
-	uint32_t guest_inst;
+	u32 host_cp0_cause;
 
 	/* GPRS */
 	unsigned long gprs[32];
@@ -357,8 +270,8 @@ struct kvm_vcpu_arch {
 
 	/* FPU State */
 	struct mips_fpu_struct fpu;
-	/* Which FPU state is loaded (KVM_MIPS_FPU_*) */
-	unsigned int fpu_inuse;
+	/* Which auxiliary state is loaded (KVM_MIPS_AUX_*) */
+	unsigned int aux_inuse;
 
 	/* COP0 State */
 	struct mips_coproc *cop0;
@@ -370,11 +283,11 @@ struct kvm_vcpu_arch {
 
 	struct hrtimer comparecount_timer;
 	/* Count timer control KVM register */
-	uint32_t count_ctl;
+	u32 count_ctl;
 	/* Count bias from the raw time */
-	uint32_t count_bias;
+	u32 count_bias;
 	/* Frequency of timer in Hz */
-	uint32_t count_hz;
+	u32 count_hz;
 	/* Dynamic nanosecond bias (multiple of count_period) to avoid overflow */
 	s64 count_dyn_bias;
 	/* Resume time */
@@ -388,7 +301,7 @@ struct kvm_vcpu_arch {
 	/* Bitmask of pending exceptions to be cleared */
 	unsigned long pending_exceptions_clr;
 
-	unsigned long pending_load_cause;
+	u32 pending_load_cause;
 
 	/* Save/Restore the entryhi register when are are preempted/scheduled back in */
 	unsigned long preempt_entryhi;
@@ -397,8 +310,8 @@ struct kvm_vcpu_arch {
 	struct kvm_mips_tlb guest_tlb[KVM_MIPS_GUEST_TLB_SIZE];
 
 	/* Cached guest kernel/user ASIDs */
-	uint32_t guest_user_asid[NR_CPUS];
-	uint32_t guest_kernel_asid[NR_CPUS];
+	u32 guest_user_asid[NR_CPUS];
+	u32 guest_kernel_asid[NR_CPUS];
 	struct mm_struct guest_kernel_mm, guest_user_mm;
 
 	int last_sched_cpu;
@@ -408,6 +321,7 @@ struct kvm_vcpu_arch {
 
 	u8 fpu_enabled;
 	u8 msa_enabled;
+	u8 kscratch_enabled;
 };
 
 
@@ -461,6 +375,18 @@ struct kvm_vcpu_arch {
 #define kvm_write_c0_guest_config7(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][7] = (val))
 #define kvm_read_c0_guest_errorepc(cop0)	(cop0->reg[MIPS_CP0_ERROR_PC][0])
 #define kvm_write_c0_guest_errorepc(cop0, val)	(cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
+#define kvm_read_c0_guest_kscratch1(cop0)	(cop0->reg[MIPS_CP0_DESAVE][2])
+#define kvm_read_c0_guest_kscratch2(cop0)	(cop0->reg[MIPS_CP0_DESAVE][3])
+#define kvm_read_c0_guest_kscratch3(cop0)	(cop0->reg[MIPS_CP0_DESAVE][4])
+#define kvm_read_c0_guest_kscratch4(cop0)	(cop0->reg[MIPS_CP0_DESAVE][5])
+#define kvm_read_c0_guest_kscratch5(cop0)	(cop0->reg[MIPS_CP0_DESAVE][6])
+#define kvm_read_c0_guest_kscratch6(cop0)	(cop0->reg[MIPS_CP0_DESAVE][7])
+#define kvm_write_c0_guest_kscratch1(cop0, val)	(cop0->reg[MIPS_CP0_DESAVE][2] = (val))
+#define kvm_write_c0_guest_kscratch2(cop0, val)	(cop0->reg[MIPS_CP0_DESAVE][3] = (val))
+#define kvm_write_c0_guest_kscratch3(cop0, val)	(cop0->reg[MIPS_CP0_DESAVE][4] = (val))
+#define kvm_write_c0_guest_kscratch4(cop0, val)	(cop0->reg[MIPS_CP0_DESAVE][5] = (val))
+#define kvm_write_c0_guest_kscratch5(cop0, val)	(cop0->reg[MIPS_CP0_DESAVE][6] = (val))
+#define kvm_write_c0_guest_kscratch6(cop0, val)	(cop0->reg[MIPS_CP0_DESAVE][7] = (val))
 
 /*
  * Some of the guest registers may be modified asynchronously (e.g. from a
@@ -474,7 +400,7 @@ static inline void _kvm_atomic_set_c0_guest_reg(unsigned long *reg,
 	unsigned long temp;
 	do {
 		__asm__ __volatile__(
-		"	.set	mips3				\n"
+		"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
 		"	" __LL "%0, %1				\n"
 		"	or	%0, %2				\n"
 		"	" __SC	"%0, %1				\n"
@@ -490,7 +416,7 @@ static inline void _kvm_atomic_clear_c0_guest_reg(unsigned long *reg,
 	unsigned long temp;
 	do {
 		__asm__ __volatile__(
-		"	.set	mips3				\n"
+		"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
 		"	" __LL "%0, %1				\n"
 		"	and	%0, %2				\n"
 		"	" __SC	"%0, %1				\n"
@@ -507,7 +433,7 @@ static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg,
 	unsigned long temp;
 	do {
 		__asm__ __volatile__(
-		"	.set	mips3				\n"
+		"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
 		"	" __LL "%0, %1				\n"
 		"	and	%0, %2				\n"
 		"	or	%0, %3				\n"
@@ -542,7 +468,7 @@ static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg,
 
 static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu)
 {
-	return (!__builtin_constant_p(cpu_has_fpu) || cpu_has_fpu) &&
+	return (!__builtin_constant_p(raw_cpu_has_fpu) || raw_cpu_has_fpu) &&
 		vcpu->fpu_enabled;
 }
 
@@ -589,9 +515,11 @@ struct kvm_mips_callbacks {
 	void (*dequeue_io_int)(struct kvm_vcpu *vcpu,
 			       struct kvm_mips_interrupt *irq);
 	int (*irq_deliver)(struct kvm_vcpu *vcpu, unsigned int priority,
-			   uint32_t cause);
+			   u32 cause);
 	int (*irq_clear)(struct kvm_vcpu *vcpu, unsigned int priority,
-			 uint32_t cause);
+			 u32 cause);
+	unsigned long (*num_regs)(struct kvm_vcpu *vcpu);
+	int (*copy_reg_indices)(struct kvm_vcpu *vcpu, u64 __user *indices);
 	int (*get_one_reg)(struct kvm_vcpu *vcpu,
 			   const struct kvm_one_reg *reg, s64 *v);
 	int (*set_one_reg)(struct kvm_vcpu *vcpu,
@@ -605,8 +533,13 @@ int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks);
 /* Debug: dump vcpu state */
 int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu);
 
-/* Trampoline ASM routine to start running in "Guest" context */
-extern int __kvm_mips_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu);
+extern int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu);
+
+/* Building of entry/exception code */
+int kvm_mips_entry_setup(void);
+void *kvm_mips_build_vcpu_run(void *addr);
+void *kvm_mips_build_exception(void *addr, void *handler);
+void *kvm_mips_build_exit(void *addr);
 
 /* FPU/MSA context management */
 void __kvm_save_fpu(struct kvm_vcpu_arch *vcpu);
@@ -622,11 +555,11 @@ void kvm_drop_fpu(struct kvm_vcpu *vcpu);
 void kvm_lose_fpu(struct kvm_vcpu *vcpu);
 
 /* TLB handling */
-uint32_t kvm_get_kernel_asid(struct kvm_vcpu *vcpu);
+u32 kvm_get_kernel_asid(struct kvm_vcpu *vcpu);
 
-uint32_t kvm_get_user_asid(struct kvm_vcpu *vcpu);
+u32 kvm_get_user_asid(struct kvm_vcpu *vcpu);
 
-uint32_t kvm_get_commpage_asid (struct kvm_vcpu *vcpu);
+u32 kvm_get_commpage_asid (struct kvm_vcpu *vcpu);
 
 extern int kvm_mips_handle_kseg0_tlb_fault(unsigned long badbaddr,
 					   struct kvm_vcpu *vcpu);
@@ -635,22 +568,24 @@ extern int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
 					      struct kvm_vcpu *vcpu);
 
 extern int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
-						struct kvm_mips_tlb *tlb,
-						unsigned long *hpa0,
-						unsigned long *hpa1);
+						struct kvm_mips_tlb *tlb);
 
-extern enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause,
-						     uint32_t *opc,
+extern enum emulation_result kvm_mips_handle_tlbmiss(u32 cause,
+						     u32 *opc,
 						     struct kvm_run *run,
 						     struct kvm_vcpu *vcpu);
 
-extern enum emulation_result kvm_mips_handle_tlbmod(unsigned long cause,
-						    uint32_t *opc,
+extern enum emulation_result kvm_mips_handle_tlbmod(u32 cause,
+						    u32 *opc,
 						    struct kvm_run *run,
 						    struct kvm_vcpu *vcpu);
 
 extern void kvm_mips_dump_host_tlbs(void);
 extern void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu);
+extern int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi,
+				   unsigned long entrylo0,
+				   unsigned long entrylo1,
+				   int flush_dcache_mask);
 extern void kvm_mips_flush_host_tlb(int skip_kseg0);
 extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi);
 
@@ -667,90 +602,90 @@ extern void kvm_mips_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 extern void kvm_mips_vcpu_put(struct kvm_vcpu *vcpu);
 
 /* Emulation */
-uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu);
-enum emulation_result update_pc(struct kvm_vcpu *vcpu, uint32_t cause);
+u32 kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu);
+enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause);
 
-extern enum emulation_result kvm_mips_emulate_inst(unsigned long cause,
-						   uint32_t *opc,
+extern enum emulation_result kvm_mips_emulate_inst(u32 cause,
+						   u32 *opc,
 						   struct kvm_run *run,
 						   struct kvm_vcpu *vcpu);
 
-extern enum emulation_result kvm_mips_emulate_syscall(unsigned long cause,
-						      uint32_t *opc,
+extern enum emulation_result kvm_mips_emulate_syscall(u32 cause,
+						      u32 *opc,
 						      struct kvm_run *run,
 						      struct kvm_vcpu *vcpu);
 
-extern enum emulation_result kvm_mips_emulate_tlbmiss_ld(unsigned long cause,
-							 uint32_t *opc,
+extern enum emulation_result kvm_mips_emulate_tlbmiss_ld(u32 cause,
+							 u32 *opc,
 							 struct kvm_run *run,
 							 struct kvm_vcpu *vcpu);
 
-extern enum emulation_result kvm_mips_emulate_tlbinv_ld(unsigned long cause,
-							uint32_t *opc,
+extern enum emulation_result kvm_mips_emulate_tlbinv_ld(u32 cause,
+							u32 *opc,
 							struct kvm_run *run,
 							struct kvm_vcpu *vcpu);
 
-extern enum emulation_result kvm_mips_emulate_tlbmiss_st(unsigned long cause,
-							 uint32_t *opc,
+extern enum emulation_result kvm_mips_emulate_tlbmiss_st(u32 cause,
+							 u32 *opc,
 							 struct kvm_run *run,
 							 struct kvm_vcpu *vcpu);
 
-extern enum emulation_result kvm_mips_emulate_tlbinv_st(unsigned long cause,
-							uint32_t *opc,
+extern enum emulation_result kvm_mips_emulate_tlbinv_st(u32 cause,
+							u32 *opc,
 							struct kvm_run *run,
 							struct kvm_vcpu *vcpu);
 
-extern enum emulation_result kvm_mips_emulate_tlbmod(unsigned long cause,
-						     uint32_t *opc,
+extern enum emulation_result kvm_mips_emulate_tlbmod(u32 cause,
+						     u32 *opc,
 						     struct kvm_run *run,
 						     struct kvm_vcpu *vcpu);
 
-extern enum emulation_result kvm_mips_emulate_fpu_exc(unsigned long cause,
-						      uint32_t *opc,
+extern enum emulation_result kvm_mips_emulate_fpu_exc(u32 cause,
+						      u32 *opc,
 						      struct kvm_run *run,
 						      struct kvm_vcpu *vcpu);
 
-extern enum emulation_result kvm_mips_handle_ri(unsigned long cause,
-						uint32_t *opc,
+extern enum emulation_result kvm_mips_handle_ri(u32 cause,
+						u32 *opc,
 						struct kvm_run *run,
 						struct kvm_vcpu *vcpu);
 
-extern enum emulation_result kvm_mips_emulate_ri_exc(unsigned long cause,
-						     uint32_t *opc,
+extern enum emulation_result kvm_mips_emulate_ri_exc(u32 cause,
+						     u32 *opc,
 						     struct kvm_run *run,
 						     struct kvm_vcpu *vcpu);
 
-extern enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause,
-						     uint32_t *opc,
+extern enum emulation_result kvm_mips_emulate_bp_exc(u32 cause,
+						     u32 *opc,
 						     struct kvm_run *run,
 						     struct kvm_vcpu *vcpu);
 
-extern enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause,
-						       uint32_t *opc,
+extern enum emulation_result kvm_mips_emulate_trap_exc(u32 cause,
+						       u32 *opc,
 						       struct kvm_run *run,
 						       struct kvm_vcpu *vcpu);
 
-extern enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause,
-							 uint32_t *opc,
+extern enum emulation_result kvm_mips_emulate_msafpe_exc(u32 cause,
+							 u32 *opc,
 							 struct kvm_run *run,
 							 struct kvm_vcpu *vcpu);
 
-extern enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause,
-						      uint32_t *opc,
+extern enum emulation_result kvm_mips_emulate_fpe_exc(u32 cause,
+						      u32 *opc,
 						      struct kvm_run *run,
 						      struct kvm_vcpu *vcpu);
 
-extern enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause,
-							 uint32_t *opc,
+extern enum emulation_result kvm_mips_emulate_msadis_exc(u32 cause,
+							 u32 *opc,
 							 struct kvm_run *run,
 							 struct kvm_vcpu *vcpu);
 
 extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
 							 struct kvm_run *run);
 
-uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu);
-void kvm_mips_write_count(struct kvm_vcpu *vcpu, uint32_t count);
-void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare, bool ack);
+u32 kvm_mips_read_count(struct kvm_vcpu *vcpu);
+void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count);
+void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack);
 void kvm_mips_init_count(struct kvm_vcpu *vcpu);
 int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl);
 int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume);
@@ -759,27 +694,27 @@ void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu);
 void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu);
 enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu);
 
-enum emulation_result kvm_mips_check_privilege(unsigned long cause,
-					       uint32_t *opc,
+enum emulation_result kvm_mips_check_privilege(u32 cause,
+					       u32 *opc,
 					       struct kvm_run *run,
 					       struct kvm_vcpu *vcpu);
 
-enum emulation_result kvm_mips_emulate_cache(uint32_t inst,
-					     uint32_t *opc,
-					     uint32_t cause,
+enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
+					     u32 *opc,
+					     u32 cause,
 					     struct kvm_run *run,
 					     struct kvm_vcpu *vcpu);
-enum emulation_result kvm_mips_emulate_CP0(uint32_t inst,
-					   uint32_t *opc,
-					   uint32_t cause,
+enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
+					   u32 *opc,
+					   u32 cause,
 					   struct kvm_run *run,
 					   struct kvm_vcpu *vcpu);
-enum emulation_result kvm_mips_emulate_store(uint32_t inst,
-					     uint32_t cause,
+enum emulation_result kvm_mips_emulate_store(union mips_instruction inst,
+					     u32 cause,
 					     struct kvm_run *run,
 					     struct kvm_vcpu *vcpu);
-enum emulation_result kvm_mips_emulate_load(uint32_t inst,
-					    uint32_t cause,
+enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
+					    u32 cause,
 					    struct kvm_run *run,
 					    struct kvm_vcpu *vcpu);
 
@@ -789,13 +724,13 @@ unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu);
 unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu);
 
 /* Dynamic binary translation */
-extern int kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc,
-				      struct kvm_vcpu *vcpu);
-extern int kvm_mips_trans_cache_va(uint32_t inst, uint32_t *opc,
+extern int kvm_mips_trans_cache_index(union mips_instruction inst,
+				      u32 *opc, struct kvm_vcpu *vcpu);
+extern int kvm_mips_trans_cache_va(union mips_instruction inst, u32 *opc,
 				   struct kvm_vcpu *vcpu);
-extern int kvm_mips_trans_mfc0(uint32_t inst, uint32_t *opc,
+extern int kvm_mips_trans_mfc0(union mips_instruction inst, u32 *opc,
 			       struct kvm_vcpu *vcpu);
-extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc,
+extern int kvm_mips_trans_mtc0(union mips_instruction inst, u32 *opc,
 			       struct kvm_vcpu *vcpu);
 
 /* Misc */
diff --git a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
index d68e685cde60..bd8b9bbe1771 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
@@ -55,7 +55,7 @@
 #define cpu_has_mipsmt		0
 #define cpu_has_vint		0
 #define cpu_has_veic		0
-#define cpu_hwrena_impl_bits	0xc0000000
+#define cpu_hwrena_impl_bits	(MIPS_HWRENA_IMPL1 | MIPS_HWRENA_IMPL2)
 #define cpu_has_wsbh            1
 
 #define cpu_has_rixi		(cpu_data[0].cputype != CPU_CAVIUM_OCTEON)
diff --git a/arch/mips/include/asm/mach-cavium-octeon/irq.h b/arch/mips/include/asm/mach-cavium-octeon/irq.h
index cceae32a0732..64b86b9d30fe 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/irq.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/irq.h
@@ -42,8 +42,6 @@ enum octeon_irq {
 	OCTEON_IRQ_TIMER1,
 	OCTEON_IRQ_TIMER2,
 	OCTEON_IRQ_TIMER3,
-	OCTEON_IRQ_USB0,
-	OCTEON_IRQ_USB1,
 #ifndef CONFIG_PCI_MSI
 	OCTEON_IRQ_LAST = 127
 #endif
diff --git a/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h b/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h
index 374eefafb320..0cf5ac1f7245 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h
@@ -12,6 +12,14 @@
 
 #ifdef __BIG_ENDIAN
 
+static inline bool __should_swizzle_bits(volatile void *a)
+{
+	extern const bool octeon_should_swizzle_table[];
+
+	unsigned long did = ((unsigned long)a >> 40) & 0xff;
+	return octeon_should_swizzle_table[did];
+}
+
 # define __swizzle_addr_b(port)	(port)
 # define __swizzle_addr_w(port)	(port)
 # define __swizzle_addr_l(port)	(port)
@@ -19,6 +27,8 @@
 
 #else /* __LITTLE_ENDIAN */
 
+#define __should_swizzle_bits(a)	false
+
 static inline bool __should_swizzle_addr(unsigned long p)
 {
 	/* boot bus? */
@@ -35,40 +45,14 @@ static inline bool __should_swizzle_addr(unsigned long p)
 
 #endif /* __BIG_ENDIAN */
 
-/*
- * Sane hardware offers swapping of PCI/ISA I/O space accesses in hardware;
- * less sane hardware forces software to fiddle with this...
- *
- * Regardless, if the host bus endianness mismatches that of PCI/ISA, then
- * you can't have the numerical value of data and byte addresses within
- * multibyte quantities both preserved at the same time.  Hence two
- * variations of functions: non-prefixed ones that preserve the value
- * and prefixed ones that preserve byte addresses.  The latters are
- * typically used for moving raw data between a peripheral and memory (cf.
- * string I/O functions), hence the "__mem_" prefix.
- */
-#if defined(CONFIG_SWAP_IO_SPACE)
 
 # define ioswabb(a, x)		(x)
 # define __mem_ioswabb(a, x)	(x)
-# define ioswabw(a, x)		le16_to_cpu(x)
+# define ioswabw(a, x)		(__should_swizzle_bits(a) ? le16_to_cpu(x) : x)
 # define __mem_ioswabw(a, x)	(x)
-# define ioswabl(a, x)		le32_to_cpu(x)
+# define ioswabl(a, x)		(__should_swizzle_bits(a) ? le32_to_cpu(x) : x)
 # define __mem_ioswabl(a, x)	(x)
-# define ioswabq(a, x)		le64_to_cpu(x)
+# define ioswabq(a, x)		(__should_swizzle_bits(a) ? le64_to_cpu(x) : x)
 # define __mem_ioswabq(a, x)	(x)
 
-#else
-
-# define ioswabb(a, x)		(x)
-# define __mem_ioswabb(a, x)	(x)
-# define ioswabw(a, x)		(x)
-# define __mem_ioswabw(a, x)	cpu_to_le16(x)
-# define ioswabl(a, x)		(x)
-# define __mem_ioswabl(a, x)	cpu_to_le32(x)
-# define ioswabq(a, x)		(x)
-# define __mem_ioswabq(a, x)	cpu_to_le32(x)
-
-#endif
-
 #endif /* __ASM_MACH_GENERIC_MANGLE_PORT_H */
diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h
index 9411a4c0bdad..58e7874e9347 100644
--- a/arch/mips/include/asm/mips-cm.h
+++ b/arch/mips/include/asm/mips-cm.h
@@ -462,7 +462,7 @@ static inline unsigned int mips_cm_max_vp_width(void)
 	if (mips_cm_revision() >= CM_REV_CM3)
 		return read_gcr_sys_config2() & CM_GCR_SYS_CONFIG2_MAXVPW_MSK;
 
-	if (config_enabled(CONFIG_SMP))
+	if (IS_ENABLED(CONFIG_SMP))
 		return smp_num_siblings;
 
 	return 1;
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index e1ca65c62f6a..def9d8d13f6e 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -53,7 +53,7 @@
 #define CP0_SEGCTL2 $5, 4
 #define CP0_WIRED $6
 #define CP0_INFO $7
-#define CP0_HWRENA $7, 0
+#define CP0_HWRENA $7
 #define CP0_BADVADDR $8
 #define CP0_BADINSTR $8, 1
 #define CP0_COUNT $9
@@ -533,6 +533,7 @@
 #define TX49_CONF_CWFON		(_ULCAST_(1) << 27)
 
 /* Bits specific to the MIPS32/64 PRA.	*/
+#define MIPS_CONF_VI		(_ULCAST_(1) <<  3)
 #define MIPS_CONF_MT		(_ULCAST_(7) <<	 7)
 #define MIPS_CONF_MT_TLB	(_ULCAST_(1) <<  7)
 #define MIPS_CONF_MT_FTLB	(_ULCAST_(4) <<  7)
@@ -853,6 +854,24 @@
 #define MIPS_CDMMBASE_ADDR_SHIFT 11
 #define MIPS_CDMMBASE_ADDR_START 15
 
+/* RDHWR register numbers */
+#define MIPS_HWR_CPUNUM		0	/* CPU number */
+#define MIPS_HWR_SYNCISTEP	1	/* SYNCI step size */
+#define MIPS_HWR_CC		2	/* Cycle counter */
+#define MIPS_HWR_CCRES		3	/* Cycle counter resolution */
+#define MIPS_HWR_ULR		29	/* UserLocal */
+#define MIPS_HWR_IMPL1		30	/* Implementation dependent */
+#define MIPS_HWR_IMPL2		31	/* Implementation dependent */
+
+/* Bits in HWREna register */
+#define MIPS_HWRENA_CPUNUM	(_ULCAST_(1) << MIPS_HWR_CPUNUM)
+#define MIPS_HWRENA_SYNCISTEP	(_ULCAST_(1) << MIPS_HWR_SYNCISTEP)
+#define MIPS_HWRENA_CC		(_ULCAST_(1) << MIPS_HWR_CC)
+#define MIPS_HWRENA_CCRES	(_ULCAST_(1) << MIPS_HWR_CCRES)
+#define MIPS_HWRENA_ULR		(_ULCAST_(1) << MIPS_HWR_ULR)
+#define MIPS_HWRENA_IMPL1	(_ULCAST_(1) << MIPS_HWR_IMPL1)
+#define MIPS_HWRENA_IMPL2	(_ULCAST_(1) << MIPS_HWR_IMPL2)
+
 /*
  * Bitfields in the TX39 family CP0 Configuration Register 3
  */
diff --git a/arch/mips/include/asm/mmu.h b/arch/mips/include/asm/mmu.h
index 1afa1f986df8..f6ba08d77931 100644
--- a/arch/mips/include/asm/mmu.h
+++ b/arch/mips/include/asm/mmu.h
@@ -2,11 +2,20 @@
 #define __ASM_MMU_H
 
 #include <linux/atomic.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
 
 typedef struct {
 	unsigned long asid[NR_CPUS];
 	void *vdso;
 	atomic_t fp_mode_switching;
+
+	/* lock to be held whilst modifying fp_bd_emupage_allocmap */
+	spinlock_t bd_emupage_lock;
+	/* bitmap tracking allocation of fp_bd_emupage */
+	unsigned long *bd_emupage_allocmap;
+	/* wait queue for threads requiring an emuframe */
+	wait_queue_head_t bd_emupage_queue;
 } mm_context_t;
 
 #endif /* __ASM_MMU_H */
diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h
index fc57e135cb0a..ddd57ade1aa8 100644
--- a/arch/mips/include/asm/mmu_context.h
+++ b/arch/mips/include/asm/mmu_context.h
@@ -16,6 +16,7 @@
 #include <linux/smp.h>
 #include <linux/slab.h>
 #include <asm/cacheflush.h>
+#include <asm/dsemul.h>
 #include <asm/hazards.h>
 #include <asm/tlbflush.h>
 #include <asm-generic/mm_hooks.h>
@@ -128,6 +129,10 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 
 	atomic_set(&mm->context.fp_mode_switching, 0);
 
+	mm->context.bd_emupage_allocmap = NULL;
+	spin_lock_init(&mm->context.bd_emupage_lock);
+	init_waitqueue_head(&mm->context.bd_emupage_queue);
+
 	return 0;
 }
 
@@ -162,6 +167,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
  */
 static inline void destroy_context(struct mm_struct *mm)
 {
+	dsemul_mm_cleanup(mm);
 }
 
 #define deactivate_mm(tsk, mm)	do { } while (0)
diff --git a/arch/mips/include/asm/msa.h b/arch/mips/include/asm/msa.h
index ddf496cb2a2a..8967b475ab10 100644
--- a/arch/mips/include/asm/msa.h
+++ b/arch/mips/include/asm/msa.h
@@ -168,6 +168,7 @@ static inline unsigned int read_msa_##name(void)		\
 	unsigned int reg;					\
 	__asm__ __volatile__(					\
 	"	.set	push\n"					\
+	"	.set	fp=64\n"				\
 	"	.set	msa\n"					\
 	"	cfcmsa	%0, $" #cs "\n"				\
 	"	.set	pop\n"					\
@@ -179,6 +180,7 @@ static inline void write_msa_##name(unsigned int val)		\
 {								\
 	__asm__ __volatile__(					\
 	"	.set	push\n"					\
+	"	.set	fp=64\n"				\
 	"	.set	msa\n"					\
 	"	ctcmsa	$" #cs ", %0\n"				\
 	"	.set	pop\n"					\
diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
index 21ed7150fec3..ea0cd9773914 100644
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -162,16 +162,34 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 /*
  * __pa()/__va() should be used only during mem init.
  */
-#ifdef CONFIG_64BIT
-#define __pa(x)								\
-({									\
-    unsigned long __x = (unsigned long)(x);				\
-    __x < CKSEG0 ? XPHYSADDR(__x) : CPHYSADDR(__x);			\
-})
-#else
-#define __pa(x)								\
-    ((unsigned long)(x) - PAGE_OFFSET + PHYS_OFFSET)
-#endif
+static inline unsigned long ___pa(unsigned long x)
+{
+	if (config_enabled(CONFIG_64BIT)) {
+		/*
+		 * For MIPS64 the virtual address may either be in one of
+		 * the compatibility segements ckseg0 or ckseg1, or it may
+		 * be in xkphys.
+		 */
+		return x < CKSEG0 ? XPHYSADDR(x) : CPHYSADDR(x);
+	}
+
+	if (!config_enabled(CONFIG_EVA)) {
+		/*
+		 * We're using the standard MIPS32 legacy memory map, ie.
+		 * the address x is going to be in kseg0 or kseg1. We can
+		 * handle either case by masking out the desired bits using
+		 * CPHYSADDR.
+		 */
+		return CPHYSADDR(x);
+	}
+
+	/*
+	 * EVA is in use so the memory map could be anything, making it not
+	 * safe to just mask out bits.
+	 */
+	return x - PAGE_OFFSET + PHYS_OFFSET;
+}
+#define __pa(x)		___pa((unsigned long)(x))
 #define __va(x)		((void *)((unsigned long)(x) + PAGE_OFFSET - PHYS_OFFSET))
 #include <asm/io.h>
 
@@ -229,8 +247,10 @@ extern int __virt_addr_valid(const volatile void *kaddr);
 #define virt_addr_valid(kaddr)						\
 	__virt_addr_valid((const volatile void *) (kaddr))
 
-#define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
-				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_DEFAULT_FLAGS \
+	(VM_READ | VM_WRITE | \
+	 ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
+	 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #define UNCAC_ADDR(addr)	((addr) - PAGE_OFFSET + UNCAC_BASE)
 #define CAC_ADDR(addr)		((addr) - UNCAC_BASE + PAGE_OFFSET)
diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h
index 86b239d9d75d..9b63cd41213d 100644
--- a/arch/mips/include/asm/pci.h
+++ b/arch/mips/include/asm/pci.h
@@ -80,16 +80,6 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 
 #define HAVE_ARCH_PCI_RESOURCE_TO_USER
 
-static inline void pci_resource_to_user(const struct pci_dev *dev, int bar,
-		const struct resource *rsrc, resource_size_t *start,
-		resource_size_t *end)
-{
-	phys_addr_t size = resource_size(rsrc);
-
-	*start = fixup_bigphys_addr(rsrc->start, size);
-	*end = rsrc->start + size;
-}
-
 /*
  * Dynamic DMA mapping stuff.
  * MIPS has everything mapped statically.
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 7d44e888134f..70128d3f770a 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -159,7 +159,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
 		 * it better already be global)
 		 */
 		if (pte_none(*buddy)) {
-			if (!config_enabled(CONFIG_XPA))
+			if (!IS_ENABLED(CONFIG_XPA))
 				buddy->pte_low |= _PAGE_GLOBAL;
 			buddy->pte_high |= _PAGE_GLOBAL;
 		}
@@ -172,7 +172,7 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
 
 	htw_stop();
 	/* Preserve global status for the pair */
-	if (config_enabled(CONFIG_XPA)) {
+	if (IS_ENABLED(CONFIG_XPA)) {
 		if (ptep_buddy(ptep)->pte_high & _PAGE_GLOBAL)
 			null.pte_high = _PAGE_GLOBAL;
 	} else {
@@ -319,7 +319,7 @@ static inline int pte_young(pte_t pte)	{ return pte.pte_low & _PAGE_ACCESSED; }
 static inline pte_t pte_wrprotect(pte_t pte)
 {
 	pte.pte_low  &= ~_PAGE_WRITE;
-	if (!config_enabled(CONFIG_XPA))
+	if (!IS_ENABLED(CONFIG_XPA))
 		pte.pte_low &= ~_PAGE_SILENT_WRITE;
 	pte.pte_high &= ~_PAGE_SILENT_WRITE;
 	return pte;
@@ -328,7 +328,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
 static inline pte_t pte_mkclean(pte_t pte)
 {
 	pte.pte_low  &= ~_PAGE_MODIFIED;
-	if (!config_enabled(CONFIG_XPA))
+	if (!IS_ENABLED(CONFIG_XPA))
 		pte.pte_low &= ~_PAGE_SILENT_WRITE;
 	pte.pte_high &= ~_PAGE_SILENT_WRITE;
 	return pte;
@@ -337,7 +337,7 @@ static inline pte_t pte_mkclean(pte_t pte)
 static inline pte_t pte_mkold(pte_t pte)
 {
 	pte.pte_low  &= ~_PAGE_ACCESSED;
-	if (!config_enabled(CONFIG_XPA))
+	if (!IS_ENABLED(CONFIG_XPA))
 		pte.pte_low &= ~_PAGE_SILENT_READ;
 	pte.pte_high &= ~_PAGE_SILENT_READ;
 	return pte;
@@ -347,7 +347,7 @@ static inline pte_t pte_mkwrite(pte_t pte)
 {
 	pte.pte_low |= _PAGE_WRITE;
 	if (pte.pte_low & _PAGE_MODIFIED) {
-		if (!config_enabled(CONFIG_XPA))
+		if (!IS_ENABLED(CONFIG_XPA))
 			pte.pte_low |= _PAGE_SILENT_WRITE;
 		pte.pte_high |= _PAGE_SILENT_WRITE;
 	}
@@ -358,7 +358,7 @@ static inline pte_t pte_mkdirty(pte_t pte)
 {
 	pte.pte_low |= _PAGE_MODIFIED;
 	if (pte.pte_low & _PAGE_WRITE) {
-		if (!config_enabled(CONFIG_XPA))
+		if (!IS_ENABLED(CONFIG_XPA))
 			pte.pte_low |= _PAGE_SILENT_WRITE;
 		pte.pte_high |= _PAGE_SILENT_WRITE;
 	}
@@ -369,7 +369,7 @@ static inline pte_t pte_mkyoung(pte_t pte)
 {
 	pte.pte_low |= _PAGE_ACCESSED;
 	if (!(pte.pte_low & _PAGE_NO_READ)) {
-		if (!config_enabled(CONFIG_XPA))
+		if (!IS_ENABLED(CONFIG_XPA))
 			pte.pte_low |= _PAGE_SILENT_READ;
 		pte.pte_high |= _PAGE_SILENT_READ;
 	}
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 7e78b6208d7d..0d36c87acbe2 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -11,12 +11,14 @@
 #ifndef _ASM_PROCESSOR_H
 #define _ASM_PROCESSOR_H
 
+#include <linux/atomic.h>
 #include <linux/cpumask.h>
 #include <linux/threads.h>
 
 #include <asm/cachectl.h>
 #include <asm/cpu.h>
 #include <asm/cpu-info.h>
+#include <asm/dsemul.h>
 #include <asm/mipsregs.h>
 #include <asm/prefetch.h>
 
@@ -78,7 +80,11 @@ extern unsigned int vced_count, vcei_count;
 
 #endif
 
-#define STACK_TOP	(TASK_SIZE & PAGE_MASK)
+/*
+ * One page above the stack is used for branch delay slot "emulation".
+ * See dsemul.c for details.
+ */
+#define STACK_TOP	((TASK_SIZE & PAGE_MASK) - PAGE_SIZE)
 
 /*
  * This decides where the kernel will search for a free chunk of vm
@@ -256,6 +262,12 @@ struct thread_struct {
 
 	/* Saved fpu/fpu emulator stuff. */
 	struct mips_fpu_struct fpu FPU_ALIGN;
+	/* Assigned branch delay slot 'emulation' frame */
+	atomic_t bd_emu_frame;
+	/* PC of the branch from a branch delay slot 'emulation' */
+	unsigned long bd_emu_branch_pc;
+	/* PC to continue from following a branch delay slot 'emulation' */
+	unsigned long bd_emu_cont_pc;
 #ifdef CONFIG_MIPS_MT_FPAFF
 	/* Emulated instruction count */
 	unsigned long emulated_fp;
@@ -323,6 +335,10 @@ struct thread_struct {
 	 * FPU affinity state (null if not FPAFF)		\
 	 */							\
 	FPAFF_INIT						\
+	/* Delay slot emulation */				\
+	.bd_emu_frame = ATOMIC_INIT(BD_EMUFRAME_NONE),		\
+	.bd_emu_branch_pc = 0,					\
+	.bd_emu_cont_pc = 0,					\
 	/*							\
 	 * Saved DSP stuff					\
 	 */							\
diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
index 38902bf97adc..667ca3c467b7 100644
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -210,7 +210,11 @@ static inline void protected_writeback_dcache_line(unsigned long addr)
 
 static inline void protected_writeback_scache_line(unsigned long addr)
 {
+#ifdef CONFIG_EVA
+	protected_cachee_op(Hit_Writeback_Inv_SD, addr);
+#else
 	protected_cache_op(Hit_Writeback_Inv_SD, addr);
+#endif
 }
 
 /*
diff --git a/arch/mips/include/asm/seccomp.h b/arch/mips/include/asm/seccomp.h
index 684fb3a12ed3..d886d6f7687a 100644
--- a/arch/mips/include/asm/seccomp.h
+++ b/arch/mips/include/asm/seccomp.h
@@ -16,10 +16,10 @@ static inline const int *get_compat_mode1_syscalls(void)
 		0, /* null terminated */
 	};
 
-	if (config_enabled(CONFIG_MIPS32_O32) && test_thread_flag(TIF_32BIT_REGS))
+	if (IS_ENABLED(CONFIG_MIPS32_O32) && test_thread_flag(TIF_32BIT_REGS))
 		return syscalls_O32;
 
-	if (config_enabled(CONFIG_MIPS32_N32))
+	if (IS_ENABLED(CONFIG_MIPS32_N32))
 		return syscalls_N32;
 
 	BUG();
diff --git a/arch/mips/include/asm/setup.h b/arch/mips/include/asm/setup.h
index d7bfdeba9e84..4f5279a8308d 100644
--- a/arch/mips/include/asm/setup.h
+++ b/arch/mips/include/asm/setup.h
@@ -21,6 +21,7 @@ extern void *set_vi_handler(int n, vi_handler_t addr);
 
 extern void *set_except_vector(int n, void *addr);
 extern unsigned long ebase;
+extern unsigned int hwrena;
 extern void per_cpu_trap_init(bool);
 extern void cpu_cache_init(void);
 
diff --git a/arch/mips/include/asm/signal.h b/arch/mips/include/asm/signal.h
index 2292373ff11a..23d6b8015c79 100644
--- a/arch/mips/include/asm/signal.h
+++ b/arch/mips/include/asm/signal.h
@@ -11,7 +11,7 @@
 
 #include <uapi/asm/signal.h>
 
-#ifdef CONFIG_MIPS32_COMPAT
+#ifdef CONFIG_MIPS32_O32
 extern struct mips_abi mips_abi_32;
 
 #define sig_uses_siginfo(ka, abi)                               \
@@ -19,8 +19,8 @@ extern struct mips_abi mips_abi_32;
 		((ka)->sa.sa_flags & SA_SIGINFO))
 #else
 #define sig_uses_siginfo(ka, abi)                               \
-	(config_enabled(CONFIG_64BIT) ? 1 :                     \
-		(config_enabled(CONFIG_TRAD_SIGNALS) ?          \
+	(IS_ENABLED(CONFIG_64BIT) ? 1 :                     \
+		(IS_ENABLED(CONFIG_TRAD_SIGNALS) ?          \
 			((ka)->sa.sa_flags & SA_SIGINFO) : 1) )
 #endif
 
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index 03722d4326a1..8bc6c70a4030 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -23,7 +23,7 @@
 extern int smp_num_siblings;
 extern cpumask_t cpu_sibling_map[];
 extern cpumask_t cpu_core_map[];
-extern cpumask_t cpu_foreign_map;
+extern cpumask_t cpu_foreign_map[];
 
 #define raw_smp_processor_id() (current_thread_info()->cpu)
 
@@ -53,6 +53,8 @@ extern cpumask_t cpu_coherent_mask;
 
 extern void asmlinkage smp_bootstrap(void);
 
+extern void calculate_cpu_foreign_map(void);
+
 /*
  * this function sends a 'reschedule' IPI to another CPU.
  * it goes straight through and wastes no time serializing
diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h
index 47bc45a67e9b..d87882513ee3 100644
--- a/arch/mips/include/asm/syscall.h
+++ b/arch/mips/include/asm/syscall.h
@@ -99,7 +99,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
 {
 	int ret;
 	/* O32 ABI syscall() - Either 64-bit with O32 or 32-bit */
-	if ((config_enabled(CONFIG_32BIT) ||
+	if ((IS_ENABLED(CONFIG_32BIT) ||
 	    test_tsk_thread_flag(task, TIF_32BIT_REGS)) &&
 	    (regs->regs[2] == __NR_syscall))
 		i++;
diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h
index 7f109d4f64a4..11b965f98d95 100644
--- a/arch/mips/include/asm/uaccess.h
+++ b/arch/mips/include/asm/uaccess.h
@@ -88,7 +88,7 @@ extern u64 __ua_limit;
  */
 static inline bool eva_kernel_access(void)
 {
-	if (!config_enabled(CONFIG_EVA))
+	if (!IS_ENABLED(CONFIG_EVA))
 		return false;
 
 	return segment_eq(get_fs(), get_ds());
diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h
index b6ecfeee4dbe..f7929f65f7ca 100644
--- a/arch/mips/include/asm/uasm.h
+++ b/arch/mips/include/asm/uasm.h
@@ -104,8 +104,13 @@ Ip_u1s2(_bltz);
 Ip_u1s2(_bltzl);
 Ip_u1u2s3(_bne);
 Ip_u2s3u1(_cache);
+Ip_u1u2(_cfc1);
+Ip_u2u1(_cfcmsa);
+Ip_u1u2(_ctc1);
+Ip_u2u1(_ctcmsa);
 Ip_u2u1s3(_daddiu);
 Ip_u3u1u2(_daddu);
+Ip_u1(_di);
 Ip_u2u1msbu3(_dins);
 Ip_u2u1msbu3(_dinsm);
 Ip_u1u2(_divu);
@@ -141,6 +146,8 @@ Ip_u1(_mfhi);
 Ip_u1(_mflo);
 Ip_u1u2u3(_mtc0);
 Ip_u1u2u3(_mthc0);
+Ip_u1(_mthi);
+Ip_u1(_mtlo);
 Ip_u3u1u2(_mul);
 Ip_u3u1u2(_or);
 Ip_u2u1u3(_ori);
diff --git a/arch/mips/include/uapi/asm/auxvec.h b/arch/mips/include/uapi/asm/auxvec.h
index c9c7195272c4..45ba259a3618 100644
--- a/arch/mips/include/uapi/asm/auxvec.h
+++ b/arch/mips/include/uapi/asm/auxvec.h
@@ -14,4 +14,6 @@
 /* Location of VDSO image. */
 #define AT_SYSINFO_EHDR		33
 
+#define AT_VECTOR_SIZE_ARCH 1 /* entries in ARCH_DLINFO */
+
 #endif /* __ASM_AUXVEC_H */
diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h
index 8051f9aa1379..77429d1622b3 100644
--- a/arch/mips/include/uapi/asm/inst.h
+++ b/arch/mips/include/uapi/asm/inst.h
@@ -21,20 +21,20 @@
 enum major_op {
 	spec_op, bcond_op, j_op, jal_op,
 	beq_op, bne_op, blez_op, bgtz_op,
-	addi_op, cbcond0_op = addi_op, addiu_op, slti_op, sltiu_op,
+	addi_op, pop10_op = addi_op, addiu_op, slti_op, sltiu_op,
 	andi_op, ori_op, xori_op, lui_op,
 	cop0_op, cop1_op, cop2_op, cop1x_op,
 	beql_op, bnel_op, blezl_op, bgtzl_op,
-	daddi_op, cbcond1_op = daddi_op, daddiu_op, ldl_op, ldr_op,
+	daddi_op, pop30_op = daddi_op, daddiu_op, ldl_op, ldr_op,
 	spec2_op, jalx_op, mdmx_op, msa_op = mdmx_op, spec3_op,
 	lb_op, lh_op, lwl_op, lw_op,
 	lbu_op, lhu_op, lwr_op, lwu_op,
 	sb_op, sh_op, swl_op, sw_op,
 	sdl_op, sdr_op, swr_op, cache_op,
 	ll_op, lwc1_op, lwc2_op, bc6_op = lwc2_op, pref_op,
-	lld_op, ldc1_op, ldc2_op, beqzcjic_op = ldc2_op, ld_op,
+	lld_op, ldc1_op, ldc2_op, pop66_op = ldc2_op, ld_op,
 	sc_op, swc1_op, swc2_op, balc6_op = swc2_op, major_3b_op,
-	scd_op, sdc1_op, sdc2_op, bnezcjialc_op = sdc2_op, sd_op
+	scd_op, sdc1_op, sdc2_op, pop76_op = sdc2_op, sd_op
 };
 
 /*
@@ -93,6 +93,50 @@ enum spec3_op {
 };
 
 /*
+ * Bits 10-6 minor opcode for r6 spec mult/div encodings
+ */
+enum mult_op {
+	mult_mult_op = 0x0,
+	mult_mul_op = 0x2,
+	mult_muh_op = 0x3,
+};
+enum multu_op {
+	multu_multu_op = 0x0,
+	multu_mulu_op = 0x2,
+	multu_muhu_op = 0x3,
+};
+enum div_op {
+	div_div_op = 0x0,
+	div_div6_op = 0x2,
+	div_mod_op = 0x3,
+};
+enum divu_op {
+	divu_divu_op = 0x0,
+	divu_divu6_op = 0x2,
+	divu_modu_op = 0x3,
+};
+enum dmult_op {
+	dmult_dmult_op = 0x0,
+	dmult_dmul_op = 0x2,
+	dmult_dmuh_op = 0x3,
+};
+enum dmultu_op {
+	dmultu_dmultu_op = 0x0,
+	dmultu_dmulu_op = 0x2,
+	dmultu_dmuhu_op = 0x3,
+};
+enum ddiv_op {
+	ddiv_ddiv_op = 0x0,
+	ddiv_ddiv6_op = 0x2,
+	ddiv_dmod_op = 0x3,
+};
+enum ddivu_op {
+	ddivu_ddivu_op = 0x0,
+	ddivu_ddivu6_op = 0x2,
+	ddivu_dmodu_op = 0x3,
+};
+
+/*
  * rt field of bcond opcodes.
  */
 enum rt_op {
@@ -103,7 +147,7 @@ enum rt_op {
 	bltzal_op, bgezal_op, bltzall_op, bgezall_op,
 	rt_op_0x14, rt_op_0x15, rt_op_0x16, rt_op_0x17,
 	rt_op_0x18, rt_op_0x19, rt_op_0x1a, rt_op_0x1b,
-	bposge32_op, rt_op_0x1d, rt_op_0x1e, rt_op_0x1f
+	bposge32_op, rt_op_0x1d, rt_op_0x1e, synci_op
 };
 
 /*
@@ -238,6 +282,21 @@ enum bshfl_func {
 };
 
 /*
+ * MSA minor opcodes.
+ */
+enum msa_func {
+	msa_elm_op = 0x19,
+};
+
+/*
+ * MSA ELM opcodes.
+ */
+enum msa_elm {
+	msa_ctc_op = 0x3e,
+	msa_cfc_op = 0x7e,
+};
+
+/*
  * func field for MSA MI10 format.
  */
 enum msa_mi10_func {
@@ -264,7 +323,7 @@ enum mm_major_op {
 	mm_pool32b_op, mm_pool16b_op, mm_lhu16_op, mm_andi16_op,
 	mm_addiu32_op, mm_lhu32_op, mm_sh32_op, mm_lh32_op,
 	mm_pool32i_op, mm_pool16c_op, mm_lwsp16_op, mm_pool16d_op,
-	mm_ori32_op, mm_pool32f_op, mm_reserved1_op, mm_reserved2_op,
+	mm_ori32_op, mm_pool32f_op, mm_pool32s_op, mm_reserved2_op,
 	mm_pool32c_op, mm_lwgp16_op, mm_lw16_op, mm_pool16e_op,
 	mm_xori32_op, mm_jals32_op, mm_addiupc_op, mm_reserved3_op,
 	mm_reserved4_op, mm_pool16f_op, mm_sb16_op, mm_beqz16_op,
@@ -360,7 +419,10 @@ enum mm_32axf_minor_op {
 	mm_mflo32_op = 0x075,
 	mm_jalrhb_op = 0x07c,
 	mm_tlbwi_op = 0x08d,
+	mm_mthi32_op = 0x0b5,
 	mm_tlbwr_op = 0x0cd,
+	mm_mtlo32_op = 0x0f5,
+	mm_di_op = 0x11d,
 	mm_jalrs_op = 0x13c,
 	mm_jalrshb_op = 0x17c,
 	mm_sync_op = 0x1ad,
@@ -479,6 +541,13 @@ enum mm_32f_73_minor_op {
 };
 
 /*
+ * (microMIPS) POOL32S minor opcodes.
+ */
+enum mm_32s_minor_op {
+	mm_32s_elm_op = 0x16,
+};
+
+/*
  * (microMIPS) POOL16C minor opcodes.
  */
 enum mm_16c_minor_op {
@@ -586,6 +655,36 @@ struct r_format {			/* Register format */
 	;))))))
 };
 
+struct c0r_format {			/* C0 register format */
+	__BITFIELD_FIELD(unsigned int opcode : 6,
+	__BITFIELD_FIELD(unsigned int rs : 5,
+	__BITFIELD_FIELD(unsigned int rt : 5,
+	__BITFIELD_FIELD(unsigned int rd : 5,
+	__BITFIELD_FIELD(unsigned int z: 8,
+	__BITFIELD_FIELD(unsigned int sel : 3,
+	;))))))
+};
+
+struct mfmc0_format {			/* MFMC0 register format */
+	__BITFIELD_FIELD(unsigned int opcode : 6,
+	__BITFIELD_FIELD(unsigned int rs : 5,
+	__BITFIELD_FIELD(unsigned int rt : 5,
+	__BITFIELD_FIELD(unsigned int rd : 5,
+	__BITFIELD_FIELD(unsigned int re : 5,
+	__BITFIELD_FIELD(unsigned int sc : 1,
+	__BITFIELD_FIELD(unsigned int : 2,
+	__BITFIELD_FIELD(unsigned int sel : 3,
+	;))))))))
+};
+
+struct co_format {			/* C0 CO format */
+	__BITFIELD_FIELD(unsigned int opcode : 6,
+	__BITFIELD_FIELD(unsigned int co : 1,
+	__BITFIELD_FIELD(unsigned int code : 19,
+	__BITFIELD_FIELD(unsigned int func : 6,
+	;))))
+};
+
 struct p_format {		/* Performance counter format (R10000) */
 	__BITFIELD_FIELD(unsigned int opcode : 6,
 	__BITFIELD_FIELD(unsigned int rs : 5,
@@ -937,6 +1036,9 @@ union mips_instruction {
 	struct u_format u_format;
 	struct c_format c_format;
 	struct r_format r_format;
+	struct c0r_format c0r_format;
+	struct mfmc0_format mfmc0_format;
+	struct co_format co_format;
 	struct p_format p_format;
 	struct f_format f_format;
 	struct ma_format ma_format;
diff --git a/arch/mips/jz4740/setup.c b/arch/mips/jz4740/setup.c
index 0914ef775b5f..6d0152321819 100644
--- a/arch/mips/jz4740/setup.c
+++ b/arch/mips/jz4740/setup.c
@@ -75,7 +75,7 @@ void __init device_tree_init(void)
 
 const char *get_system_type(void)
 {
-	if (config_enabled(CONFIG_MACH_JZ4780))
+	if (IS_ENABLED(CONFIG_MACH_JZ4780))
 		return "JZ4780";
 
 	return "JZ4740";
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index e6053d07072f..4a603a3ea657 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -71,7 +71,7 @@ obj-$(CONFIG_32BIT)		+= scall32-o32.o
 obj-$(CONFIG_64BIT)		+= scall64-64.o
 obj-$(CONFIG_MIPS32_COMPAT)	+= linux32.o ptrace32.o signal32.o
 obj-$(CONFIG_MIPS32_N32)	+= binfmt_elfn32.o scall64-n32.o signal_n32.o
-obj-$(CONFIG_MIPS32_O32)	+= binfmt_elfo32.o scall64-o32.o
+obj-$(CONFIG_MIPS32_O32)	+= binfmt_elfo32.o scall64-o32.o signal_o32.o
 
 obj-$(CONFIG_KGDB)		+= kgdb.o
 obj-$(CONFIG_PROC_FS)		+= proc.o
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index 1ea973b2abb1..fae2f9447792 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -339,71 +339,9 @@ void output_pm_defines(void)
 }
 #endif
 
-void output_cpuinfo_defines(void)
-{
-	COMMENT(" MIPS cpuinfo offsets. ");
-	DEFINE(CPUINFO_SIZE, sizeof(struct cpuinfo_mips));
-#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
-	OFFSET(CPUINFO_ASID_MASK, cpuinfo_mips, asid_mask);
-#endif
-}
-
 void output_kvm_defines(void)
 {
 	COMMENT(" KVM/MIPS Specfic offsets. ");
-	DEFINE(VCPU_ARCH_SIZE, sizeof(struct kvm_vcpu_arch));
-	OFFSET(VCPU_RUN, kvm_vcpu, run);
-	OFFSET(VCPU_HOST_ARCH, kvm_vcpu, arch);
-
-	OFFSET(VCPU_HOST_EBASE, kvm_vcpu_arch, host_ebase);
-	OFFSET(VCPU_GUEST_EBASE, kvm_vcpu_arch, guest_ebase);
-
-	OFFSET(VCPU_HOST_STACK, kvm_vcpu_arch, host_stack);
-	OFFSET(VCPU_HOST_GP, kvm_vcpu_arch, host_gp);
-
-	OFFSET(VCPU_HOST_CP0_BADVADDR, kvm_vcpu_arch, host_cp0_badvaddr);
-	OFFSET(VCPU_HOST_CP0_CAUSE, kvm_vcpu_arch, host_cp0_cause);
-	OFFSET(VCPU_HOST_EPC, kvm_vcpu_arch, host_cp0_epc);
-	OFFSET(VCPU_HOST_ENTRYHI, kvm_vcpu_arch, host_cp0_entryhi);
-
-	OFFSET(VCPU_GUEST_INST, kvm_vcpu_arch, guest_inst);
-
-	OFFSET(VCPU_R0, kvm_vcpu_arch, gprs[0]);
-	OFFSET(VCPU_R1, kvm_vcpu_arch, gprs[1]);
-	OFFSET(VCPU_R2, kvm_vcpu_arch, gprs[2]);
-	OFFSET(VCPU_R3, kvm_vcpu_arch, gprs[3]);
-	OFFSET(VCPU_R4, kvm_vcpu_arch, gprs[4]);
-	OFFSET(VCPU_R5, kvm_vcpu_arch, gprs[5]);
-	OFFSET(VCPU_R6, kvm_vcpu_arch, gprs[6]);
-	OFFSET(VCPU_R7, kvm_vcpu_arch, gprs[7]);
-	OFFSET(VCPU_R8, kvm_vcpu_arch, gprs[8]);
-	OFFSET(VCPU_R9, kvm_vcpu_arch, gprs[9]);
-	OFFSET(VCPU_R10, kvm_vcpu_arch, gprs[10]);
-	OFFSET(VCPU_R11, kvm_vcpu_arch, gprs[11]);
-	OFFSET(VCPU_R12, kvm_vcpu_arch, gprs[12]);
-	OFFSET(VCPU_R13, kvm_vcpu_arch, gprs[13]);
-	OFFSET(VCPU_R14, kvm_vcpu_arch, gprs[14]);
-	OFFSET(VCPU_R15, kvm_vcpu_arch, gprs[15]);
-	OFFSET(VCPU_R16, kvm_vcpu_arch, gprs[16]);
-	OFFSET(VCPU_R17, kvm_vcpu_arch, gprs[17]);
-	OFFSET(VCPU_R18, kvm_vcpu_arch, gprs[18]);
-	OFFSET(VCPU_R19, kvm_vcpu_arch, gprs[19]);
-	OFFSET(VCPU_R20, kvm_vcpu_arch, gprs[20]);
-	OFFSET(VCPU_R21, kvm_vcpu_arch, gprs[21]);
-	OFFSET(VCPU_R22, kvm_vcpu_arch, gprs[22]);
-	OFFSET(VCPU_R23, kvm_vcpu_arch, gprs[23]);
-	OFFSET(VCPU_R24, kvm_vcpu_arch, gprs[24]);
-	OFFSET(VCPU_R25, kvm_vcpu_arch, gprs[25]);
-	OFFSET(VCPU_R26, kvm_vcpu_arch, gprs[26]);
-	OFFSET(VCPU_R27, kvm_vcpu_arch, gprs[27]);
-	OFFSET(VCPU_R28, kvm_vcpu_arch, gprs[28]);
-	OFFSET(VCPU_R29, kvm_vcpu_arch, gprs[29]);
-	OFFSET(VCPU_R30, kvm_vcpu_arch, gprs[30]);
-	OFFSET(VCPU_R31, kvm_vcpu_arch, gprs[31]);
-	OFFSET(VCPU_LO, kvm_vcpu_arch, lo);
-	OFFSET(VCPU_HI, kvm_vcpu_arch, hi);
-	OFFSET(VCPU_PC, kvm_vcpu_arch, pc);
-	BLANK();
 
 	OFFSET(VCPU_FPR0, kvm_vcpu_arch, fpu.fpr[0]);
 	OFFSET(VCPU_FPR1, kvm_vcpu_arch, fpu.fpr[1]);
@@ -441,14 +379,6 @@ void output_kvm_defines(void)
 	OFFSET(VCPU_FCR31, kvm_vcpu_arch, fpu.fcr31);
 	OFFSET(VCPU_MSA_CSR, kvm_vcpu_arch, fpu.msacsr);
 	BLANK();
-
-	OFFSET(VCPU_COP0, kvm_vcpu_arch, cop0);
-	OFFSET(VCPU_GUEST_KERNEL_ASID, kvm_vcpu_arch, guest_kernel_asid);
-	OFFSET(VCPU_GUEST_USER_ASID, kvm_vcpu_arch, guest_user_asid);
-
-	OFFSET(COP0_TLB_HI, mips_coproc, reg[MIPS_CP0_TLB_HI][0]);
-	OFFSET(COP0_STATUS, mips_coproc, reg[MIPS_CP0_STATUS][0]);
-	BLANK();
 }
 
 #ifdef CONFIG_MIPS_CPS
diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index 6dc3f1fdaccc..46c227fc98f5 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -790,7 +790,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 		epc += 4 + (insn.i_format.simmediate << 2);
 		regs->cp0_epc = epc;
 		break;
-	case beqzcjic_op:
+	case pop66_op:
 		if (!cpu_has_mips_r6) {
 			ret = -SIGILL;
 			break;
@@ -798,7 +798,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 		/* Compact branch: BEQZC || JIC */
 		regs->cp0_epc += 8;
 		break;
-	case bnezcjialc_op:
+	case pop76_op:
 		if (!cpu_has_mips_r6) {
 			ret = -SIGILL;
 			break;
@@ -809,8 +809,8 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 		regs->cp0_epc += 8;
 		break;
 #endif
-	case cbcond0_op:
-	case cbcond1_op:
+	case pop10_op:
+	case pop30_op:
 		/* Only valid for MIPS R6 */
 		if (!cpu_has_mips_r6) {
 			ret = -SIGILL;
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index e4c21bbf9422..804d2a2a19fe 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -276,12 +276,7 @@ int r4k_clockevent_init(void)
 				  CLOCK_EVT_FEAT_C3STOP |
 				  CLOCK_EVT_FEAT_PERCPU;
 
-	clockevent_set_clock(cd, mips_hpt_frequency);
-
-	/* Calculate the min / max delta */
-	cd->max_delta_ns	= clockevent_delta2ns(0x7fffffff, cd);
 	min_delta		= calculate_min_delta();
-	cd->min_delta_ns	= clockevent_delta2ns(min_delta, cd);
 
 	cd->rating		= 300;
 	cd->irq			= irq;
@@ -289,7 +284,7 @@ int r4k_clockevent_init(void)
 	cd->set_next_event	= mips_next_event;
 	cd->event_handler	= mips_event_handler;
 
-	clockevents_register_device(cd);
+	clockevents_config_and_register(cd, mips_hpt_frequency, min_delta, 0x7fffffff);
 
 	if (cp0_timer_irq_installed)
 		return 0;
diff --git a/arch/mips/kernel/cpu-bugs64.c b/arch/mips/kernel/cpu-bugs64.c
index 6392dbe504fb..a378e44688f5 100644
--- a/arch/mips/kernel/cpu-bugs64.c
+++ b/arch/mips/kernel/cpu-bugs64.c
@@ -244,7 +244,7 @@ static inline void check_daddi(void)
 	panic(bug64hit, !DADDI_WAR ? daddiwar : nowar);
 }
 
-int daddiu_bug	= config_enabled(CONFIG_CPU_MIPSR6) ? 0 : -1;
+int daddiu_bug	= IS_ENABLED(CONFIG_CPU_MIPSR6) ? 0 : -1;
 
 static inline void check_daddiu(void)
 {
@@ -314,7 +314,7 @@ static inline void check_daddiu(void)
 
 void __init check_bugs64_early(void)
 {
-	if (!config_enabled(CONFIG_CPU_MIPSR6)) {
+	if (!IS_ENABLED(CONFIG_CPU_MIPSR6)) {
 		check_mult_sh();
 		check_daddiu();
 	}
@@ -322,6 +322,6 @@ void __init check_bugs64_early(void)
 
 void __init check_bugs64(void)
 {
-	if (!config_enabled(CONFIG_CPU_MIPSR6))
+	if (!IS_ENABLED(CONFIG_CPU_MIPSR6))
 		check_daddi();
 }
diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c
index 1f910563fdf6..d76275da54cb 100644
--- a/arch/mips/kernel/csrc-r4k.c
+++ b/arch/mips/kernel/csrc-r4k.c
@@ -23,7 +23,7 @@ static struct clocksource clocksource_mips = {
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static u64 notrace r4k_read_sched_clock(void)
+static u64 __maybe_unused notrace r4k_read_sched_clock(void)
 {
 	return read_c0_count();
 }
@@ -82,7 +82,9 @@ int __init init_r4k_clocksource(void)
 
 	clocksource_register_hz(&clocksource_mips, mips_hpt_frequency);
 
+#ifndef CONFIG_CPU_FREQ
 	sched_clock_register(r4k_read_sched_clock, 32, mips_hpt_frequency);
+#endif
 
 	return 0;
 }
diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c
index 891f5ee63983..6430bff21fff 100644
--- a/arch/mips/kernel/elf.c
+++ b/arch/mips/kernel/elf.c
@@ -8,9 +8,12 @@
  * option) any later version.
  */
 
+#include <linux/binfmts.h>
 #include <linux/elf.h>
+#include <linux/export.h>
 #include <linux/sched.h>
 
+#include <asm/cpu-features.h>
 #include <asm/cpu-info.h>
 
 /* Whether to accept legacy-NaN and 2008-NaN user binaries.  */
@@ -179,7 +182,7 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
 			return -ELIBBAD;
 	}
 
-	if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT))
+	if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
 		return 0;
 
 	fp_abi = state->fp_abi;
@@ -285,7 +288,7 @@ void mips_set_personality_fp(struct arch_elf_state *state)
 	 * not be worried about N32/N64 binaries.
 	 */
 
-	if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT))
+	if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
 		return;
 
 	switch (state->overall_fp_mode) {
@@ -326,3 +329,19 @@ void mips_set_personality_nan(struct arch_elf_state *state)
 		BUG();
 	}
 }
+
+int mips_elf_read_implies_exec(void *elf_ex, int exstack)
+{
+	if (exstack != EXSTACK_DISABLE_X) {
+		/* The binary doesn't request a non-executable stack */
+		return 1;
+	}
+
+	if (!cpu_has_rixi) {
+		/* The CPU doesn't support non-executable memory */
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(mips_elf_read_implies_exec);
diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
index 56e8fede3fd8..cf052204eb0a 100644
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S
@@ -93,21 +93,24 @@ NESTED(kernel_entry, 16, sp)			# kernel entry point
 	jr	t0
 0:
 
+#ifdef CONFIG_USE_OF
 #ifdef CONFIG_MIPS_RAW_APPENDED_DTB
-	PTR_LA		t0, __appended_dtb
+	PTR_LA		t2, __appended_dtb
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	li		t1, 0xd00dfeed
 #else
 	li		t1, 0xedfe0dd0
 #endif
-	lw		t2, (t0)
-	bne		t1, t2, not_found
-	 nop
+	lw		t0, (t2)
+	beq		t0, t1, dtb_found
+#endif
+	li		t1, -2
+	beq		a0, t1, dtb_found
+	move		t2, a1
 
-	move		a1, t0
-	PTR_LI		a0, -2
-not_found:
+	li		t2, 0
+dtb_found:
 #endif
 	PTR_LA		t0, __bss_start		# clear .bss
 	LONG_S		zero, (t0)
@@ -122,6 +125,10 @@ not_found:
 	LONG_S		a2, fw_arg2
 	LONG_S		a3, fw_arg3
 
+#ifdef CONFIG_USE_OF
+	LONG_S		t2, fw_passed_dtb
+#endif
+
 	MTC0		zero, CP0_CONTEXT	# clear context register
 	PTR_LA		$28, init_thread_union
 	/* Set the SP after an empty pt_regs.  */
diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c
index 760217bbb2fa..659e6d3ae335 100644
--- a/arch/mips/kernel/mips-cm.c
+++ b/arch/mips/kernel/mips-cm.c
@@ -251,7 +251,7 @@ int mips_cm_probe(void)
 	mips_cm_probe_l2sync();
 
 	/* determine register width for this CM */
-	mips_cm_is64 = config_enabled(CONFIG_64BIT) && (mips_cm_revision() >= CM_REV_CM3);
+	mips_cm_is64 = IS_ENABLED(CONFIG_64BIT) && (mips_cm_revision() >= CM_REV_CM3);
 
 	for_each_possible_cpu(cpu)
 		spin_lock_init(&per_cpu(cm_core_lock, cpu));
diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
index 7ff2a557f4aa..c3372cac6db2 100644
--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
+++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
@@ -84,7 +84,7 @@ static inline int mipsr6_emul(struct pt_regs *regs, u32 ir)
 				(s32)MIPSInst_SIMM(ir);
 		return 0;
 	case daddiu_op:
-		if (config_enabled(CONFIG_32BIT))
+		if (IS_ENABLED(CONFIG_32BIT))
 			break;
 
 		if (MIPSInst_RT(ir))
@@ -143,7 +143,7 @@ static inline int mipsr6_emul(struct pt_regs *regs, u32 ir)
 					      (u32)regs->regs[MIPSInst_RT(ir)]);
 			return 0;
 		case dsll_op:
-			if (config_enabled(CONFIG_32BIT) || MIPSInst_RS(ir))
+			if (IS_ENABLED(CONFIG_32BIT) || MIPSInst_RS(ir))
 				break;
 
 			if (MIPSInst_RD(ir))
@@ -152,7 +152,7 @@ static inline int mipsr6_emul(struct pt_regs *regs, u32 ir)
 						MIPSInst_FD(ir));
 			return 0;
 		case dsrl_op:
-			if (config_enabled(CONFIG_32BIT) || MIPSInst_RS(ir))
+			if (IS_ENABLED(CONFIG_32BIT) || MIPSInst_RS(ir))
 				break;
 
 			if (MIPSInst_RD(ir))
@@ -161,7 +161,7 @@ static inline int mipsr6_emul(struct pt_regs *regs, u32 ir)
 						MIPSInst_FD(ir));
 			return 0;
 		case daddu_op:
-			if (config_enabled(CONFIG_32BIT) || MIPSInst_FD(ir))
+			if (IS_ENABLED(CONFIG_32BIT) || MIPSInst_FD(ir))
 				break;
 
 			if (MIPSInst_RD(ir))
@@ -170,7 +170,7 @@ static inline int mipsr6_emul(struct pt_regs *regs, u32 ir)
 					(u64)regs->regs[MIPSInst_RT(ir)];
 			return 0;
 		case dsubu_op:
-			if (config_enabled(CONFIG_32BIT) || MIPSInst_FD(ir))
+			if (IS_ENABLED(CONFIG_32BIT) || MIPSInst_FD(ir))
 				break;
 
 			if (MIPSInst_RD(ir))
@@ -283,7 +283,7 @@ static int jr_func(struct pt_regs *regs, u32 ir)
 		err = mipsr6_emul(regs, nir);
 		if (err > 0) {
 			regs->cp0_epc = nepc;
-			err = mips_dsemul(regs, nir, cepc);
+			err = mips_dsemul(regs, nir, epc, cepc);
 			if (err == SIGILL)
 				err = SIGEMT;
 			MIPS_R2_STATS(dsemul);
@@ -498,7 +498,7 @@ static int dmult_func(struct pt_regs *regs, u32 ir)
 	s64 res;
 	s64 rt, rs;
 
-	if (config_enabled(CONFIG_32BIT))
+	if (IS_ENABLED(CONFIG_32BIT))
 		return SIGILL;
 
 	rt = regs->regs[MIPSInst_RT(ir)];
@@ -530,7 +530,7 @@ static int dmultu_func(struct pt_regs *regs, u32 ir)
 	u64 res;
 	u64 rt, rs;
 
-	if (config_enabled(CONFIG_32BIT))
+	if (IS_ENABLED(CONFIG_32BIT))
 		return SIGILL;
 
 	rt = regs->regs[MIPSInst_RT(ir)];
@@ -561,7 +561,7 @@ static int ddiv_func(struct pt_regs *regs, u32 ir)
 {
 	s64 rt, rs;
 
-	if (config_enabled(CONFIG_32BIT))
+	if (IS_ENABLED(CONFIG_32BIT))
 		return SIGILL;
 
 	rt = regs->regs[MIPSInst_RT(ir)];
@@ -586,7 +586,7 @@ static int ddivu_func(struct pt_regs *regs, u32 ir)
 {
 	u64 rt, rs;
 
-	if (config_enabled(CONFIG_32BIT))
+	if (IS_ENABLED(CONFIG_32BIT))
 		return SIGILL;
 
 	rt = regs->regs[MIPSInst_RT(ir)];
@@ -825,7 +825,7 @@ static int dclz_func(struct pt_regs *regs, u32 ir)
 	u64 res;
 	u64 rs;
 
-	if (config_enabled(CONFIG_32BIT))
+	if (IS_ENABLED(CONFIG_32BIT))
 		return SIGILL;
 
 	if (!MIPSInst_RD(ir))
@@ -852,7 +852,7 @@ static int dclo_func(struct pt_regs *regs, u32 ir)
 	u64 res;
 	u64 rs;
 
-	if (config_enabled(CONFIG_32BIT))
+	if (IS_ENABLED(CONFIG_32BIT))
 		return SIGILL;
 
 	if (!MIPSInst_RD(ir))
@@ -1033,7 +1033,7 @@ repeat:
 			if (nir) {
 				err = mipsr6_emul(regs, nir);
 				if (err > 0) {
-					err = mips_dsemul(regs, nir, cpc);
+					err = mips_dsemul(regs, nir, epc, cpc);
 					if (err == SIGILL)
 						err = SIGEMT;
 					MIPS_R2_STATS(dsemul);
@@ -1082,7 +1082,7 @@ repeat:
 			if (nir) {
 				err = mipsr6_emul(regs, nir);
 				if (err > 0) {
-					err = mips_dsemul(regs, nir, cpc);
+					err = mips_dsemul(regs, nir, epc, cpc);
 					if (err == SIGILL)
 						err = SIGEMT;
 					MIPS_R2_STATS(dsemul);
@@ -1149,7 +1149,7 @@ repeat:
 		if (nir) {
 			err = mipsr6_emul(regs, nir);
 			if (err > 0) {
-				err = mips_dsemul(regs, nir, cpc);
+				err = mips_dsemul(regs, nir, epc, cpc);
 				if (err == SIGILL)
 					err = SIGEMT;
 				MIPS_R2_STATS(dsemul);
@@ -1484,7 +1484,7 @@ fpu_emul:
 		break;
 
 	case ldl_op:
-		if (config_enabled(CONFIG_32BIT)) {
+		if (IS_ENABLED(CONFIG_32BIT)) {
 		    err = SIGILL;
 		    break;
 		}
@@ -1603,7 +1603,7 @@ fpu_emul:
 		break;
 
 	case ldr_op:
-		if (config_enabled(CONFIG_32BIT)) {
+		if (IS_ENABLED(CONFIG_32BIT)) {
 		    err = SIGILL;
 		    break;
 		}
@@ -1722,7 +1722,7 @@ fpu_emul:
 		break;
 
 	case sdl_op:
-		if (config_enabled(CONFIG_32BIT)) {
+		if (IS_ENABLED(CONFIG_32BIT)) {
 		    err = SIGILL;
 		    break;
 		}
@@ -1840,7 +1840,7 @@ fpu_emul:
 		break;
 
 	case sdr_op:
-		if (config_enabled(CONFIG_32BIT)) {
+		if (IS_ENABLED(CONFIG_32BIT)) {
 		    err = SIGILL;
 		    break;
 		}
@@ -2072,7 +2072,7 @@ fpu_emul:
 		break;
 
 	case lld_op:
-		if (config_enabled(CONFIG_32BIT)) {
+		if (IS_ENABLED(CONFIG_32BIT)) {
 		    err = SIGILL;
 		    break;
 		}
@@ -2133,7 +2133,7 @@ fpu_emul:
 		break;
 
 	case scd_op:
-		if (config_enabled(CONFIG_32BIT)) {
+		if (IS_ENABLED(CONFIG_32BIT)) {
 		    err = SIGILL;
 		    break;
 		}
diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c
index adda3ffb9b78..5b31a9405ebc 100644
--- a/arch/mips/kernel/pm-cps.c
+++ b/arch/mips/kernel/pm-cps.c
@@ -148,7 +148,7 @@ int cps_pm_enter_state(enum cps_pm_state state)
 	}
 
 	/* Setup the VPE to run mips_cps_pm_restore when started again */
-	if (config_enabled(CONFIG_CPU_PM) && state == CPS_PM_POWER_GATED) {
+	if (IS_ENABLED(CONFIG_CPU_PM) && state == CPS_PM_POWER_GATED) {
 		/* Power gating relies upon CPS SMP */
 		if (!mips_cps_smp_in_use())
 			return -EINVAL;
@@ -387,7 +387,7 @@ static void * __init cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
 	memset(labels, 0, sizeof(labels));
 	memset(relocs, 0, sizeof(relocs));
 
-	if (config_enabled(CONFIG_CPU_PM) && state == CPS_PM_POWER_GATED) {
+	if (IS_ENABLED(CONFIG_CPU_PM) && state == CPS_PM_POWER_GATED) {
 		/* Power gating relies upon CPS SMP */
 		if (!mips_cps_smp_in_use())
 			goto out_err;
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 813ed7829c61..7429ad09fbe3 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -30,6 +30,7 @@
 #include <asm/asm.h>
 #include <asm/bootinfo.h>
 #include <asm/cpu.h>
+#include <asm/dsemul.h>
 #include <asm/dsp.h>
 #include <asm/fpu.h>
 #include <asm/msa.h>
@@ -68,11 +69,22 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
 	lose_fpu(0);
 	clear_thread_flag(TIF_MSA_CTX_LIVE);
 	clear_used_math();
+	atomic_set(&current->thread.bd_emu_frame, BD_EMUFRAME_NONE);
 	init_dsp();
 	regs->cp0_epc = pc;
 	regs->regs[29] = sp;
 }
 
+void exit_thread(struct task_struct *tsk)
+{
+	/*
+	 * User threads may have allocated a delay slot emulation frame.
+	 * If so, clean up that allocation.
+	 */
+	if (!(current->flags & PF_KTHREAD))
+		dsemul_thread_cleanup(tsk);
+}
+
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
 	/*
@@ -159,6 +171,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	clear_tsk_thread_flag(p, TIF_FPUBOUND);
 #endif /* CONFIG_MIPS_MT_FPAFF */
 
+	atomic_set(&p->thread.bd_emu_frame, BD_EMUFRAME_NONE);
+
 	if (clone_flags & CLONE_SETTLS)
 		ti->tp_value = regs->regs[7];
 
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 9c0b387d6427..51d3988933f8 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -348,7 +348,7 @@ EXPORT(sysn32_call_table)
 	PTR	sys_ni_syscall			/* available, was setaltroot */
 	PTR	sys_add_key
 	PTR	sys_request_key
-	PTR	sys_keyctl			/* 6245 */
+	PTR	compat_sys_keyctl		/* 6245 */
 	PTR	sys_set_thread_area
 	PTR	sys_inotify_init
 	PTR	sys_inotify_add_watch
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index f4f28b1580de..6efa7136748f 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -504,7 +504,7 @@ EXPORT(sys32_call_table)
 	PTR	sys_ni_syscall			/* available, was setaltroot */
 	PTR	sys_add_key			/* 4280 */
 	PTR	sys_request_key
-	PTR	sys_keyctl
+	PTR	compat_sys_keyctl
 	PTR	sys_set_thread_area
 	PTR	sys_inotify_init
 	PTR	sys_inotify_add_watch		/* 4285 */
diff --git a/arch/mips/kernel/segment.c b/arch/mips/kernel/segment.c
index 87bc74a5a518..2703f218202e 100644
--- a/arch/mips/kernel/segment.c
+++ b/arch/mips/kernel/segment.c
@@ -26,17 +26,20 @@ static void build_segment_config(char *str, unsigned int cfg)
 
 	/*
 	 * Access modes MK, MSK and MUSK are mapped segments. Therefore
-	 * there is no direct physical address mapping.
+	 * there is no direct physical address mapping unless it becomes
+	 * unmapped uncached at error level due to EU.
 	 */
-	if ((am == 0) || (am > 3)) {
+	if ((am == 0) || (am > 3) || (cfg & MIPS_SEGCFG_EU))
 		str += sprintf(str, "         %03lx",
 			((cfg & MIPS_SEGCFG_PA) >> MIPS_SEGCFG_PA_SHIFT));
+	else
+		str += sprintf(str, "         UND");
+
+	if ((am == 0) || (am > 3))
 		str += sprintf(str, "         %01ld",
 			((cfg & MIPS_SEGCFG_C) >> MIPS_SEGCFG_C_SHIFT));
-	} else {
-		str += sprintf(str, "         UND");
+	else
 		str += sprintf(str, "         U");
-	}
 
 	/* Exception configuration. */
 	str += sprintf(str, "       %01ld\n",
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index ef408a03e818..36cf8d65c47d 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -875,6 +875,10 @@ void __init setup_arch(char **cmdline_p)
 unsigned long kernelsp[NR_CPUS];
 unsigned long fw_arg0, fw_arg1, fw_arg2, fw_arg3;
 
+#ifdef CONFIG_USE_OF
+unsigned long fw_passed_dtb;
+#endif
+
 #ifdef CONFIG_DEBUG_FS
 struct dentry *mips_debugfs_dir;
 static int __init debugfs_mips(void)
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index ae4231452115..9e224469c788 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -165,7 +165,7 @@ static int save_msa_extcontext(void __user *buf)
 		 * should already have been done when handling scalar FP
 		 * context.
 		 */
-		BUG_ON(config_enabled(CONFIG_EVA));
+		BUG_ON(IS_ENABLED(CONFIG_EVA));
 
 		err = __put_user(read_msa_csr(), &msa->csr);
 		err |= _save_msa_all_upper(&msa->wr);
@@ -195,7 +195,7 @@ static int restore_msa_extcontext(void __user *buf, unsigned int size)
 	unsigned int csr;
 	int i, err;
 
-	if (!config_enabled(CONFIG_CPU_HAS_MSA))
+	if (!IS_ENABLED(CONFIG_CPU_HAS_MSA))
 		return SIGSYS;
 
 	if (size != sizeof(*msa))
@@ -215,7 +215,7 @@ static int restore_msa_extcontext(void __user *buf, unsigned int size)
 		 * scalar FP context, so FPU & MSA should have already been
 		 * disabled whilst handling scalar FP context.
 		 */
-		BUG_ON(config_enabled(CONFIG_EVA));
+		BUG_ON(IS_ENABLED(CONFIG_EVA));
 
 		write_msa_csr(csr);
 		err |= _restore_msa_all_upper(&msa->wr);
@@ -315,7 +315,7 @@ int protected_save_fp_context(void __user *sc)
 	 * EVA does not have userland equivalents of ldc1 or sdc1, so
 	 * save to the kernel FP context & copy that to userland below.
 	 */
-	if (config_enabled(CONFIG_EVA))
+	if (IS_ENABLED(CONFIG_EVA))
 		lose_fpu(1);
 
 	while (1) {
@@ -378,7 +378,7 @@ int protected_restore_fp_context(void __user *sc)
 	 * disable the FPU here such that the code below simply copies to
 	 * the kernel FP context.
 	 */
-	if (config_enabled(CONFIG_EVA))
+	if (IS_ENABLED(CONFIG_EVA))
 		lose_fpu(0);
 
 	while (1) {
@@ -772,6 +772,14 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 	struct mips_abi *abi = current->thread.abi;
 	void *vdso = current->mm->context.vdso;
 
+	/*
+	 * If we were emulating a delay slot instruction, exit that frame such
+	 * that addresses in the sigframe are as expected for userland and we
+	 * don't have a problem if we reuse the thread's frame for an
+	 * instruction within the signal handler.
+	 */
+	dsemul_thread_rollback(regs);
+
 	if (regs->regs[0]) {
 		switch(regs->regs[2]) {
 		case ERESTART_RESTARTBLOCK:
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index 78c8349d151c..97b7c51b8251 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -6,129 +6,26 @@
  * Copyright (C) 1991, 1992  Linus Torvalds
  * Copyright (C) 1994 - 2000, 2006  Ralf Baechle
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 2016, Imagination Technologies Ltd.
  */
-#include <linux/cache.h>
-#include <linux/compat.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/signal.h>
 #include <linux/syscalls.h>
-#include <linux/errno.h>
-#include <linux/wait.h>
-#include <linux/ptrace.h>
-#include <linux/suspend.h>
-#include <linux/compiler.h>
-#include <linux/uaccess.h>
 
-#include <asm/abi.h>
-#include <asm/asm.h>
+#include <asm/compat.h>
 #include <asm/compat-signal.h>
-#include <linux/bitops.h>
-#include <asm/cacheflush.h>
-#include <asm/sim.h>
-#include <asm/ucontext.h>
-#include <asm/fpu.h>
-#include <asm/war.h>
-#include <asm/dsp.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
 
 #include "signal-common.h"
 
-/*
- * Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
- */
-#define __NR_O32_restart_syscall	4253
-
 /* 32-bit compatibility types */
 
 typedef unsigned int __sighandler32_t;
 typedef void (*vfptr_t)(void);
 
-struct ucontext32 {
-	u32		    uc_flags;
-	s32		    uc_link;
-	compat_stack_t      uc_stack;
-	struct sigcontext32 uc_mcontext;
-	compat_sigset_t	    uc_sigmask;	  /* mask last for extensibility */
-};
-
-struct sigframe32 {
-	u32 sf_ass[4];		/* argument save space for o32 */
-	u32 sf_pad[2];		/* Was: signal trampoline */
-	struct sigcontext32 sf_sc;
-	compat_sigset_t sf_mask;
-};
-
-struct rt_sigframe32 {
-	u32 rs_ass[4];			/* argument save space for o32 */
-	u32 rs_pad[2];			/* Was: signal trampoline */
-	compat_siginfo_t rs_info;
-	struct ucontext32 rs_uc;
-};
-
-static int setup_sigcontext32(struct pt_regs *regs,
-			      struct sigcontext32 __user *sc)
-{
-	int err = 0;
-	int i;
-
-	err |= __put_user(regs->cp0_epc, &sc->sc_pc);
-
-	err |= __put_user(0, &sc->sc_regs[0]);
-	for (i = 1; i < 32; i++)
-		err |= __put_user(regs->regs[i], &sc->sc_regs[i]);
-
-	err |= __put_user(regs->hi, &sc->sc_mdhi);
-	err |= __put_user(regs->lo, &sc->sc_mdlo);
-	if (cpu_has_dsp) {
-		err |= __put_user(rddsp(DSP_MASK), &sc->sc_dsp);
-		err |= __put_user(mfhi1(), &sc->sc_hi1);
-		err |= __put_user(mflo1(), &sc->sc_lo1);
-		err |= __put_user(mfhi2(), &sc->sc_hi2);
-		err |= __put_user(mflo2(), &sc->sc_lo2);
-		err |= __put_user(mfhi3(), &sc->sc_hi3);
-		err |= __put_user(mflo3(), &sc->sc_lo3);
-	}
-
-	/*
-	 * Save FPU state to signal context.  Signal handler
-	 * will "inherit" current FPU state.
-	 */
-	err |= protected_save_fp_context(sc);
-
-	return err;
-}
-
-static int restore_sigcontext32(struct pt_regs *regs,
-				struct sigcontext32 __user *sc)
-{
-	int err = 0;
-	s32 treg;
-	int i;
-
-	/* Always make any pending restarted system calls return -EINTR */
-	current->restart_block.fn = do_no_restart_syscall;
-
-	err |= __get_user(regs->cp0_epc, &sc->sc_pc);
-	err |= __get_user(regs->hi, &sc->sc_mdhi);
-	err |= __get_user(regs->lo, &sc->sc_mdlo);
-	if (cpu_has_dsp) {
-		err |= __get_user(treg, &sc->sc_hi1); mthi1(treg);
-		err |= __get_user(treg, &sc->sc_lo1); mtlo1(treg);
-		err |= __get_user(treg, &sc->sc_hi2); mthi2(treg);
-		err |= __get_user(treg, &sc->sc_lo2); mtlo2(treg);
-		err |= __get_user(treg, &sc->sc_hi3); mthi3(treg);
-		err |= __get_user(treg, &sc->sc_lo3); mtlo3(treg);
-		err |= __get_user(treg, &sc->sc_dsp); wrdsp(treg, DSP_MASK);
-	}
-
-	for (i = 1; i < 32; i++)
-		err |= __get_user(regs->regs[i], &sc->sc_regs[i]);
-
-	return err ?: protected_restore_fp_context(sc);
-}
-
 /*
  * Atomically swap in the new signal mask, and wait for a signal.
  */
@@ -247,176 +144,3 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 
 	return 0;
 }
-
-asmlinkage void sys32_sigreturn(nabi_no_regargs struct pt_regs regs)
-{
-	struct sigframe32 __user *frame;
-	sigset_t blocked;
-	int sig;
-
-	frame = (struct sigframe32 __user *) regs.regs[29];
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-	if (__copy_conv_sigset_from_user(&blocked, &frame->sf_mask))
-		goto badframe;
-
-	set_current_blocked(&blocked);
-
-	sig = restore_sigcontext32(&regs, &frame->sf_sc);
-	if (sig < 0)
-		goto badframe;
-	else if (sig)
-		force_sig(sig, current);
-
-	/*
-	 * Don't let your children do this ...
-	 */
-	__asm__ __volatile__(
-		"move\t$29, %0\n\t"
-		"j\tsyscall_exit"
-		:/* no outputs */
-		:"r" (&regs));
-	/* Unreached */
-
-badframe:
-	force_sig(SIGSEGV, current);
-}
-
-asmlinkage void sys32_rt_sigreturn(nabi_no_regargs struct pt_regs regs)
-{
-	struct rt_sigframe32 __user *frame;
-	sigset_t set;
-	int sig;
-
-	frame = (struct rt_sigframe32 __user *) regs.regs[29];
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-	if (__copy_conv_sigset_from_user(&set, &frame->rs_uc.uc_sigmask))
-		goto badframe;
-
-	set_current_blocked(&set);
-
-	sig = restore_sigcontext32(&regs, &frame->rs_uc.uc_mcontext);
-	if (sig < 0)
-		goto badframe;
-	else if (sig)
-		force_sig(sig, current);
-
-	if (compat_restore_altstack(&frame->rs_uc.uc_stack))
-		goto badframe;
-
-	/*
-	 * Don't let your children do this ...
-	 */
-	__asm__ __volatile__(
-		"move\t$29, %0\n\t"
-		"j\tsyscall_exit"
-		:/* no outputs */
-		:"r" (&regs));
-	/* Unreached */
-
-badframe:
-	force_sig(SIGSEGV, current);
-}
-
-static int setup_frame_32(void *sig_return, struct ksignal *ksig,
-			  struct pt_regs *regs, sigset_t *set)
-{
-	struct sigframe32 __user *frame;
-	int err = 0;
-
-	frame = get_sigframe(ksig, regs, sizeof(*frame));
-	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
-		return -EFAULT;
-
-	err |= setup_sigcontext32(regs, &frame->sf_sc);
-	err |= __copy_conv_sigset_to_user(&frame->sf_mask, set);
-
-	if (err)
-		return -EFAULT;
-
-	/*
-	 * Arguments to signal handler:
-	 *
-	 *   a0 = signal number
-	 *   a1 = 0 (should be cause)
-	 *   a2 = pointer to struct sigcontext
-	 *
-	 * $25 and c0_epc point to the signal handler, $29 points to the
-	 * struct sigframe.
-	 */
-	regs->regs[ 4] = ksig->sig;
-	regs->regs[ 5] = 0;
-	regs->regs[ 6] = (unsigned long) &frame->sf_sc;
-	regs->regs[29] = (unsigned long) frame;
-	regs->regs[31] = (unsigned long) sig_return;
-	regs->cp0_epc = regs->regs[25] = (unsigned long) ksig->ka.sa.sa_handler;
-
-	DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n",
-	       current->comm, current->pid,
-	       frame, regs->cp0_epc, regs->regs[31]);
-
-	return 0;
-}
-
-static int setup_rt_frame_32(void *sig_return, struct ksignal *ksig,
-			     struct pt_regs *regs, sigset_t *set)
-{
-	struct rt_sigframe32 __user *frame;
-	int err = 0;
-
-	frame = get_sigframe(ksig, regs, sizeof(*frame));
-	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
-		return -EFAULT;
-
-	/* Convert (siginfo_t -> compat_siginfo_t) and copy to user. */
-	err |= copy_siginfo_to_user32(&frame->rs_info, &ksig->info);
-
-	/* Create the ucontext.	 */
-	err |= __put_user(0, &frame->rs_uc.uc_flags);
-	err |= __put_user(0, &frame->rs_uc.uc_link);
-	err |= __compat_save_altstack(&frame->rs_uc.uc_stack, regs->regs[29]);
-	err |= setup_sigcontext32(regs, &frame->rs_uc.uc_mcontext);
-	err |= __copy_conv_sigset_to_user(&frame->rs_uc.uc_sigmask, set);
-
-	if (err)
-		return -EFAULT;
-
-	/*
-	 * Arguments to signal handler:
-	 *
-	 *   a0 = signal number
-	 *   a1 = 0 (should be cause)
-	 *   a2 = pointer to ucontext
-	 *
-	 * $25 and c0_epc point to the signal handler, $29 points to
-	 * the struct rt_sigframe32.
-	 */
-	regs->regs[ 4] = ksig->sig;
-	regs->regs[ 5] = (unsigned long) &frame->rs_info;
-	regs->regs[ 6] = (unsigned long) &frame->rs_uc;
-	regs->regs[29] = (unsigned long) frame;
-	regs->regs[31] = (unsigned long) sig_return;
-	regs->cp0_epc = regs->regs[25] = (unsigned long) ksig->ka.sa.sa_handler;
-
-	DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n",
-	       current->comm, current->pid,
-	       frame, regs->cp0_epc, regs->regs[31]);
-
-	return 0;
-}
-
-/*
- * o32 compatibility on 64-bit kernels, without DSP ASE
- */
-struct mips_abi mips_abi_32 = {
-	.setup_frame	= setup_frame_32,
-	.setup_rt_frame = setup_rt_frame_32,
-	.restart	= __NR_O32_restart_syscall,
-
-	.off_sc_fpregs = offsetof(struct sigcontext32, sc_fpregs),
-	.off_sc_fpc_csr = offsetof(struct sigcontext32, sc_fpc_csr),
-	.off_sc_used_math = offsetof(struct sigcontext32, sc_used_math),
-
-	.vdso		= &vdso_image_o32,
-};
diff --git a/arch/mips/kernel/signal_o32.c b/arch/mips/kernel/signal_o32.c
new file mode 100644
index 000000000000..5e169fc5ca5c
--- /dev/null
+++ b/arch/mips/kernel/signal_o32.c
@@ -0,0 +1,285 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1991, 1992  Linus Torvalds
+ * Copyright (C) 1994 - 2000, 2006  Ralf Baechle
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 2016, Imagination Technologies Ltd.
+ */
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/uaccess.h>
+
+#include <asm/abi.h>
+#include <asm/compat-signal.h>
+#include <asm/dsp.h>
+#include <asm/sim.h>
+#include <asm/unistd.h>
+
+#include "signal-common.h"
+
+/*
+ * Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
+ */
+#define __NR_O32_restart_syscall	4253
+
+struct sigframe32 {
+	u32 sf_ass[4];		/* argument save space for o32 */
+	u32 sf_pad[2];		/* Was: signal trampoline */
+	struct sigcontext32 sf_sc;
+	compat_sigset_t sf_mask;
+};
+
+struct ucontext32 {
+	u32		    uc_flags;
+	s32		    uc_link;
+	compat_stack_t      uc_stack;
+	struct sigcontext32 uc_mcontext;
+	compat_sigset_t	    uc_sigmask;	  /* mask last for extensibility */
+};
+
+struct rt_sigframe32 {
+	u32 rs_ass[4];			/* argument save space for o32 */
+	u32 rs_pad[2];			/* Was: signal trampoline */
+	compat_siginfo_t rs_info;
+	struct ucontext32 rs_uc;
+};
+
+static int setup_sigcontext32(struct pt_regs *regs,
+			      struct sigcontext32 __user *sc)
+{
+	int err = 0;
+	int i;
+
+	err |= __put_user(regs->cp0_epc, &sc->sc_pc);
+
+	err |= __put_user(0, &sc->sc_regs[0]);
+	for (i = 1; i < 32; i++)
+		err |= __put_user(regs->regs[i], &sc->sc_regs[i]);
+
+	err |= __put_user(regs->hi, &sc->sc_mdhi);
+	err |= __put_user(regs->lo, &sc->sc_mdlo);
+	if (cpu_has_dsp) {
+		err |= __put_user(rddsp(DSP_MASK), &sc->sc_dsp);
+		err |= __put_user(mfhi1(), &sc->sc_hi1);
+		err |= __put_user(mflo1(), &sc->sc_lo1);
+		err |= __put_user(mfhi2(), &sc->sc_hi2);
+		err |= __put_user(mflo2(), &sc->sc_lo2);
+		err |= __put_user(mfhi3(), &sc->sc_hi3);
+		err |= __put_user(mflo3(), &sc->sc_lo3);
+	}
+
+	/*
+	 * Save FPU state to signal context.  Signal handler
+	 * will "inherit" current FPU state.
+	 */
+	err |= protected_save_fp_context(sc);
+
+	return err;
+}
+
+static int restore_sigcontext32(struct pt_regs *regs,
+				struct sigcontext32 __user *sc)
+{
+	int err = 0;
+	s32 treg;
+	int i;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	err |= __get_user(regs->cp0_epc, &sc->sc_pc);
+	err |= __get_user(regs->hi, &sc->sc_mdhi);
+	err |= __get_user(regs->lo, &sc->sc_mdlo);
+	if (cpu_has_dsp) {
+		err |= __get_user(treg, &sc->sc_hi1); mthi1(treg);
+		err |= __get_user(treg, &sc->sc_lo1); mtlo1(treg);
+		err |= __get_user(treg, &sc->sc_hi2); mthi2(treg);
+		err |= __get_user(treg, &sc->sc_lo2); mtlo2(treg);
+		err |= __get_user(treg, &sc->sc_hi3); mthi3(treg);
+		err |= __get_user(treg, &sc->sc_lo3); mtlo3(treg);
+		err |= __get_user(treg, &sc->sc_dsp); wrdsp(treg, DSP_MASK);
+	}
+
+	for (i = 1; i < 32; i++)
+		err |= __get_user(regs->regs[i], &sc->sc_regs[i]);
+
+	return err ?: protected_restore_fp_context(sc);
+}
+
+static int setup_frame_32(void *sig_return, struct ksignal *ksig,
+			  struct pt_regs *regs, sigset_t *set)
+{
+	struct sigframe32 __user *frame;
+	int err = 0;
+
+	frame = get_sigframe(ksig, regs, sizeof(*frame));
+	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
+		return -EFAULT;
+
+	err |= setup_sigcontext32(regs, &frame->sf_sc);
+	err |= __copy_conv_sigset_to_user(&frame->sf_mask, set);
+
+	if (err)
+		return -EFAULT;
+
+	/*
+	 * Arguments to signal handler:
+	 *
+	 *   a0 = signal number
+	 *   a1 = 0 (should be cause)
+	 *   a2 = pointer to struct sigcontext
+	 *
+	 * $25 and c0_epc point to the signal handler, $29 points to the
+	 * struct sigframe.
+	 */
+	regs->regs[ 4] = ksig->sig;
+	regs->regs[ 5] = 0;
+	regs->regs[ 6] = (unsigned long) &frame->sf_sc;
+	regs->regs[29] = (unsigned long) frame;
+	regs->regs[31] = (unsigned long) sig_return;
+	regs->cp0_epc = regs->regs[25] = (unsigned long) ksig->ka.sa.sa_handler;
+
+	DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n",
+	       current->comm, current->pid,
+	       frame, regs->cp0_epc, regs->regs[31]);
+
+	return 0;
+}
+
+asmlinkage void sys32_rt_sigreturn(nabi_no_regargs struct pt_regs regs)
+{
+	struct rt_sigframe32 __user *frame;
+	sigset_t set;
+	int sig;
+
+	frame = (struct rt_sigframe32 __user *) regs.regs[29];
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__copy_conv_sigset_from_user(&set, &frame->rs_uc.uc_sigmask))
+		goto badframe;
+
+	set_current_blocked(&set);
+
+	sig = restore_sigcontext32(&regs, &frame->rs_uc.uc_mcontext);
+	if (sig < 0)
+		goto badframe;
+	else if (sig)
+		force_sig(sig, current);
+
+	if (compat_restore_altstack(&frame->rs_uc.uc_stack))
+		goto badframe;
+
+	/*
+	 * Don't let your children do this ...
+	 */
+	__asm__ __volatile__(
+		"move\t$29, %0\n\t"
+		"j\tsyscall_exit"
+		:/* no outputs */
+		:"r" (&regs));
+	/* Unreached */
+
+badframe:
+	force_sig(SIGSEGV, current);
+}
+
+static int setup_rt_frame_32(void *sig_return, struct ksignal *ksig,
+			     struct pt_regs *regs, sigset_t *set)
+{
+	struct rt_sigframe32 __user *frame;
+	int err = 0;
+
+	frame = get_sigframe(ksig, regs, sizeof(*frame));
+	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
+		return -EFAULT;
+
+	/* Convert (siginfo_t -> compat_siginfo_t) and copy to user. */
+	err |= copy_siginfo_to_user32(&frame->rs_info, &ksig->info);
+
+	/* Create the ucontext.	 */
+	err |= __put_user(0, &frame->rs_uc.uc_flags);
+	err |= __put_user(0, &frame->rs_uc.uc_link);
+	err |= __compat_save_altstack(&frame->rs_uc.uc_stack, regs->regs[29]);
+	err |= setup_sigcontext32(regs, &frame->rs_uc.uc_mcontext);
+	err |= __copy_conv_sigset_to_user(&frame->rs_uc.uc_sigmask, set);
+
+	if (err)
+		return -EFAULT;
+
+	/*
+	 * Arguments to signal handler:
+	 *
+	 *   a0 = signal number
+	 *   a1 = 0 (should be cause)
+	 *   a2 = pointer to ucontext
+	 *
+	 * $25 and c0_epc point to the signal handler, $29 points to
+	 * the struct rt_sigframe32.
+	 */
+	regs->regs[ 4] = ksig->sig;
+	regs->regs[ 5] = (unsigned long) &frame->rs_info;
+	regs->regs[ 6] = (unsigned long) &frame->rs_uc;
+	regs->regs[29] = (unsigned long) frame;
+	regs->regs[31] = (unsigned long) sig_return;
+	regs->cp0_epc = regs->regs[25] = (unsigned long) ksig->ka.sa.sa_handler;
+
+	DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n",
+	       current->comm, current->pid,
+	       frame, regs->cp0_epc, regs->regs[31]);
+
+	return 0;
+}
+
+/*
+ * o32 compatibility on 64-bit kernels, without DSP ASE
+ */
+struct mips_abi mips_abi_32 = {
+	.setup_frame	= setup_frame_32,
+	.setup_rt_frame = setup_rt_frame_32,
+	.restart	= __NR_O32_restart_syscall,
+
+	.off_sc_fpregs = offsetof(struct sigcontext32, sc_fpregs),
+	.off_sc_fpc_csr = offsetof(struct sigcontext32, sc_fpc_csr),
+	.off_sc_used_math = offsetof(struct sigcontext32, sc_used_math),
+
+	.vdso		= &vdso_image_o32,
+};
+
+
+asmlinkage void sys32_sigreturn(nabi_no_regargs struct pt_regs regs)
+{
+	struct sigframe32 __user *frame;
+	sigset_t blocked;
+	int sig;
+
+	frame = (struct sigframe32 __user *) regs.regs[29];
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__copy_conv_sigset_from_user(&blocked, &frame->sf_mask))
+		goto badframe;
+
+	set_current_blocked(&blocked);
+
+	sig = restore_sigcontext32(&regs, &frame->sf_sc);
+	if (sig < 0)
+		goto badframe;
+	else if (sig)
+		force_sig(sig, current);
+
+	/*
+	 * Don't let your children do this ...
+	 */
+	__asm__ __volatile__(
+		"move\t$29, %0\n\t"
+		"j\tsyscall_exit"
+		:/* no outputs */
+		:"r" (&regs));
+	/* Unreached */
+
+badframe:
+	force_sig(SIGSEGV, current);
+}
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index e02addc0307f..6d0f1321e084 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -363,6 +363,7 @@ static int bmips_cpu_disable(void)
 	pr_info("SMP: CPU%d is offline\n", cpu);
 
 	set_cpu_online(cpu, false);
+	calculate_cpu_foreign_map();
 	cpumask_clear_cpu(cpu, &cpu_callin_map);
 	clear_c0_status(IE_IRQ5);
 
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 4ed36f288d64..e9d9fc6c754c 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -46,8 +46,8 @@ static unsigned core_vpe_count(unsigned core)
 	if (threads_disabled)
 		return 1;
 
-	if ((!config_enabled(CONFIG_MIPS_MT_SMP) || !cpu_has_mipsmt)
-		&& (!config_enabled(CONFIG_CPU_MIPSR6) || !cpu_has_vp))
+	if ((!IS_ENABLED(CONFIG_MIPS_MT_SMP) || !cpu_has_mipsmt)
+		&& (!IS_ENABLED(CONFIG_CPU_MIPSR6) || !cpu_has_vp))
 		return 1;
 
 	mips_cm_lock_other(core, 0);
@@ -206,7 +206,7 @@ err_out:
 	}
 }
 
-static void boot_core(unsigned core)
+static void boot_core(unsigned int core, unsigned int vpe_id)
 {
 	u32 access, stat, seq_state;
 	unsigned timeout;
@@ -233,8 +233,9 @@ static void boot_core(unsigned core)
 		mips_cpc_lock_other(core);
 
 		if (mips_cm_revision() >= CM_REV_CM3) {
-			/* Run VP0 following the reset */
-			write_cpc_co_vp_run(0x1);
+			/* Run only the requested VP following the reset */
+			write_cpc_co_vp_stop(0xf);
+			write_cpc_co_vp_run(1 << vpe_id);
 
 			/*
 			 * Ensure that the VP_RUN register is written before the
@@ -306,7 +307,7 @@ static void cps_boot_secondary(int cpu, struct task_struct *idle)
 
 	if (!test_bit(core, core_power)) {
 		/* Boot a VPE on a powered down core */
-		boot_core(core);
+		boot_core(core, vpe_id);
 		goto out;
 	}
 
@@ -397,6 +398,7 @@ static int cps_cpu_disable(void)
 	atomic_sub(1 << cpu_vpe_id(&current_cpu_data), &core_cfg->vpe_mask);
 	smp_mb__after_atomic();
 	set_cpu_online(cpu, false);
+	calculate_cpu_foreign_map();
 	cpumask_clear_cpu(cpu, &cpu_callin_map);
 
 	return 0;
@@ -411,14 +413,16 @@ static enum {
 
 void play_dead(void)
 {
-	unsigned cpu, core;
+	unsigned int cpu, core, vpe_id;
 
 	local_irq_disable();
 	idle_task_exit();
 	cpu = smp_processor_id();
 	cpu_death = CPU_DEATH_POWER;
 
-	if (cpu_has_mipsmt) {
+	pr_debug("CPU%d going offline\n", cpu);
+
+	if (cpu_has_mipsmt || cpu_has_vp) {
 		core = cpu_data[cpu].core;
 
 		/* Look for another online VPE within the core */
@@ -439,10 +443,21 @@ void play_dead(void)
 	complete(&cpu_death_chosen);
 
 	if (cpu_death == CPU_DEATH_HALT) {
-		/* Halt this TC */
-		write_c0_tchalt(TCHALT_H);
-		instruction_hazard();
+		vpe_id = cpu_vpe_id(&cpu_data[cpu]);
+
+		pr_debug("Halting core %d VP%d\n", core, vpe_id);
+		if (cpu_has_mipsmt) {
+			/* Halt this TC */
+			write_c0_tchalt(TCHALT_H);
+			instruction_hazard();
+		} else if (cpu_has_vp) {
+			write_cpc_cl_vp_stop(1 << vpe_id);
+
+			/* Ensure that the VP_STOP register is written */
+			wmb();
+		}
 	} else {
+		pr_debug("Gating power to core %d\n", core);
 		/* Power down the core */
 		cps_pm_enter_state(CPS_PM_POWER_GATED);
 	}
@@ -469,6 +484,7 @@ static void wait_for_sibling_halt(void *ptr_cpu)
 static void cps_cpu_die(unsigned int cpu)
 {
 	unsigned core = cpu_data[cpu].core;
+	unsigned int vpe_id = cpu_vpe_id(&cpu_data[cpu]);
 	unsigned stat;
 	int err;
 
@@ -497,10 +513,12 @@ static void cps_cpu_die(unsigned int cpu)
 		 * in which case the CPC will refuse to power down the core.
 		 */
 		do {
+			mips_cm_lock_other(core, vpe_id);
 			mips_cpc_lock_other(core);
 			stat = read_cpc_co_stat_conf();
 			stat &= CPC_Cx_STAT_CONF_SEQSTATE_MSK;
 			mips_cpc_unlock_other();
+			mips_cm_unlock_other();
 		} while (stat != CPC_Cx_STAT_CONF_SEQSTATE_D0 &&
 			 stat != CPC_Cx_STAT_CONF_SEQSTATE_D2 &&
 			 stat != CPC_Cx_STAT_CONF_SEQSTATE_U2);
@@ -517,6 +535,12 @@ static void cps_cpu_die(unsigned int cpu)
 					       (void *)(unsigned long)cpu, 1);
 		if (err)
 			panic("Failed to call remote sibling CPU\n");
+	} else if (cpu_has_vp) {
+		do {
+			mips_cm_lock_other(core, vpe_id);
+			stat = read_cpc_co_vp_running();
+			mips_cm_unlock_other();
+		} while (stat & (1 << vpe_id));
 	}
 }
 
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index f9d01e953acb..f95f094f36e4 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -72,7 +72,7 @@ EXPORT_SYMBOL(cpu_core_map);
  * A logcal cpu mask containing only one VPE per core to
  * reduce the number of IPIs on large MT systems.
  */
-cpumask_t cpu_foreign_map __read_mostly;
+cpumask_t cpu_foreign_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_foreign_map);
 
 /* representing cpus for which sibling maps can be computed */
@@ -124,7 +124,7 @@ static inline void set_cpu_core_map(int cpu)
  * Calculate a new cpu_foreign_map mask whenever a
  * new cpu appears or disappears.
  */
-static inline void calculate_cpu_foreign_map(void)
+void calculate_cpu_foreign_map(void)
 {
 	int i, k, core_present;
 	cpumask_t temp_foreign_map;
@@ -141,7 +141,9 @@ static inline void calculate_cpu_foreign_map(void)
 			cpumask_set_cpu(i, &temp_foreign_map);
 	}
 
-	cpumask_copy(&cpu_foreign_map, &temp_foreign_map);
+	for_each_online_cpu(i)
+		cpumask_andnot(&cpu_foreign_map[i],
+			       &temp_foreign_map, &cpu_sibling_map[i]);
 }
 
 struct plat_smp_ops *mp_ops;
@@ -344,16 +346,9 @@ asmlinkage void start_secondary(void)
 static void stop_this_cpu(void *dummy)
 {
 	/*
-	 * Remove this CPU. Be a bit slow here and
-	 * set the bits for every online CPU so we don't miss
-	 * any IPI whilst taking this VPE down.
+	 * Remove this CPU:
 	 */
 
-	cpumask_copy(&cpu_foreign_map, cpu_online_mask);
-
-	/* Make it visible to every other CPU */
-	smp_mb();
-
 	set_cpu_online(smp_processor_id(), false);
 	calculate_cpu_foreign_map();
 	local_irq_disable();
@@ -512,10 +507,17 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned l
 		smp_on_other_tlbs(flush_tlb_range_ipi, &fd);
 	} else {
 		unsigned int cpu;
+		int exec = vma->vm_flags & VM_EXEC;
 
 		for_each_online_cpu(cpu) {
+			/*
+			 * flush_cache_range() will only fully flush icache if
+			 * the VMA is executable, otherwise we must invalidate
+			 * ASID without it appearing to has_valid_asid() as if
+			 * mm has been completely unused by that CPU.
+			 */
 			if (cpu != smp_processor_id() && cpu_context(cpu, mm))
-				cpu_context(cpu, mm) = 0;
+				cpu_context(cpu, mm) = !exec;
 		}
 	}
 	local_flush_tlb_range(vma, start, end);
@@ -560,8 +562,14 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 		unsigned int cpu;
 
 		for_each_online_cpu(cpu) {
+			/*
+			 * flush_cache_page() only does partial flushes, so
+			 * invalidate ASID without it appearing to
+			 * has_valid_asid() as if mm has been completely unused
+			 * by that CPU.
+			 */
 			if (cpu != smp_processor_id() && cpu_context(cpu, vma->vm_mm))
-				cpu_context(cpu, vma->vm_mm) = 0;
+				cpu_context(cpu, vma->vm_mm) = 1;
 		}
 	}
 	local_flush_tlb_page(vma, page);
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 4a1712b5abdf..3de85be2486a 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -619,17 +619,17 @@ static int simulate_rdhwr(struct pt_regs *regs, int rd, int rt)
 	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
 			1, regs, 0);
 	switch (rd) {
-	case 0:		/* CPU number */
+	case MIPS_HWR_CPUNUM:		/* CPU number */
 		regs->regs[rt] = smp_processor_id();
 		return 0;
-	case 1:		/* SYNCI length */
+	case MIPS_HWR_SYNCISTEP:	/* SYNCI length */
 		regs->regs[rt] = min(current_cpu_data.dcache.linesz,
 				     current_cpu_data.icache.linesz);
 		return 0;
-	case 2:		/* Read count register */
+	case MIPS_HWR_CC:		/* Read count register */
 		regs->regs[rt] = read_c0_count();
 		return 0;
-	case 3:		/* Count register resolution */
+	case MIPS_HWR_CCRES:		/* Count register resolution */
 		switch (current_cpu_type()) {
 		case CPU_20KC:
 		case CPU_25KF:
@@ -639,7 +639,7 @@ static int simulate_rdhwr(struct pt_regs *regs, int rd, int rt)
 			regs->regs[rt] = 2;
 		}
 		return 0;
-	case 29:
+	case MIPS_HWR_ULR:		/* Read UserLocal register */
 		regs->regs[rt] = ti->tp_value;
 		return 0;
 	default:
@@ -704,6 +704,7 @@ asmlinkage void do_ov(struct pt_regs *regs)
 int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
 {
 	struct siginfo si = { 0 };
+	struct vm_area_struct *vma;
 
 	switch (sig) {
 	case 0:
@@ -744,7 +745,8 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
 		si.si_addr = fault_addr;
 		si.si_signo = sig;
 		down_read(&current->mm->mmap_sem);
-		if (find_vma(current->mm, (unsigned long)fault_addr))
+		vma = find_vma(current->mm, (unsigned long)fault_addr);
+		if (vma && (vma->vm_start <= (unsigned long)fault_addr))
 			si.si_code = SEGV_ACCERR;
 		else
 			si.si_code = SEGV_MAPERR;
@@ -1859,6 +1861,7 @@ void __noreturn nmi_exception_handler(struct pt_regs *regs)
 #define VECTORSPACING 0x100	/* for EI/VI mode */
 
 unsigned long ebase;
+EXPORT_SYMBOL_GPL(ebase);
 unsigned long exception_handlers[32];
 unsigned long vi_handlers[64];
 
@@ -2063,16 +2066,22 @@ static void configure_status(void)
 			 status_set);
 }
 
+unsigned int hwrena;
+EXPORT_SYMBOL_GPL(hwrena);
+
 /* configure HWRENA register */
 static void configure_hwrena(void)
 {
-	unsigned int hwrena = cpu_hwrena_impl_bits;
+	hwrena = cpu_hwrena_impl_bits;
 
 	if (cpu_has_mips_r2_r6)
-		hwrena |= 0x0000000f;
+		hwrena |= MIPS_HWRENA_CPUNUM |
+			  MIPS_HWRENA_SYNCISTEP |
+			  MIPS_HWRENA_CC |
+			  MIPS_HWRENA_CCRES;
 
 	if (!noulri && cpu_has_userlocal)
-		hwrena |= (1 << 29);
+		hwrena |= MIPS_HWRENA_ULR;
 
 	if (hwrena)
 		write_c0_hwrena(hwrena);
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index 28b3af73a17b..f1c308dbbc4a 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -1025,7 +1025,7 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 		if (!access_ok(VERIFY_READ, addr, 2))
 			goto sigbus;
 
-		if (config_enabled(CONFIG_EVA)) {
+		if (IS_ENABLED(CONFIG_EVA)) {
 			if (segment_eq(get_fs(), get_ds()))
 				LoadHW(addr, value, res);
 			else
@@ -1044,7 +1044,7 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 		if (!access_ok(VERIFY_READ, addr, 4))
 			goto sigbus;
 
-		if (config_enabled(CONFIG_EVA)) {
+		if (IS_ENABLED(CONFIG_EVA)) {
 			if (segment_eq(get_fs(), get_ds()))
 				LoadW(addr, value, res);
 			else
@@ -1063,7 +1063,7 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 		if (!access_ok(VERIFY_READ, addr, 2))
 			goto sigbus;
 
-		if (config_enabled(CONFIG_EVA)) {
+		if (IS_ENABLED(CONFIG_EVA)) {
 			if (segment_eq(get_fs(), get_ds()))
 				LoadHWU(addr, value, res);
 			else
@@ -1131,7 +1131,7 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 		compute_return_epc(regs);
 		value = regs->regs[insn.i_format.rt];
 
-		if (config_enabled(CONFIG_EVA)) {
+		if (IS_ENABLED(CONFIG_EVA)) {
 			if (segment_eq(get_fs(), get_ds()))
 				StoreHW(addr, value, res);
 			else
@@ -1151,7 +1151,7 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 		compute_return_epc(regs);
 		value = regs->regs[insn.i_format.rt];
 
-		if (config_enabled(CONFIG_EVA)) {
+		if (IS_ENABLED(CONFIG_EVA)) {
 			if (segment_eq(get_fs(), get_ds()))
 				StoreW(addr, value, res);
 			else
diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 54e1663ce639..9abe447a4b48 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -107,6 +107,16 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	if (down_write_killable(&mm->mmap_sem))
 		return -EINTR;
 
+	/* Map delay slot emulation page */
+	base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
+			   VM_READ|VM_WRITE|VM_EXEC|
+			   VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+			   0);
+	if (IS_ERR_VALUE(base)) {
+		ret = base;
+		goto out;
+	}
+
 	/*
 	 * Determine total area size. This includes the VDSO data itself, the
 	 * data page, and the GIC user page if present. Always create a mapping
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
index 2ae12825529f..7c56d6b124d1 100644
--- a/arch/mips/kvm/Kconfig
+++ b/arch/mips/kvm/Kconfig
@@ -17,6 +17,7 @@ if VIRTUALIZATION
 config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on HAVE_KVM
+	select EXPORT_UASM
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select KVM_MMIO
diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile
index 637ebbebd549..847429de780d 100644
--- a/arch/mips/kvm/Makefile
+++ b/arch/mips/kvm/Makefile
@@ -7,9 +7,10 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm
 
 common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o
 
-kvm-objs := $(common-objs-y) mips.o emulate.o locore.o \
+kvm-objs := $(common-objs-y) mips.o emulate.o entry.o \
 	    interrupt.o stats.o commpage.o \
 	    dyntrans.o trap_emul.o fpu.o
+kvm-objs += mmu.o
 
 obj-$(CONFIG_KVM)	+= kvm.o
 obj-y			+= callback.o tlb.o
diff --git a/arch/mips/kvm/commpage.c b/arch/mips/kvm/commpage.c
index 2d6e976d1add..a36b77e1705c 100644
--- a/arch/mips/kvm/commpage.c
+++ b/arch/mips/kvm/commpage.c
@@ -4,7 +4,7 @@
  * for more details.
  *
  * commpage, currently used for Virtual COP0 registers.
- * Mapped into the guest kernel @ 0x0.
+ * Mapped into the guest kernel @ KVM_GUEST_COMMPAGE_ADDR.
  *
  * Copyright (C) 2012  MIPS Technologies, Inc.  All rights reserved.
  * Authors: Sanjay Lal <sanjayl@kymasys.com>
diff --git a/arch/mips/kvm/dyntrans.c b/arch/mips/kvm/dyntrans.c
index f1527a465c1b..d280894915ed 100644
--- a/arch/mips/kvm/dyntrans.c
+++ b/arch/mips/kvm/dyntrans.c
@@ -11,6 +11,7 @@
 
 #include <linux/errno.h>
 #include <linux/err.h>
+#include <linux/highmem.h>
 #include <linux/kvm_host.h>
 #include <linux/module.h>
 #include <linux/vmalloc.h>
@@ -20,125 +21,114 @@
 
 #include "commpage.h"
 
-#define SYNCI_TEMPLATE  0x041f0000
-#define SYNCI_BASE(x)   (((x) >> 21) & 0x1f)
-#define SYNCI_OFFSET    ((x) & 0xffff)
+/**
+ * kvm_mips_trans_replace() - Replace trapping instruction in guest memory.
+ * @vcpu:	Virtual CPU.
+ * @opc:	PC of instruction to replace.
+ * @replace:	Instruction to write
+ */
+static int kvm_mips_trans_replace(struct kvm_vcpu *vcpu, u32 *opc,
+				  union mips_instruction replace)
+{
+	unsigned long paddr, flags;
+	void *vaddr;
+
+	if (KVM_GUEST_KSEGX((unsigned long)opc) == KVM_GUEST_KSEG0) {
+		paddr = kvm_mips_translate_guest_kseg0_to_hpa(vcpu,
+							    (unsigned long)opc);
+		vaddr = kmap_atomic(pfn_to_page(PHYS_PFN(paddr)));
+		vaddr += paddr & ~PAGE_MASK;
+		memcpy(vaddr, (void *)&replace, sizeof(u32));
+		local_flush_icache_range((unsigned long)vaddr,
+					 (unsigned long)vaddr + 32);
+		kunmap_atomic(vaddr);
+	} else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) {
+		local_irq_save(flags);
+		memcpy((void *)opc, (void *)&replace, sizeof(u32));
+		local_flush_icache_range((unsigned long)opc,
+					 (unsigned long)opc + 32);
+		local_irq_restore(flags);
+	} else {
+		kvm_err("%s: Invalid address: %p\n", __func__, opc);
+		return -EFAULT;
+	}
 
-#define LW_TEMPLATE     0x8c000000
-#define CLEAR_TEMPLATE  0x00000020
-#define SW_TEMPLATE     0xac000000
+	return 0;
+}
 
-int kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc,
+int kvm_mips_trans_cache_index(union mips_instruction inst, u32 *opc,
 			       struct kvm_vcpu *vcpu)
 {
-	int result = 0;
-	unsigned long kseg0_opc;
-	uint32_t synci_inst = 0x0;
+	union mips_instruction nop_inst = { 0 };
 
 	/* Replace the CACHE instruction, with a NOP */
-	kseg0_opc =
-	    CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
-		       (vcpu, (unsigned long) opc));
-	memcpy((void *)kseg0_opc, (void *)&synci_inst, sizeof(uint32_t));
-	local_flush_icache_range(kseg0_opc, kseg0_opc + 32);
-
-	return result;
+	return kvm_mips_trans_replace(vcpu, opc, nop_inst);
 }
 
 /*
  * Address based CACHE instructions are transformed into synci(s). A little
  * heavy for just D-cache invalidates, but avoids an expensive trap
  */
-int kvm_mips_trans_cache_va(uint32_t inst, uint32_t *opc,
+int kvm_mips_trans_cache_va(union mips_instruction inst, u32 *opc,
 			    struct kvm_vcpu *vcpu)
 {
-	int result = 0;
-	unsigned long kseg0_opc;
-	uint32_t synci_inst = SYNCI_TEMPLATE, base, offset;
-
-	base = (inst >> 21) & 0x1f;
-	offset = inst & 0xffff;
-	synci_inst |= (base << 21);
-	synci_inst |= offset;
-
-	kseg0_opc =
-	    CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
-		       (vcpu, (unsigned long) opc));
-	memcpy((void *)kseg0_opc, (void *)&synci_inst, sizeof(uint32_t));
-	local_flush_icache_range(kseg0_opc, kseg0_opc + 32);
-
-	return result;
+	union mips_instruction synci_inst = { 0 };
+
+	synci_inst.i_format.opcode = bcond_op;
+	synci_inst.i_format.rs = inst.i_format.rs;
+	synci_inst.i_format.rt = synci_op;
+	if (cpu_has_mips_r6)
+		synci_inst.i_format.simmediate = inst.spec3_format.simmediate;
+	else
+		synci_inst.i_format.simmediate = inst.i_format.simmediate;
+
+	return kvm_mips_trans_replace(vcpu, opc, synci_inst);
 }
 
-int kvm_mips_trans_mfc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu)
+int kvm_mips_trans_mfc0(union mips_instruction inst, u32 *opc,
+			struct kvm_vcpu *vcpu)
 {
-	int32_t rt, rd, sel;
-	uint32_t mfc0_inst;
-	unsigned long kseg0_opc, flags;
-
-	rt = (inst >> 16) & 0x1f;
-	rd = (inst >> 11) & 0x1f;
-	sel = inst & 0x7;
+	union mips_instruction mfc0_inst = { 0 };
+	u32 rd, sel;
 
-	if ((rd == MIPS_CP0_ERRCTL) && (sel == 0)) {
-		mfc0_inst = CLEAR_TEMPLATE;
-		mfc0_inst |= ((rt & 0x1f) << 16);
-	} else {
-		mfc0_inst = LW_TEMPLATE;
-		mfc0_inst |= ((rt & 0x1f) << 16);
-		mfc0_inst |= offsetof(struct kvm_mips_commpage,
-				      cop0.reg[rd][sel]);
-	}
+	rd = inst.c0r_format.rd;
+	sel = inst.c0r_format.sel;
 
-	if (KVM_GUEST_KSEGX(opc) == KVM_GUEST_KSEG0) {
-		kseg0_opc =
-		    CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
-			       (vcpu, (unsigned long) opc));
-		memcpy((void *)kseg0_opc, (void *)&mfc0_inst, sizeof(uint32_t));
-		local_flush_icache_range(kseg0_opc, kseg0_opc + 32);
-	} else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) {
-		local_irq_save(flags);
-		memcpy((void *)opc, (void *)&mfc0_inst, sizeof(uint32_t));
-		local_flush_icache_range((unsigned long)opc,
-					 (unsigned long)opc + 32);
-		local_irq_restore(flags);
+	if (rd == MIPS_CP0_ERRCTL && sel == 0) {
+		mfc0_inst.r_format.opcode = spec_op;
+		mfc0_inst.r_format.rd = inst.c0r_format.rt;
+		mfc0_inst.r_format.func = add_op;
 	} else {
-		kvm_err("%s: Invalid address: %p\n", __func__, opc);
-		return -EFAULT;
+		mfc0_inst.i_format.opcode = lw_op;
+		mfc0_inst.i_format.rt = inst.c0r_format.rt;
+		mfc0_inst.i_format.simmediate = KVM_GUEST_COMMPAGE_ADDR |
+			offsetof(struct kvm_mips_commpage, cop0.reg[rd][sel]);
+#ifdef CONFIG_CPU_BIG_ENDIAN
+		if (sizeof(vcpu->arch.cop0->reg[0][0]) == 8)
+			mfc0_inst.i_format.simmediate |= 4;
+#endif
 	}
 
-	return 0;
+	return kvm_mips_trans_replace(vcpu, opc, mfc0_inst);
 }
 
-int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu)
+int kvm_mips_trans_mtc0(union mips_instruction inst, u32 *opc,
+			struct kvm_vcpu *vcpu)
 {
-	int32_t rt, rd, sel;
-	uint32_t mtc0_inst = SW_TEMPLATE;
-	unsigned long kseg0_opc, flags;
-
-	rt = (inst >> 16) & 0x1f;
-	rd = (inst >> 11) & 0x1f;
-	sel = inst & 0x7;
-
-	mtc0_inst |= ((rt & 0x1f) << 16);
-	mtc0_inst |= offsetof(struct kvm_mips_commpage, cop0.reg[rd][sel]);
-
-	if (KVM_GUEST_KSEGX(opc) == KVM_GUEST_KSEG0) {
-		kseg0_opc =
-		    CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa
-			       (vcpu, (unsigned long) opc));
-		memcpy((void *)kseg0_opc, (void *)&mtc0_inst, sizeof(uint32_t));
-		local_flush_icache_range(kseg0_opc, kseg0_opc + 32);
-	} else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) {
-		local_irq_save(flags);
-		memcpy((void *)opc, (void *)&mtc0_inst, sizeof(uint32_t));
-		local_flush_icache_range((unsigned long)opc,
-					 (unsigned long)opc + 32);
-		local_irq_restore(flags);
-	} else {
-		kvm_err("%s: Invalid address: %p\n", __func__, opc);
-		return -EFAULT;
-	}
-
-	return 0;
+	union mips_instruction mtc0_inst = { 0 };
+	u32 rd, sel;
+
+	rd = inst.c0r_format.rd;
+	sel = inst.c0r_format.sel;
+
+	mtc0_inst.i_format.opcode = sw_op;
+	mtc0_inst.i_format.rt = inst.c0r_format.rt;
+	mtc0_inst.i_format.simmediate = KVM_GUEST_COMMPAGE_ADDR |
+		offsetof(struct kvm_mips_commpage, cop0.reg[rd][sel]);
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	if (sizeof(vcpu->arch.cop0->reg[0][0]) == 8)
+		mtc0_inst.i_format.simmediate |= 4;
+#endif
+
+	return kvm_mips_trans_replace(vcpu, opc, mtc0_inst);
 }
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index 645c8a1982a7..6eb52b9c9818 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -52,7 +52,7 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
 		goto unaligned;
 
 	/* Read the instruction */
-	insn.word = kvm_get_inst((uint32_t *) epc, vcpu);
+	insn.word = kvm_get_inst((u32 *) epc, vcpu);
 
 	if (insn.word == KVM_INVALID_INST)
 		return KVM_INVALID_INST;
@@ -161,9 +161,12 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
 		nextpc = epc;
 		break;
 
-	case blez_op:		/* not really i_format */
-	case blezl_op:
-		/* rt field assumed to be zero */
+	case blez_op:	/* POP06 */
+#ifndef CONFIG_CPU_MIPSR6
+	case blezl_op:	/* removed in R6 */
+#endif
+		if (insn.i_format.rt != 0)
+			goto compact_branch;
 		if ((long)arch->gprs[insn.i_format.rs] <= 0)
 			epc = epc + 4 + (insn.i_format.simmediate << 2);
 		else
@@ -171,9 +174,12 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
 		nextpc = epc;
 		break;
 
-	case bgtz_op:
-	case bgtzl_op:
-		/* rt field assumed to be zero */
+	case bgtz_op:	/* POP07 */
+#ifndef CONFIG_CPU_MIPSR6
+	case bgtzl_op:	/* removed in R6 */
+#endif
+		if (insn.i_format.rt != 0)
+			goto compact_branch;
 		if ((long)arch->gprs[insn.i_format.rs] > 0)
 			epc = epc + 4 + (insn.i_format.simmediate << 2);
 		else
@@ -185,6 +191,40 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
 	case cop1_op:
 		kvm_err("%s: unsupported cop1_op\n", __func__);
 		break;
+
+#ifdef CONFIG_CPU_MIPSR6
+	/* R6 added the following compact branches with forbidden slots */
+	case blezl_op:	/* POP26 */
+	case bgtzl_op:	/* POP27 */
+		/* only rt == 0 isn't compact branch */
+		if (insn.i_format.rt != 0)
+			goto compact_branch;
+		break;
+	case pop10_op:
+	case pop30_op:
+		/* only rs == rt == 0 is reserved, rest are compact branches */
+		if (insn.i_format.rs != 0 || insn.i_format.rt != 0)
+			goto compact_branch;
+		break;
+	case pop66_op:
+	case pop76_op:
+		/* only rs == 0 isn't compact branch */
+		if (insn.i_format.rs != 0)
+			goto compact_branch;
+		break;
+compact_branch:
+		/*
+		 * If we've hit an exception on the forbidden slot, then
+		 * the branch must not have been taken.
+		 */
+		epc += 8;
+		nextpc = epc;
+		break;
+#else
+compact_branch:
+		/* Compact branches not supported before R6 */
+		break;
+#endif
 	}
 
 	return nextpc;
@@ -198,7 +238,7 @@ sigill:
 	return nextpc;
 }
 
-enum emulation_result update_pc(struct kvm_vcpu *vcpu, uint32_t cause)
+enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause)
 {
 	unsigned long branch_pc;
 	enum emulation_result er = EMULATE_DONE;
@@ -243,7 +283,7 @@ static inline int kvm_mips_count_disabled(struct kvm_vcpu *vcpu)
  *
  * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running).
  */
-static uint32_t kvm_mips_ktime_to_count(struct kvm_vcpu *vcpu, ktime_t now)
+static u32 kvm_mips_ktime_to_count(struct kvm_vcpu *vcpu, ktime_t now)
 {
 	s64 now_ns, periods;
 	u64 delta;
@@ -300,11 +340,11 @@ static inline ktime_t kvm_mips_count_time(struct kvm_vcpu *vcpu)
  *
  * Returns:	The current value of the guest CP0_Count register.
  */
-static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now)
+static u32 kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	ktime_t expires, threshold;
-	uint32_t count, compare;
+	u32 count, compare;
 	int running;
 
 	/* Calculate the biased and scaled guest CP0_Count */
@@ -315,7 +355,7 @@ static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now)
 	 * Find whether CP0_Count has reached the closest timer interrupt. If
 	 * not, we shouldn't inject it.
 	 */
-	if ((int32_t)(count - compare) < 0)
+	if ((s32)(count - compare) < 0)
 		return count;
 
 	/*
@@ -360,7 +400,7 @@ static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now)
  *
  * Returns:	The current guest CP0_Count value.
  */
-uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu)
+u32 kvm_mips_read_count(struct kvm_vcpu *vcpu)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 
@@ -387,8 +427,7 @@ uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu)
  *
  * Returns:	The ktime at the point of freeze.
  */
-static ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu,
-				       uint32_t *count)
+static ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count)
 {
 	ktime_t now;
 
@@ -419,16 +458,16 @@ static ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu,
  * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running).
  */
 static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu,
-				    ktime_t now, uint32_t count)
+				    ktime_t now, u32 count)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
-	uint32_t compare;
+	u32 compare;
 	u64 delta;
 	ktime_t expire;
 
 	/* Calculate timeout (wrap 0 to 2^32) */
 	compare = kvm_read_c0_guest_compare(cop0);
-	delta = (u64)(uint32_t)(compare - count - 1) + 1;
+	delta = (u64)(u32)(compare - count - 1) + 1;
 	delta = div_u64(delta * NSEC_PER_SEC, vcpu->arch.count_hz);
 	expire = ktime_add_ns(now, delta);
 
@@ -444,7 +483,7 @@ static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu,
  *
  * Sets the CP0_Count value and updates the timer accordingly.
  */
-void kvm_mips_write_count(struct kvm_vcpu *vcpu, uint32_t count)
+void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	ktime_t now;
@@ -538,13 +577,13 @@ int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz)
  * If @ack, atomically acknowledge any pending timer interrupt, otherwise ensure
  * any pending timer interrupt is preserved.
  */
-void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare, bool ack)
+void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	int dc;
 	u32 old_compare = kvm_read_c0_guest_compare(cop0);
 	ktime_t now;
-	uint32_t count;
+	u32 count;
 
 	/* if unchanged, must just be an ack */
 	if (old_compare == compare) {
@@ -585,7 +624,7 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare, bool ack)
 static ktime_t kvm_mips_count_disable(struct kvm_vcpu *vcpu)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
-	uint32_t count;
+	u32 count;
 	ktime_t now;
 
 	/* Stop hrtimer */
@@ -632,7 +671,7 @@ void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu)
 void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
-	uint32_t count;
+	u32 count;
 
 	kvm_clear_c0_guest_cause(cop0, CAUSEF_DC);
 
@@ -661,7 +700,7 @@ int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl)
 	s64 changed = count_ctl ^ vcpu->arch.count_ctl;
 	s64 delta;
 	ktime_t expire, now;
-	uint32_t count, compare;
+	u32 count, compare;
 
 	/* Only allow defined bits to be changed */
 	if (changed & ~(s64)(KVM_REG_MIPS_COUNT_CTL_DC))
@@ -687,7 +726,7 @@ int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl)
 			 */
 			count = kvm_read_c0_guest_count(cop0);
 			compare = kvm_read_c0_guest_compare(cop0);
-			delta = (u64)(uint32_t)(compare - count - 1) + 1;
+			delta = (u64)(u32)(compare - count - 1) + 1;
 			delta = div_u64(delta * NSEC_PER_SEC,
 					vcpu->arch.count_hz);
 			expire = ktime_add_ns(vcpu->arch.count_resume, delta);
@@ -776,7 +815,7 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu)
 		  vcpu->arch.pending_exceptions);
 
 	++vcpu->stat.wait_exits;
-	trace_kvm_exit(vcpu, WAIT_EXITS);
+	trace_kvm_exit(vcpu, KVM_TRACE_EXIT_WAIT);
 	if (!vcpu->arch.pending_exceptions) {
 		vcpu->arch.wait = 1;
 		kvm_vcpu_block(vcpu);
@@ -801,9 +840,9 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu)
 enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
-	uint32_t pc = vcpu->arch.pc;
+	unsigned long pc = vcpu->arch.pc;
 
-	kvm_err("[%#x] COP0_TLBR [%ld]\n", pc, kvm_read_c0_guest_index(cop0));
+	kvm_err("[%#lx] COP0_TLBR [%ld]\n", pc, kvm_read_c0_guest_index(cop0));
 	return EMULATE_FAIL;
 }
 
@@ -813,11 +852,11 @@ enum emulation_result kvm_mips_emul_tlbwi(struct kvm_vcpu *vcpu)
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	int index = kvm_read_c0_guest_index(cop0);
 	struct kvm_mips_tlb *tlb = NULL;
-	uint32_t pc = vcpu->arch.pc;
+	unsigned long pc = vcpu->arch.pc;
 
 	if (index < 0 || index >= KVM_MIPS_GUEST_TLB_SIZE) {
 		kvm_debug("%s: illegal index: %d\n", __func__, index);
-		kvm_debug("[%#x] COP0_TLBWI [%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx, mask: %#lx)\n",
+		kvm_debug("[%#lx] COP0_TLBWI [%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx, mask: %#lx)\n",
 			  pc, index, kvm_read_c0_guest_entryhi(cop0),
 			  kvm_read_c0_guest_entrylo0(cop0),
 			  kvm_read_c0_guest_entrylo1(cop0),
@@ -834,10 +873,10 @@ enum emulation_result kvm_mips_emul_tlbwi(struct kvm_vcpu *vcpu)
 
 	tlb->tlb_mask = kvm_read_c0_guest_pagemask(cop0);
 	tlb->tlb_hi = kvm_read_c0_guest_entryhi(cop0);
-	tlb->tlb_lo0 = kvm_read_c0_guest_entrylo0(cop0);
-	tlb->tlb_lo1 = kvm_read_c0_guest_entrylo1(cop0);
+	tlb->tlb_lo[0] = kvm_read_c0_guest_entrylo0(cop0);
+	tlb->tlb_lo[1] = kvm_read_c0_guest_entrylo1(cop0);
 
-	kvm_debug("[%#x] COP0_TLBWI [%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx, mask: %#lx)\n",
+	kvm_debug("[%#lx] COP0_TLBWI [%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx, mask: %#lx)\n",
 		  pc, index, kvm_read_c0_guest_entryhi(cop0),
 		  kvm_read_c0_guest_entrylo0(cop0),
 		  kvm_read_c0_guest_entrylo1(cop0),
@@ -851,7 +890,7 @@ enum emulation_result kvm_mips_emul_tlbwr(struct kvm_vcpu *vcpu)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	struct kvm_mips_tlb *tlb = NULL;
-	uint32_t pc = vcpu->arch.pc;
+	unsigned long pc = vcpu->arch.pc;
 	int index;
 
 	get_random_bytes(&index, sizeof(index));
@@ -867,10 +906,10 @@ enum emulation_result kvm_mips_emul_tlbwr(struct kvm_vcpu *vcpu)
 
 	tlb->tlb_mask = kvm_read_c0_guest_pagemask(cop0);
 	tlb->tlb_hi = kvm_read_c0_guest_entryhi(cop0);
-	tlb->tlb_lo0 = kvm_read_c0_guest_entrylo0(cop0);
-	tlb->tlb_lo1 = kvm_read_c0_guest_entrylo1(cop0);
+	tlb->tlb_lo[0] = kvm_read_c0_guest_entrylo0(cop0);
+	tlb->tlb_lo[1] = kvm_read_c0_guest_entrylo1(cop0);
 
-	kvm_debug("[%#x] COP0_TLBWR[%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx)\n",
+	kvm_debug("[%#lx] COP0_TLBWR[%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx)\n",
 		  pc, index, kvm_read_c0_guest_entryhi(cop0),
 		  kvm_read_c0_guest_entrylo0(cop0),
 		  kvm_read_c0_guest_entrylo1(cop0));
@@ -882,14 +921,14 @@ enum emulation_result kvm_mips_emul_tlbp(struct kvm_vcpu *vcpu)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	long entryhi = kvm_read_c0_guest_entryhi(cop0);
-	uint32_t pc = vcpu->arch.pc;
+	unsigned long pc = vcpu->arch.pc;
 	int index = -1;
 
 	index = kvm_mips_guest_tlb_lookup(vcpu, entryhi);
 
 	kvm_write_c0_guest_index(cop0, index);
 
-	kvm_debug("[%#x] COP0_TLBP (entryhi: %#lx), index: %d\n", pc, entryhi,
+	kvm_debug("[%#lx] COP0_TLBP (entryhi: %#lx), index: %d\n", pc, entryhi,
 		  index);
 
 	return EMULATE_DONE;
@@ -922,8 +961,8 @@ unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu)
  */
 unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu)
 {
-	/* Config4 is optional */
-	unsigned int mask = MIPS_CONF_M;
+	/* Config4 and ULRI are optional */
+	unsigned int mask = MIPS_CONF_M | MIPS_CONF3_ULRI;
 
 	/* Permit MSA to be present if MSA is supported */
 	if (kvm_mips_guest_can_have_msa(&vcpu->arch))
@@ -942,7 +981,12 @@ unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu)
 unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu)
 {
 	/* Config5 is optional */
-	return MIPS_CONF_M;
+	unsigned int mask = MIPS_CONF_M;
+
+	/* KScrExist */
+	mask |= (unsigned int)vcpu->arch.kscratch_enabled << 16;
+
+	return mask;
 }
 
 /**
@@ -973,14 +1017,14 @@ unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu)
 	return mask;
 }
 
-enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
-					   uint32_t cause, struct kvm_run *run,
+enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
+					   u32 *opc, u32 cause,
+					   struct kvm_run *run,
 					   struct kvm_vcpu *vcpu)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	enum emulation_result er = EMULATE_DONE;
-	int32_t rt, rd, copz, sel, co_bit, op;
-	uint32_t pc = vcpu->arch.pc;
+	u32 rt, rd, sel;
 	unsigned long curr_pc;
 
 	/*
@@ -992,16 +1036,8 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
 	if (er == EMULATE_FAIL)
 		return er;
 
-	copz = (inst >> 21) & 0x1f;
-	rt = (inst >> 16) & 0x1f;
-	rd = (inst >> 11) & 0x1f;
-	sel = inst & 0x7;
-	co_bit = (inst >> 25) & 1;
-
-	if (co_bit) {
-		op = (inst) & 0xff;
-
-		switch (op) {
+	if (inst.co_format.co) {
+		switch (inst.co_format.func) {
 		case tlbr_op:	/*  Read indexed TLB entry  */
 			er = kvm_mips_emul_tlbr(vcpu);
 			break;
@@ -1020,47 +1056,58 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
 		case eret_op:
 			er = kvm_mips_emul_eret(vcpu);
 			goto dont_update_pc;
-			break;
 		case wait_op:
 			er = kvm_mips_emul_wait(vcpu);
 			break;
 		}
 	} else {
-		switch (copz) {
+		rt = inst.c0r_format.rt;
+		rd = inst.c0r_format.rd;
+		sel = inst.c0r_format.sel;
+
+		switch (inst.c0r_format.rs) {
 		case mfc_op:
 #ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS
 			cop0->stat[rd][sel]++;
 #endif
 			/* Get reg */
 			if ((rd == MIPS_CP0_COUNT) && (sel == 0)) {
-				vcpu->arch.gprs[rt] = kvm_mips_read_count(vcpu);
+				vcpu->arch.gprs[rt] =
+				    (s32)kvm_mips_read_count(vcpu);
 			} else if ((rd == MIPS_CP0_ERRCTL) && (sel == 0)) {
 				vcpu->arch.gprs[rt] = 0x0;
 #ifdef CONFIG_KVM_MIPS_DYN_TRANS
 				kvm_mips_trans_mfc0(inst, opc, vcpu);
 #endif
 			} else {
-				vcpu->arch.gprs[rt] = cop0->reg[rd][sel];
+				vcpu->arch.gprs[rt] = (s32)cop0->reg[rd][sel];
 
 #ifdef CONFIG_KVM_MIPS_DYN_TRANS
 				kvm_mips_trans_mfc0(inst, opc, vcpu);
 #endif
 			}
 
-			kvm_debug
-			    ("[%#x] MFCz[%d][%d], vcpu->arch.gprs[%d]: %#lx\n",
-			     pc, rd, sel, rt, vcpu->arch.gprs[rt]);
-
+			trace_kvm_hwr(vcpu, KVM_TRACE_MFC0,
+				      KVM_TRACE_COP0(rd, sel),
+				      vcpu->arch.gprs[rt]);
 			break;
 
 		case dmfc_op:
 			vcpu->arch.gprs[rt] = cop0->reg[rd][sel];
+
+			trace_kvm_hwr(vcpu, KVM_TRACE_DMFC0,
+				      KVM_TRACE_COP0(rd, sel),
+				      vcpu->arch.gprs[rt]);
 			break;
 
 		case mtc_op:
 #ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS
 			cop0->stat[rd][sel]++;
 #endif
+			trace_kvm_hwr(vcpu, KVM_TRACE_MTC0,
+				      KVM_TRACE_COP0(rd, sel),
+				      vcpu->arch.gprs[rt]);
+
 			if ((rd == MIPS_CP0_TLB_INDEX)
 			    && (vcpu->arch.gprs[rt] >=
 				KVM_MIPS_GUEST_TLB_SIZE)) {
@@ -1078,16 +1125,15 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
 				kvm_err("MTCz, cop0->reg[EBASE]: %#lx\n",
 					kvm_read_c0_guest_ebase(cop0));
 			} else if (rd == MIPS_CP0_TLB_HI && sel == 0) {
-				uint32_t nasid =
+				u32 nasid =
 					vcpu->arch.gprs[rt] & KVM_ENTRYHI_ASID;
 				if ((KSEGX(vcpu->arch.gprs[rt]) != CKSEG0) &&
 				    ((kvm_read_c0_guest_entryhi(cop0) &
 				      KVM_ENTRYHI_ASID) != nasid)) {
-					kvm_debug("MTCz, change ASID from %#lx to %#lx\n",
+					trace_kvm_asid_change(vcpu,
 						kvm_read_c0_guest_entryhi(cop0)
-						& KVM_ENTRYHI_ASID,
-						vcpu->arch.gprs[rt]
-						& KVM_ENTRYHI_ASID);
+							& KVM_ENTRYHI_ASID,
+						nasid);
 
 					/* Blow away the shadow host TLBs */
 					kvm_mips_flush_host_tlb(1);
@@ -1100,10 +1146,6 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
 				kvm_mips_write_count(vcpu, vcpu->arch.gprs[rt]);
 				goto done;
 			} else if ((rd == MIPS_CP0_COMPARE) && (sel == 0)) {
-				kvm_debug("[%#x] MTCz, COMPARE %#lx <- %#lx\n",
-					  pc, kvm_read_c0_guest_compare(cop0),
-					  vcpu->arch.gprs[rt]);
-
 				/* If we are writing to COMPARE */
 				/* Clear pending timer interrupt, if any */
 				kvm_mips_write_compare(vcpu,
@@ -1155,7 +1197,7 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
 				 * it first.
 				 */
 				if (change & ST0_CU1 && !(val & ST0_FR) &&
-				    vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA)
+				    vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA)
 					kvm_lose_fpu(vcpu);
 
 				/*
@@ -1166,7 +1208,7 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
 				 * the near future.
 				 */
 				if (change & ST0_CU1 &&
-				    vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+				    vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU)
 					change_c0_status(ST0_CU1, val);
 
 				preempt_enable();
@@ -1201,7 +1243,7 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
 				 * context is already loaded.
 				 */
 				if (change & MIPS_CONF5_FRE &&
-				    vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+				    vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU)
 					change_c0_config5(MIPS_CONF5_FRE, val);
 
 				/*
@@ -1211,7 +1253,7 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
 				 * quickly enabled again in the near future.
 				 */
 				if (change & MIPS_CONF5_MSAEN &&
-				    vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA)
+				    vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA)
 					change_c0_config5(MIPS_CONF5_MSAEN,
 							  val);
 
@@ -1219,7 +1261,7 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
 
 				kvm_write_c0_guest_config5(cop0, val);
 			} else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) {
-				uint32_t old_cause, new_cause;
+				u32 old_cause, new_cause;
 
 				old_cause = kvm_read_c0_guest_cause(cop0);
 				new_cause = vcpu->arch.gprs[rt];
@@ -1233,20 +1275,30 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
 					else
 						kvm_mips_count_enable_cause(vcpu);
 				}
+			} else if ((rd == MIPS_CP0_HWRENA) && (sel == 0)) {
+				u32 mask = MIPS_HWRENA_CPUNUM |
+					   MIPS_HWRENA_SYNCISTEP |
+					   MIPS_HWRENA_CC |
+					   MIPS_HWRENA_CCRES;
+
+				if (kvm_read_c0_guest_config3(cop0) &
+				    MIPS_CONF3_ULRI)
+					mask |= MIPS_HWRENA_ULR;
+				cop0->reg[rd][sel] = vcpu->arch.gprs[rt] & mask;
 			} else {
 				cop0->reg[rd][sel] = vcpu->arch.gprs[rt];
 #ifdef CONFIG_KVM_MIPS_DYN_TRANS
 				kvm_mips_trans_mtc0(inst, opc, vcpu);
 #endif
 			}
-
-			kvm_debug("[%#x] MTCz, cop0->reg[%d][%d]: %#lx\n", pc,
-				  rd, sel, cop0->reg[rd][sel]);
 			break;
 
 		case dmtc_op:
 			kvm_err("!!!!!!![%#lx]dmtc_op: rt: %d, rd: %d, sel: %d!!!!!!\n",
 				vcpu->arch.pc, rt, rd, sel);
+			trace_kvm_hwr(vcpu, KVM_TRACE_DMTC0,
+				      KVM_TRACE_COP0(rd, sel),
+				      vcpu->arch.gprs[rt]);
 			er = EMULATE_FAIL;
 			break;
 
@@ -1258,7 +1310,7 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
 				vcpu->arch.gprs[rt] =
 				    kvm_read_c0_guest_status(cop0);
 			/* EI */
-			if (inst & 0x20) {
+			if (inst.mfmc0_format.sc) {
 				kvm_debug("[%#lx] mfmc0_op: EI\n",
 					  vcpu->arch.pc);
 				kvm_set_c0_guest_status(cop0, ST0_IE);
@@ -1272,9 +1324,8 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
 
 		case wrpgpr_op:
 			{
-				uint32_t css =
-				    cop0->reg[MIPS_CP0_STATUS][2] & 0xf;
-				uint32_t pss =
+				u32 css = cop0->reg[MIPS_CP0_STATUS][2] & 0xf;
+				u32 pss =
 				    (cop0->reg[MIPS_CP0_STATUS][2] >> 6) & 0xf;
 				/*
 				 * We don't support any shadow register sets, so
@@ -1291,7 +1342,7 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
 			break;
 		default:
 			kvm_err("[%#lx]MachEmulateCP0: unsupported COP0, copz: 0x%x\n",
-				vcpu->arch.pc, copz);
+				vcpu->arch.pc, inst.c0r_format.rs);
 			er = EMULATE_FAIL;
 			break;
 		}
@@ -1312,13 +1363,14 @@ dont_update_pc:
 	return er;
 }
 
-enum emulation_result kvm_mips_emulate_store(uint32_t inst, uint32_t cause,
+enum emulation_result kvm_mips_emulate_store(union mips_instruction inst,
+					     u32 cause,
 					     struct kvm_run *run,
 					     struct kvm_vcpu *vcpu)
 {
 	enum emulation_result er = EMULATE_DO_MMIO;
-	int32_t op, base, rt, offset;
-	uint32_t bytes;
+	u32 rt;
+	u32 bytes;
 	void *data = run->mmio.data;
 	unsigned long curr_pc;
 
@@ -1331,12 +1383,9 @@ enum emulation_result kvm_mips_emulate_store(uint32_t inst, uint32_t cause,
 	if (er == EMULATE_FAIL)
 		return er;
 
-	rt = (inst >> 16) & 0x1f;
-	base = (inst >> 21) & 0x1f;
-	offset = inst & 0xffff;
-	op = (inst >> 26) & 0x3f;
+	rt = inst.i_format.rt;
 
-	switch (op) {
+	switch (inst.i_format.opcode) {
 	case sb_op:
 		bytes = 1;
 		if (bytes > sizeof(run->mmio.data)) {
@@ -1357,7 +1406,7 @@ enum emulation_result kvm_mips_emulate_store(uint32_t inst, uint32_t cause,
 		*(u8 *) data = vcpu->arch.gprs[rt];
 		kvm_debug("OP_SB: eaddr: %#lx, gpr: %#lx, data: %#x\n",
 			  vcpu->arch.host_cp0_badvaddr, vcpu->arch.gprs[rt],
-			  *(uint8_t *) data);
+			  *(u8 *) data);
 
 		break;
 
@@ -1379,11 +1428,11 @@ enum emulation_result kvm_mips_emulate_store(uint32_t inst, uint32_t cause,
 		run->mmio.is_write = 1;
 		vcpu->mmio_needed = 1;
 		vcpu->mmio_is_write = 1;
-		*(uint32_t *) data = vcpu->arch.gprs[rt];
+		*(u32 *) data = vcpu->arch.gprs[rt];
 
 		kvm_debug("[%#lx] OP_SW: eaddr: %#lx, gpr: %#lx, data: %#x\n",
 			  vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
-			  vcpu->arch.gprs[rt], *(uint32_t *) data);
+			  vcpu->arch.gprs[rt], *(u32 *) data);
 		break;
 
 	case sh_op:
@@ -1404,15 +1453,16 @@ enum emulation_result kvm_mips_emulate_store(uint32_t inst, uint32_t cause,
 		run->mmio.is_write = 1;
 		vcpu->mmio_needed = 1;
 		vcpu->mmio_is_write = 1;
-		*(uint16_t *) data = vcpu->arch.gprs[rt];
+		*(u16 *) data = vcpu->arch.gprs[rt];
 
 		kvm_debug("[%#lx] OP_SH: eaddr: %#lx, gpr: %#lx, data: %#x\n",
 			  vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
-			  vcpu->arch.gprs[rt], *(uint32_t *) data);
+			  vcpu->arch.gprs[rt], *(u32 *) data);
 		break;
 
 	default:
-		kvm_err("Store not yet supported");
+		kvm_err("Store not yet supported (inst=0x%08x)\n",
+			inst.word);
 		er = EMULATE_FAIL;
 		break;
 	}
@@ -1424,18 +1474,16 @@ enum emulation_result kvm_mips_emulate_store(uint32_t inst, uint32_t cause,
 	return er;
 }
 
-enum emulation_result kvm_mips_emulate_load(uint32_t inst, uint32_t cause,
-					    struct kvm_run *run,
+enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
+					    u32 cause, struct kvm_run *run,
 					    struct kvm_vcpu *vcpu)
 {
 	enum emulation_result er = EMULATE_DO_MMIO;
-	int32_t op, base, rt, offset;
-	uint32_t bytes;
+	u32 op, rt;
+	u32 bytes;
 
-	rt = (inst >> 16) & 0x1f;
-	base = (inst >> 21) & 0x1f;
-	offset = inst & 0xffff;
-	op = (inst >> 26) & 0x3f;
+	rt = inst.i_format.rt;
+	op = inst.i_format.opcode;
 
 	vcpu->arch.pending_load_cause = cause;
 	vcpu->arch.io_gpr = rt;
@@ -1521,7 +1569,8 @@ enum emulation_result kvm_mips_emulate_load(uint32_t inst, uint32_t cause,
 		break;
 
 	default:
-		kvm_err("Load not yet supported");
+		kvm_err("Load not yet supported (inst=0x%08x)\n",
+			inst.word);
 		er = EMULATE_FAIL;
 		break;
 	}
@@ -1529,40 +1578,15 @@ enum emulation_result kvm_mips_emulate_load(uint32_t inst, uint32_t cause,
 	return er;
 }
 
-int kvm_mips_sync_icache(unsigned long va, struct kvm_vcpu *vcpu)
-{
-	unsigned long offset = (va & ~PAGE_MASK);
-	struct kvm *kvm = vcpu->kvm;
-	unsigned long pa;
-	gfn_t gfn;
-	kvm_pfn_t pfn;
-
-	gfn = va >> PAGE_SHIFT;
-
-	if (gfn >= kvm->arch.guest_pmap_npages) {
-		kvm_err("%s: Invalid gfn: %#llx\n", __func__, gfn);
-		kvm_mips_dump_host_tlbs();
-		kvm_arch_vcpu_dump_regs(vcpu);
-		return -1;
-	}
-	pfn = kvm->arch.guest_pmap[gfn];
-	pa = (pfn << PAGE_SHIFT) | offset;
-
-	kvm_debug("%s: va: %#lx, unmapped: %#x\n", __func__, va,
-		  CKSEG0ADDR(pa));
-
-	local_flush_icache_range(CKSEG0ADDR(pa), 32);
-	return 0;
-}
-
-enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
-					     uint32_t cause,
+enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
+					     u32 *opc, u32 cause,
 					     struct kvm_run *run,
 					     struct kvm_vcpu *vcpu)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	enum emulation_result er = EMULATE_DONE;
-	int32_t offset, cache, op_inst, op, base;
+	u32 cache, op_inst, op, base;
+	s16 offset;
 	struct kvm_vcpu_arch *arch = &vcpu->arch;
 	unsigned long va;
 	unsigned long curr_pc;
@@ -1576,9 +1600,12 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
 	if (er == EMULATE_FAIL)
 		return er;
 
-	base = (inst >> 21) & 0x1f;
-	op_inst = (inst >> 16) & 0x1f;
-	offset = (int16_t)inst;
+	base = inst.i_format.rs;
+	op_inst = inst.i_format.rt;
+	if (cpu_has_mips_r6)
+		offset = inst.spec3_format.simmediate;
+	else
+		offset = inst.i_format.simmediate;
 	cache = op_inst & CacheOp_Cache;
 	op = op_inst & CacheOp_Op;
 
@@ -1634,7 +1661,6 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
 						   (cop0) & KVM_ENTRYHI_ASID));
 
 		if (index < 0) {
-			vcpu->arch.host_cp0_entryhi = (va & VPN2_MASK);
 			vcpu->arch.host_cp0_badvaddr = va;
 			vcpu->arch.pc = curr_pc;
 			er = kvm_mips_emulate_tlbmiss_ld(cause, NULL, run,
@@ -1659,9 +1685,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
 				 * We fault an entry from the guest tlb to the
 				 * shadow host TLB
 				 */
-				kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb,
-								     NULL,
-								     NULL);
+				kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb);
 			}
 		}
 	} else {
@@ -1714,20 +1738,20 @@ dont_update_pc:
 	return er;
 }
 
-enum emulation_result kvm_mips_emulate_inst(unsigned long cause, uint32_t *opc,
+enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc,
 					    struct kvm_run *run,
 					    struct kvm_vcpu *vcpu)
 {
+	union mips_instruction inst;
 	enum emulation_result er = EMULATE_DONE;
-	uint32_t inst;
 
 	/* Fetch the instruction. */
 	if (cause & CAUSEF_BD)
 		opc += 1;
 
-	inst = kvm_get_inst(opc, vcpu);
+	inst.word = kvm_get_inst(opc, vcpu);
 
-	switch (((union mips_instruction)inst).r_format.opcode) {
+	switch (inst.r_format.opcode) {
 	case cop0_op:
 		er = kvm_mips_emulate_CP0(inst, opc, cause, run, vcpu);
 		break;
@@ -1744,15 +1768,31 @@ enum emulation_result kvm_mips_emulate_inst(unsigned long cause, uint32_t *opc,
 		er = kvm_mips_emulate_load(inst, cause, run, vcpu);
 		break;
 
+#ifndef CONFIG_CPU_MIPSR6
 	case cache_op:
 		++vcpu->stat.cache_exits;
-		trace_kvm_exit(vcpu, CACHE_EXITS);
+		trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE);
 		er = kvm_mips_emulate_cache(inst, opc, cause, run, vcpu);
 		break;
+#else
+	case spec3_op:
+		switch (inst.spec3_format.func) {
+		case cache6_op:
+			++vcpu->stat.cache_exits;
+			trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE);
+			er = kvm_mips_emulate_cache(inst, opc, cause, run,
+						    vcpu);
+			break;
+		default:
+			goto unknown;
+		};
+		break;
+unknown:
+#endif
 
 	default:
 		kvm_err("Instruction emulation not supported (%p/%#x)\n", opc,
-			inst);
+			inst.word);
 		kvm_arch_vcpu_dump_regs(vcpu);
 		er = EMULATE_FAIL;
 		break;
@@ -1761,8 +1801,8 @@ enum emulation_result kvm_mips_emulate_inst(unsigned long cause, uint32_t *opc,
 	return er;
 }
 
-enum emulation_result kvm_mips_emulate_syscall(unsigned long cause,
-					       uint32_t *opc,
+enum emulation_result kvm_mips_emulate_syscall(u32 cause,
+					       u32 *opc,
 					       struct kvm_run *run,
 					       struct kvm_vcpu *vcpu)
 {
@@ -1796,8 +1836,8 @@ enum emulation_result kvm_mips_emulate_syscall(unsigned long cause,
 	return er;
 }
 
-enum emulation_result kvm_mips_emulate_tlbmiss_ld(unsigned long cause,
-						  uint32_t *opc,
+enum emulation_result kvm_mips_emulate_tlbmiss_ld(u32 cause,
+						  u32 *opc,
 						  struct kvm_run *run,
 						  struct kvm_vcpu *vcpu)
 {
@@ -1842,8 +1882,8 @@ enum emulation_result kvm_mips_emulate_tlbmiss_ld(unsigned long cause,
 	return EMULATE_DONE;
 }
 
-enum emulation_result kvm_mips_emulate_tlbinv_ld(unsigned long cause,
-						 uint32_t *opc,
+enum emulation_result kvm_mips_emulate_tlbinv_ld(u32 cause,
+						 u32 *opc,
 						 struct kvm_run *run,
 						 struct kvm_vcpu *vcpu)
 {
@@ -1888,8 +1928,8 @@ enum emulation_result kvm_mips_emulate_tlbinv_ld(unsigned long cause,
 	return EMULATE_DONE;
 }
 
-enum emulation_result kvm_mips_emulate_tlbmiss_st(unsigned long cause,
-						  uint32_t *opc,
+enum emulation_result kvm_mips_emulate_tlbmiss_st(u32 cause,
+						  u32 *opc,
 						  struct kvm_run *run,
 						  struct kvm_vcpu *vcpu)
 {
@@ -1932,8 +1972,8 @@ enum emulation_result kvm_mips_emulate_tlbmiss_st(unsigned long cause,
 	return EMULATE_DONE;
 }
 
-enum emulation_result kvm_mips_emulate_tlbinv_st(unsigned long cause,
-						 uint32_t *opc,
+enum emulation_result kvm_mips_emulate_tlbinv_st(u32 cause,
+						 u32 *opc,
 						 struct kvm_run *run,
 						 struct kvm_vcpu *vcpu)
 {
@@ -1977,7 +2017,7 @@ enum emulation_result kvm_mips_emulate_tlbinv_st(unsigned long cause,
 }
 
 /* TLBMOD: store into address matching TLB with Dirty bit off */
-enum emulation_result kvm_mips_handle_tlbmod(unsigned long cause, uint32_t *opc,
+enum emulation_result kvm_mips_handle_tlbmod(u32 cause, u32 *opc,
 					     struct kvm_run *run,
 					     struct kvm_vcpu *vcpu)
 {
@@ -2005,8 +2045,8 @@ enum emulation_result kvm_mips_handle_tlbmod(unsigned long cause, uint32_t *opc,
 	return er;
 }
 
-enum emulation_result kvm_mips_emulate_tlbmod(unsigned long cause,
-					      uint32_t *opc,
+enum emulation_result kvm_mips_emulate_tlbmod(u32 cause,
+					      u32 *opc,
 					      struct kvm_run *run,
 					      struct kvm_vcpu *vcpu)
 {
@@ -2048,8 +2088,8 @@ enum emulation_result kvm_mips_emulate_tlbmod(unsigned long cause,
 	return EMULATE_DONE;
 }
 
-enum emulation_result kvm_mips_emulate_fpu_exc(unsigned long cause,
-					       uint32_t *opc,
+enum emulation_result kvm_mips_emulate_fpu_exc(u32 cause,
+					       u32 *opc,
 					       struct kvm_run *run,
 					       struct kvm_vcpu *vcpu)
 {
@@ -2077,8 +2117,8 @@ enum emulation_result kvm_mips_emulate_fpu_exc(unsigned long cause,
 	return EMULATE_DONE;
 }
 
-enum emulation_result kvm_mips_emulate_ri_exc(unsigned long cause,
-					      uint32_t *opc,
+enum emulation_result kvm_mips_emulate_ri_exc(u32 cause,
+					      u32 *opc,
 					      struct kvm_run *run,
 					      struct kvm_vcpu *vcpu)
 {
@@ -2112,8 +2152,8 @@ enum emulation_result kvm_mips_emulate_ri_exc(unsigned long cause,
 	return er;
 }
 
-enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause,
-					      uint32_t *opc,
+enum emulation_result kvm_mips_emulate_bp_exc(u32 cause,
+					      u32 *opc,
 					      struct kvm_run *run,
 					      struct kvm_vcpu *vcpu)
 {
@@ -2147,8 +2187,8 @@ enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause,
 	return er;
 }
 
-enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause,
-						uint32_t *opc,
+enum emulation_result kvm_mips_emulate_trap_exc(u32 cause,
+						u32 *opc,
 						struct kvm_run *run,
 						struct kvm_vcpu *vcpu)
 {
@@ -2182,8 +2222,8 @@ enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause,
 	return er;
 }
 
-enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause,
-						  uint32_t *opc,
+enum emulation_result kvm_mips_emulate_msafpe_exc(u32 cause,
+						  u32 *opc,
 						  struct kvm_run *run,
 						  struct kvm_vcpu *vcpu)
 {
@@ -2217,8 +2257,8 @@ enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause,
 	return er;
 }
 
-enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause,
-					       uint32_t *opc,
+enum emulation_result kvm_mips_emulate_fpe_exc(u32 cause,
+					       u32 *opc,
 					       struct kvm_run *run,
 					       struct kvm_vcpu *vcpu)
 {
@@ -2252,8 +2292,8 @@ enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause,
 	return er;
 }
 
-enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause,
-						  uint32_t *opc,
+enum emulation_result kvm_mips_emulate_msadis_exc(u32 cause,
+						  u32 *opc,
 						  struct kvm_run *run,
 						  struct kvm_vcpu *vcpu)
 {
@@ -2287,22 +2327,7 @@ enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause,
 	return er;
 }
 
-/* ll/sc, rdhwr, sync emulation */
-
-#define OPCODE 0xfc000000
-#define BASE   0x03e00000
-#define RT     0x001f0000
-#define OFFSET 0x0000ffff
-#define LL     0xc0000000
-#define SC     0xe0000000
-#define SPEC0  0x00000000
-#define SPEC3  0x7c000000
-#define RD     0x0000f800
-#define FUNC   0x0000003f
-#define SYNC   0x0000000f
-#define RDHWR  0x0000003b
-
-enum emulation_result kvm_mips_handle_ri(unsigned long cause, uint32_t *opc,
+enum emulation_result kvm_mips_handle_ri(u32 cause, u32 *opc,
 					 struct kvm_run *run,
 					 struct kvm_vcpu *vcpu)
 {
@@ -2310,7 +2335,7 @@ enum emulation_result kvm_mips_handle_ri(unsigned long cause, uint32_t *opc,
 	struct kvm_vcpu_arch *arch = &vcpu->arch;
 	enum emulation_result er = EMULATE_DONE;
 	unsigned long curr_pc;
-	uint32_t inst;
+	union mips_instruction inst;
 
 	/*
 	 * Update PC and hold onto current PC in case there is
@@ -2325,17 +2350,22 @@ enum emulation_result kvm_mips_handle_ri(unsigned long cause, uint32_t *opc,
 	if (cause & CAUSEF_BD)
 		opc += 1;
 
-	inst = kvm_get_inst(opc, vcpu);
+	inst.word = kvm_get_inst(opc, vcpu);
 
-	if (inst == KVM_INVALID_INST) {
+	if (inst.word == KVM_INVALID_INST) {
 		kvm_err("%s: Cannot get inst @ %p\n", __func__, opc);
 		return EMULATE_FAIL;
 	}
 
-	if ((inst & OPCODE) == SPEC3 && (inst & FUNC) == RDHWR) {
+	if (inst.r_format.opcode == spec3_op &&
+	    inst.r_format.func == rdhwr_op &&
+	    inst.r_format.rs == 0 &&
+	    (inst.r_format.re >> 3) == 0) {
 		int usermode = !KVM_GUEST_KERNEL_MODE(vcpu);
-		int rd = (inst & RD) >> 11;
-		int rt = (inst & RT) >> 16;
+		int rd = inst.r_format.rd;
+		int rt = inst.r_format.rt;
+		int sel = inst.r_format.re & 0x7;
+
 		/* If usermode, check RDHWR rd is allowed by guest HWREna */
 		if (usermode && !(kvm_read_c0_guest_hwrena(cop0) & BIT(rd))) {
 			kvm_debug("RDHWR %#x disallowed by HWREna @ %p\n",
@@ -2343,17 +2373,17 @@ enum emulation_result kvm_mips_handle_ri(unsigned long cause, uint32_t *opc,
 			goto emulate_ri;
 		}
 		switch (rd) {
-		case 0:	/* CPU number */
-			arch->gprs[rt] = 0;
+		case MIPS_HWR_CPUNUM:		/* CPU number */
+			arch->gprs[rt] = vcpu->vcpu_id;
 			break;
-		case 1:	/* SYNCI length */
+		case MIPS_HWR_SYNCISTEP:	/* SYNCI length */
 			arch->gprs[rt] = min(current_cpu_data.dcache.linesz,
 					     current_cpu_data.icache.linesz);
 			break;
-		case 2:	/* Read count register */
-			arch->gprs[rt] = kvm_mips_read_count(vcpu);
+		case MIPS_HWR_CC:		/* Read count register */
+			arch->gprs[rt] = (s32)kvm_mips_read_count(vcpu);
 			break;
-		case 3:	/* Count register resolution */
+		case MIPS_HWR_CCRES:		/* Count register resolution */
 			switch (current_cpu_data.cputype) {
 			case CPU_20KC:
 			case CPU_25KF:
@@ -2363,7 +2393,7 @@ enum emulation_result kvm_mips_handle_ri(unsigned long cause, uint32_t *opc,
 				arch->gprs[rt] = 2;
 			}
 			break;
-		case 29:
+		case MIPS_HWR_ULR:		/* Read UserLocal register */
 			arch->gprs[rt] = kvm_read_c0_guest_userlocal(cop0);
 			break;
 
@@ -2371,8 +2401,12 @@ enum emulation_result kvm_mips_handle_ri(unsigned long cause, uint32_t *opc,
 			kvm_debug("RDHWR %#x not supported @ %p\n", rd, opc);
 			goto emulate_ri;
 		}
+
+		trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR, KVM_TRACE_HWR(rd, sel),
+			      vcpu->arch.gprs[rt]);
 	} else {
-		kvm_debug("Emulate RI not supported @ %p: %#x\n", opc, inst);
+		kvm_debug("Emulate RI not supported @ %p: %#x\n",
+			  opc, inst.word);
 		goto emulate_ri;
 	}
 
@@ -2405,19 +2439,19 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
 
 	switch (run->mmio.len) {
 	case 4:
-		*gpr = *(int32_t *) run->mmio.data;
+		*gpr = *(s32 *) run->mmio.data;
 		break;
 
 	case 2:
 		if (vcpu->mmio_needed == 2)
-			*gpr = *(int16_t *) run->mmio.data;
+			*gpr = *(s16 *) run->mmio.data;
 		else
-			*gpr = *(uint16_t *)run->mmio.data;
+			*gpr = *(u16 *)run->mmio.data;
 
 		break;
 	case 1:
 		if (vcpu->mmio_needed == 2)
-			*gpr = *(int8_t *) run->mmio.data;
+			*gpr = *(s8 *) run->mmio.data;
 		else
 			*gpr = *(u8 *) run->mmio.data;
 		break;
@@ -2432,12 +2466,12 @@ done:
 	return er;
 }
 
-static enum emulation_result kvm_mips_emulate_exc(unsigned long cause,
-						  uint32_t *opc,
+static enum emulation_result kvm_mips_emulate_exc(u32 cause,
+						  u32 *opc,
 						  struct kvm_run *run,
 						  struct kvm_vcpu *vcpu)
 {
-	uint32_t exccode = (cause >> CAUSEB_EXCCODE) & 0x1f;
+	u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f;
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	struct kvm_vcpu_arch *arch = &vcpu->arch;
 	enum emulation_result er = EMULATE_DONE;
@@ -2470,13 +2504,13 @@ static enum emulation_result kvm_mips_emulate_exc(unsigned long cause,
 	return er;
 }
 
-enum emulation_result kvm_mips_check_privilege(unsigned long cause,
-					       uint32_t *opc,
+enum emulation_result kvm_mips_check_privilege(u32 cause,
+					       u32 *opc,
 					       struct kvm_run *run,
 					       struct kvm_vcpu *vcpu)
 {
 	enum emulation_result er = EMULATE_DONE;
-	uint32_t exccode = (cause >> CAUSEB_EXCCODE) & 0x1f;
+	u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f;
 	unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
 
 	int usermode = !KVM_GUEST_KERNEL_MODE(vcpu);
@@ -2566,18 +2600,18 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause,
  * (2) TLB entry is present in the Guest TLB but not in the shadow, in this
  *     case we inject the TLB from the Guest TLB into the shadow host TLB
  */
-enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause,
-					      uint32_t *opc,
+enum emulation_result kvm_mips_handle_tlbmiss(u32 cause,
+					      u32 *opc,
 					      struct kvm_run *run,
 					      struct kvm_vcpu *vcpu)
 {
 	enum emulation_result er = EMULATE_DONE;
-	uint32_t exccode = (cause >> CAUSEB_EXCCODE) & 0x1f;
+	u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f;
 	unsigned long va = vcpu->arch.host_cp0_badvaddr;
 	int index;
 
-	kvm_debug("kvm_mips_handle_tlbmiss: badvaddr: %#lx, entryhi: %#lx\n",
-		  vcpu->arch.host_cp0_badvaddr, vcpu->arch.host_cp0_entryhi);
+	kvm_debug("kvm_mips_handle_tlbmiss: badvaddr: %#lx\n",
+		  vcpu->arch.host_cp0_badvaddr);
 
 	/*
 	 * KVM would not have got the exception if this entry was valid in the
@@ -2620,13 +2654,12 @@ enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause,
 			}
 		} else {
 			kvm_debug("Injecting hi: %#lx, lo0: %#lx, lo1: %#lx into shadow host TLB\n",
-				  tlb->tlb_hi, tlb->tlb_lo0, tlb->tlb_lo1);
+				  tlb->tlb_hi, tlb->tlb_lo[0], tlb->tlb_lo[1]);
 			/*
 			 * OK we have a Guest TLB entry, now inject it into the
 			 * shadow host TLB
 			 */
-			kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, NULL,
-							     NULL);
+			kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb);
 		}
 	}
 
diff --git a/arch/mips/kvm/entry.c b/arch/mips/kvm/entry.c
new file mode 100644
index 000000000000..6a02b3a3fa65
--- /dev/null
+++ b/arch/mips/kvm/entry.c
@@ -0,0 +1,701 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Generation of main entry point for the guest, exception handling.
+ *
+ * Copyright (C) 2012  MIPS Technologies, Inc.
+ * Authors: Sanjay Lal <sanjayl@kymasys.com>
+ *
+ * Copyright (C) 2016 Imagination Technologies Ltd.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/msa.h>
+#include <asm/setup.h>
+#include <asm/uasm.h>
+
+/* Register names */
+#define ZERO		0
+#define AT		1
+#define V0		2
+#define V1		3
+#define A0		4
+#define A1		5
+
+#if _MIPS_SIM == _MIPS_SIM_ABI32
+#define T0		8
+#define T1		9
+#define T2		10
+#define T3		11
+#endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
+
+#if _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32
+#define T0		12
+#define T1		13
+#define T2		14
+#define T3		15
+#endif /* _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 */
+
+#define S0		16
+#define S1		17
+#define T9		25
+#define K0		26
+#define K1		27
+#define GP		28
+#define SP		29
+#define RA		31
+
+/* Some CP0 registers */
+#define C0_HWRENA	7, 0
+#define C0_BADVADDR	8, 0
+#define C0_ENTRYHI	10, 0
+#define C0_STATUS	12, 0
+#define C0_CAUSE	13, 0
+#define C0_EPC		14, 0
+#define C0_EBASE	15, 1
+#define C0_CONFIG5	16, 5
+#define C0_DDATA_LO	28, 3
+#define C0_ERROREPC	30, 0
+
+#define CALLFRAME_SIZ   32
+
+#ifdef CONFIG_64BIT
+#define ST0_KX_IF_64	ST0_KX
+#else
+#define ST0_KX_IF_64	0
+#endif
+
+static unsigned int scratch_vcpu[2] = { C0_DDATA_LO };
+static unsigned int scratch_tmp[2] = { C0_ERROREPC };
+
+enum label_id {
+	label_fpu_1 = 1,
+	label_msa_1,
+	label_return_to_host,
+	label_kernel_asid,
+	label_exit_common,
+};
+
+UASM_L_LA(_fpu_1)
+UASM_L_LA(_msa_1)
+UASM_L_LA(_return_to_host)
+UASM_L_LA(_kernel_asid)
+UASM_L_LA(_exit_common)
+
+static void *kvm_mips_build_enter_guest(void *addr);
+static void *kvm_mips_build_ret_from_exit(void *addr);
+static void *kvm_mips_build_ret_to_guest(void *addr);
+static void *kvm_mips_build_ret_to_host(void *addr);
+
+/**
+ * kvm_mips_entry_setup() - Perform global setup for entry code.
+ *
+ * Perform global setup for entry code, such as choosing a scratch register.
+ *
+ * Returns:	0 on success.
+ *		-errno on failure.
+ */
+int kvm_mips_entry_setup(void)
+{
+	/*
+	 * We prefer to use KScratchN registers if they are available over the
+	 * defaults above, which may not work on all cores.
+	 */
+	unsigned int kscratch_mask = cpu_data[0].kscratch_mask & 0xfc;
+
+	/* Pick a scratch register for storing VCPU */
+	if (kscratch_mask) {
+		scratch_vcpu[0] = 31;
+		scratch_vcpu[1] = ffs(kscratch_mask) - 1;
+		kscratch_mask &= ~BIT(scratch_vcpu[1]);
+	}
+
+	/* Pick a scratch register to use as a temp for saving state */
+	if (kscratch_mask) {
+		scratch_tmp[0] = 31;
+		scratch_tmp[1] = ffs(kscratch_mask) - 1;
+		kscratch_mask &= ~BIT(scratch_tmp[1]);
+	}
+
+	return 0;
+}
+
+static void kvm_mips_build_save_scratch(u32 **p, unsigned int tmp,
+					unsigned int frame)
+{
+	/* Save the VCPU scratch register value in cp0_epc of the stack frame */
+	UASM_i_MFC0(p, tmp, scratch_vcpu[0], scratch_vcpu[1]);
+	UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame);
+
+	/* Save the temp scratch register value in cp0_cause of stack frame */
+	if (scratch_tmp[0] == 31) {
+		UASM_i_MFC0(p, tmp, scratch_tmp[0], scratch_tmp[1]);
+		UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame);
+	}
+}
+
+static void kvm_mips_build_restore_scratch(u32 **p, unsigned int tmp,
+					   unsigned int frame)
+{
+	/*
+	 * Restore host scratch register values saved by
+	 * kvm_mips_build_save_scratch().
+	 */
+	UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame);
+	UASM_i_MTC0(p, tmp, scratch_vcpu[0], scratch_vcpu[1]);
+
+	if (scratch_tmp[0] == 31) {
+		UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame);
+		UASM_i_MTC0(p, tmp, scratch_tmp[0], scratch_tmp[1]);
+	}
+}
+
+/**
+ * build_set_exc_base() - Assemble code to write exception base address.
+ * @p:		Code buffer pointer.
+ * @reg:	Source register (generated code may set WG bit in @reg).
+ *
+ * Assemble code to modify the exception base address in the EBase register,
+ * using the appropriately sized access and setting the WG bit if necessary.
+ */
+static inline void build_set_exc_base(u32 **p, unsigned int reg)
+{
+	if (cpu_has_ebase_wg) {
+		/* Set WG so that all the bits get written */
+		uasm_i_ori(p, reg, reg, MIPS_EBASE_WG);
+		UASM_i_MTC0(p, reg, C0_EBASE);
+	} else {
+		uasm_i_mtc0(p, reg, C0_EBASE);
+	}
+}
+
+/**
+ * kvm_mips_build_vcpu_run() - Assemble function to start running a guest VCPU.
+ * @addr:	Address to start writing code.
+ *
+ * Assemble the start of the vcpu_run function to run a guest VCPU. The function
+ * conforms to the following prototype:
+ *
+ * int vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu);
+ *
+ * The exit from the guest and return to the caller is handled by the code
+ * generated by kvm_mips_build_ret_to_host().
+ *
+ * Returns:	Next address after end of written function.
+ */
+void *kvm_mips_build_vcpu_run(void *addr)
+{
+	u32 *p = addr;
+	unsigned int i;
+
+	/*
+	 * A0: run
+	 * A1: vcpu
+	 */
+
+	/* k0/k1 not being used in host kernel context */
+	UASM_i_ADDIU(&p, K1, SP, -(int)sizeof(struct pt_regs));
+	for (i = 16; i < 32; ++i) {
+		if (i == 24)
+			i = 28;
+		UASM_i_SW(&p, i, offsetof(struct pt_regs, regs[i]), K1);
+	}
+
+	/* Save host status */
+	uasm_i_mfc0(&p, V0, C0_STATUS);
+	UASM_i_SW(&p, V0, offsetof(struct pt_regs, cp0_status), K1);
+
+	/* Save scratch registers, will be used to store pointer to vcpu etc */
+	kvm_mips_build_save_scratch(&p, V1, K1);
+
+	/* VCPU scratch register has pointer to vcpu */
+	UASM_i_MTC0(&p, A1, scratch_vcpu[0], scratch_vcpu[1]);
+
+	/* Offset into vcpu->arch */
+	UASM_i_ADDIU(&p, K1, A1, offsetof(struct kvm_vcpu, arch));
+
+	/*
+	 * Save the host stack to VCPU, used for exception processing
+	 * when we exit from the Guest
+	 */
+	UASM_i_SW(&p, SP, offsetof(struct kvm_vcpu_arch, host_stack), K1);
+
+	/* Save the kernel gp as well */
+	UASM_i_SW(&p, GP, offsetof(struct kvm_vcpu_arch, host_gp), K1);
+
+	/*
+	 * Setup status register for running the guest in UM, interrupts
+	 * are disabled
+	 */
+	UASM_i_LA(&p, K0, ST0_EXL | KSU_USER | ST0_BEV | ST0_KX_IF_64);
+	uasm_i_mtc0(&p, K0, C0_STATUS);
+	uasm_i_ehb(&p);
+
+	/* load up the new EBASE */
+	UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, guest_ebase), K1);
+	build_set_exc_base(&p, K0);
+
+	/*
+	 * Now that the new EBASE has been loaded, unset BEV, set
+	 * interrupt mask as it was but make sure that timer interrupts
+	 * are enabled
+	 */
+	uasm_i_addiu(&p, K0, ZERO, ST0_EXL | KSU_USER | ST0_IE | ST0_KX_IF_64);
+	uasm_i_andi(&p, V0, V0, ST0_IM);
+	uasm_i_or(&p, K0, K0, V0);
+	uasm_i_mtc0(&p, K0, C0_STATUS);
+	uasm_i_ehb(&p);
+
+	p = kvm_mips_build_enter_guest(p);
+
+	return p;
+}
+
+/**
+ * kvm_mips_build_enter_guest() - Assemble code to resume guest execution.
+ * @addr:	Address to start writing code.
+ *
+ * Assemble the code to resume guest execution. This code is common between the
+ * initial entry into the guest from the host, and returning from the exit
+ * handler back to the guest.
+ *
+ * Returns:	Next address after end of written function.
+ */
+static void *kvm_mips_build_enter_guest(void *addr)
+{
+	u32 *p = addr;
+	unsigned int i;
+	struct uasm_label labels[2];
+	struct uasm_reloc relocs[2];
+	struct uasm_label *l = labels;
+	struct uasm_reloc *r = relocs;
+
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	/* Set Guest EPC */
+	UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, pc), K1);
+	UASM_i_MTC0(&p, T0, C0_EPC);
+
+	/* Set the ASID for the Guest Kernel */
+	UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, cop0), K1);
+	UASM_i_LW(&p, T0, offsetof(struct mips_coproc, reg[MIPS_CP0_STATUS][0]),
+		  T0);
+	uasm_i_andi(&p, T0, T0, KSU_USER | ST0_ERL | ST0_EXL);
+	uasm_i_xori(&p, T0, T0, KSU_USER);
+	uasm_il_bnez(&p, &r, T0, label_kernel_asid);
+	 UASM_i_ADDIU(&p, T1, K1,
+		      offsetof(struct kvm_vcpu_arch, guest_kernel_asid));
+	/* else user */
+	UASM_i_ADDIU(&p, T1, K1,
+		     offsetof(struct kvm_vcpu_arch, guest_user_asid));
+	uasm_l_kernel_asid(&l, p);
+
+	/* t1: contains the base of the ASID array, need to get the cpu id  */
+	/* smp_processor_id */
+	uasm_i_lw(&p, T2, offsetof(struct thread_info, cpu), GP);
+	/* x4 */
+	uasm_i_sll(&p, T2, T2, 2);
+	UASM_i_ADDU(&p, T3, T1, T2);
+	uasm_i_lw(&p, K0, 0, T3);
+#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
+	/* x sizeof(struct cpuinfo_mips)/4 */
+	uasm_i_addiu(&p, T3, ZERO, sizeof(struct cpuinfo_mips)/4);
+	uasm_i_mul(&p, T2, T2, T3);
+
+	UASM_i_LA_mostly(&p, AT, (long)&cpu_data[0].asid_mask);
+	UASM_i_ADDU(&p, AT, AT, T2);
+	UASM_i_LW(&p, T2, uasm_rel_lo((long)&cpu_data[0].asid_mask), AT);
+	uasm_i_and(&p, K0, K0, T2);
+#else
+	uasm_i_andi(&p, K0, K0, MIPS_ENTRYHI_ASID);
+#endif
+	uasm_i_mtc0(&p, K0, C0_ENTRYHI);
+	uasm_i_ehb(&p);
+
+	/* Disable RDHWR access */
+	uasm_i_mtc0(&p, ZERO, C0_HWRENA);
+
+	/* load the guest context from VCPU and return */
+	for (i = 1; i < 32; ++i) {
+		/* Guest k0/k1 loaded later */
+		if (i == K0 || i == K1)
+			continue;
+		UASM_i_LW(&p, i, offsetof(struct kvm_vcpu_arch, gprs[i]), K1);
+	}
+
+#ifndef CONFIG_CPU_MIPSR6
+	/* Restore hi/lo */
+	UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, hi), K1);
+	uasm_i_mthi(&p, K0);
+
+	UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, lo), K1);
+	uasm_i_mtlo(&p, K0);
+#endif
+
+	/* Restore the guest's k0/k1 registers */
+	UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, gprs[K0]), K1);
+	UASM_i_LW(&p, K1, offsetof(struct kvm_vcpu_arch, gprs[K1]), K1);
+
+	/* Jump to guest */
+	uasm_i_eret(&p);
+
+	uasm_resolve_relocs(relocs, labels);
+
+	return p;
+}
+
+/**
+ * kvm_mips_build_exception() - Assemble first level guest exception handler.
+ * @addr:	Address to start writing code.
+ * @handler:	Address of common handler (within range of @addr).
+ *
+ * Assemble exception vector code for guest execution. The generated vector will
+ * branch to the common exception handler generated by kvm_mips_build_exit().
+ *
+ * Returns:	Next address after end of written function.
+ */
+void *kvm_mips_build_exception(void *addr, void *handler)
+{
+	u32 *p = addr;
+	struct uasm_label labels[2];
+	struct uasm_reloc relocs[2];
+	struct uasm_label *l = labels;
+	struct uasm_reloc *r = relocs;
+
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	/* Save guest k1 into scratch register */
+	UASM_i_MTC0(&p, K1, scratch_tmp[0], scratch_tmp[1]);
+
+	/* Get the VCPU pointer from the VCPU scratch register */
+	UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]);
+	UASM_i_ADDIU(&p, K1, K1, offsetof(struct kvm_vcpu, arch));
+
+	/* Save guest k0 into VCPU structure */
+	UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, gprs[K0]), K1);
+
+	/* Branch to the common handler */
+	uasm_il_b(&p, &r, label_exit_common);
+	 uasm_i_nop(&p);
+
+	uasm_l_exit_common(&l, handler);
+	uasm_resolve_relocs(relocs, labels);
+
+	return p;
+}
+
+/**
+ * kvm_mips_build_exit() - Assemble common guest exit handler.
+ * @addr:	Address to start writing code.
+ *
+ * Assemble the generic guest exit handling code. This is called by the
+ * exception vectors (generated by kvm_mips_build_exception()), and calls
+ * kvm_mips_handle_exit(), then either resumes the guest or returns to the host
+ * depending on the return value.
+ *
+ * Returns:	Next address after end of written function.
+ */
+void *kvm_mips_build_exit(void *addr)
+{
+	u32 *p = addr;
+	unsigned int i;
+	struct uasm_label labels[3];
+	struct uasm_reloc relocs[3];
+	struct uasm_label *l = labels;
+	struct uasm_reloc *r = relocs;
+
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	/*
+	 * Generic Guest exception handler. We end up here when the guest
+	 * does something that causes a trap to kernel mode.
+	 *
+	 * Both k0/k1 registers will have already been saved (k0 into the vcpu
+	 * structure, and k1 into the scratch_tmp register).
+	 *
+	 * The k1 register will already contain the kvm_vcpu_arch pointer.
+	 */
+
+	/* Start saving Guest context to VCPU */
+	for (i = 0; i < 32; ++i) {
+		/* Guest k0/k1 saved later */
+		if (i == K0 || i == K1)
+			continue;
+		UASM_i_SW(&p, i, offsetof(struct kvm_vcpu_arch, gprs[i]), K1);
+	}
+
+#ifndef CONFIG_CPU_MIPSR6
+	/* We need to save hi/lo and restore them on the way out */
+	uasm_i_mfhi(&p, T0);
+	UASM_i_SW(&p, T0, offsetof(struct kvm_vcpu_arch, hi), K1);
+
+	uasm_i_mflo(&p, T0);
+	UASM_i_SW(&p, T0, offsetof(struct kvm_vcpu_arch, lo), K1);
+#endif
+
+	/* Finally save guest k1 to VCPU */
+	uasm_i_ehb(&p);
+	UASM_i_MFC0(&p, T0, scratch_tmp[0], scratch_tmp[1]);
+	UASM_i_SW(&p, T0, offsetof(struct kvm_vcpu_arch, gprs[K1]), K1);
+
+	/* Now that context has been saved, we can use other registers */
+
+	/* Restore vcpu */
+	UASM_i_MFC0(&p, A1, scratch_vcpu[0], scratch_vcpu[1]);
+	uasm_i_move(&p, S1, A1);
+
+	/* Restore run (vcpu->run) */
+	UASM_i_LW(&p, A0, offsetof(struct kvm_vcpu, run), A1);
+	/* Save pointer to run in s0, will be saved by the compiler */
+	uasm_i_move(&p, S0, A0);
+
+	/*
+	 * Save Host level EPC, BadVaddr and Cause to VCPU, useful to process
+	 * the exception
+	 */
+	UASM_i_MFC0(&p, K0, C0_EPC);
+	UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, pc), K1);
+
+	UASM_i_MFC0(&p, K0, C0_BADVADDR);
+	UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_cp0_badvaddr),
+		  K1);
+
+	uasm_i_mfc0(&p, K0, C0_CAUSE);
+	uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, host_cp0_cause), K1);
+
+	/* Now restore the host state just enough to run the handlers */
+
+	/* Switch EBASE to the one used by Linux */
+	/* load up the host EBASE */
+	uasm_i_mfc0(&p, V0, C0_STATUS);
+
+	uasm_i_lui(&p, AT, ST0_BEV >> 16);
+	uasm_i_or(&p, K0, V0, AT);
+
+	uasm_i_mtc0(&p, K0, C0_STATUS);
+	uasm_i_ehb(&p);
+
+	UASM_i_LA_mostly(&p, K0, (long)&ebase);
+	UASM_i_LW(&p, K0, uasm_rel_lo((long)&ebase), K0);
+	build_set_exc_base(&p, K0);
+
+	if (raw_cpu_has_fpu) {
+		/*
+		 * If FPU is enabled, save FCR31 and clear it so that later
+		 * ctc1's don't trigger FPE for pending exceptions.
+		 */
+		uasm_i_lui(&p, AT, ST0_CU1 >> 16);
+		uasm_i_and(&p, V1, V0, AT);
+		uasm_il_beqz(&p, &r, V1, label_fpu_1);
+		 uasm_i_nop(&p);
+		uasm_i_cfc1(&p, T0, 31);
+		uasm_i_sw(&p, T0, offsetof(struct kvm_vcpu_arch, fpu.fcr31),
+			  K1);
+		uasm_i_ctc1(&p, ZERO, 31);
+		uasm_l_fpu_1(&l, p);
+	}
+
+	if (cpu_has_msa) {
+		/*
+		 * If MSA is enabled, save MSACSR and clear it so that later
+		 * instructions don't trigger MSAFPE for pending exceptions.
+		 */
+		uasm_i_mfc0(&p, T0, C0_CONFIG5);
+		uasm_i_ext(&p, T0, T0, 27, 1); /* MIPS_CONF5_MSAEN */
+		uasm_il_beqz(&p, &r, T0, label_msa_1);
+		 uasm_i_nop(&p);
+		uasm_i_cfcmsa(&p, T0, MSA_CSR);
+		uasm_i_sw(&p, T0, offsetof(struct kvm_vcpu_arch, fpu.msacsr),
+			  K1);
+		uasm_i_ctcmsa(&p, MSA_CSR, ZERO);
+		uasm_l_msa_1(&l, p);
+	}
+
+	/* Now that the new EBASE has been loaded, unset BEV and KSU_USER */
+	uasm_i_addiu(&p, AT, ZERO, ~(ST0_EXL | KSU_USER | ST0_IE));
+	uasm_i_and(&p, V0, V0, AT);
+	uasm_i_lui(&p, AT, ST0_CU0 >> 16);
+	uasm_i_or(&p, V0, V0, AT);
+	uasm_i_mtc0(&p, V0, C0_STATUS);
+	uasm_i_ehb(&p);
+
+	/* Load up host GP */
+	UASM_i_LW(&p, GP, offsetof(struct kvm_vcpu_arch, host_gp), K1);
+
+	/* Need a stack before we can jump to "C" */
+	UASM_i_LW(&p, SP, offsetof(struct kvm_vcpu_arch, host_stack), K1);
+
+	/* Saved host state */
+	UASM_i_ADDIU(&p, SP, SP, -(int)sizeof(struct pt_regs));
+
+	/*
+	 * XXXKYMA do we need to load the host ASID, maybe not because the
+	 * kernel entries are marked GLOBAL, need to verify
+	 */
+
+	/* Restore host scratch registers, as we'll have clobbered them */
+	kvm_mips_build_restore_scratch(&p, K0, SP);
+
+	/* Restore RDHWR access */
+	UASM_i_LA_mostly(&p, K0, (long)&hwrena);
+	uasm_i_lw(&p, K0, uasm_rel_lo((long)&hwrena), K0);
+	uasm_i_mtc0(&p, K0, C0_HWRENA);
+
+	/* Jump to handler */
+	/*
+	 * XXXKYMA: not sure if this is safe, how large is the stack??
+	 * Now jump to the kvm_mips_handle_exit() to see if we can deal
+	 * with this in the kernel
+	 */
+	UASM_i_LA(&p, T9, (unsigned long)kvm_mips_handle_exit);
+	uasm_i_jalr(&p, RA, T9);
+	 UASM_i_ADDIU(&p, SP, SP, -CALLFRAME_SIZ);
+
+	uasm_resolve_relocs(relocs, labels);
+
+	p = kvm_mips_build_ret_from_exit(p);
+
+	return p;
+}
+
+/**
+ * kvm_mips_build_ret_from_exit() - Assemble guest exit return handler.
+ * @addr:	Address to start writing code.
+ *
+ * Assemble the code to handle the return from kvm_mips_handle_exit(), either
+ * resuming the guest or returning to the host depending on the return value.
+ *
+ * Returns:	Next address after end of written function.
+ */
+static void *kvm_mips_build_ret_from_exit(void *addr)
+{
+	u32 *p = addr;
+	struct uasm_label labels[2];
+	struct uasm_reloc relocs[2];
+	struct uasm_label *l = labels;
+	struct uasm_reloc *r = relocs;
+
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	/* Return from handler Make sure interrupts are disabled */
+	uasm_i_di(&p, ZERO);
+	uasm_i_ehb(&p);
+
+	/*
+	 * XXXKYMA: k0/k1 could have been blown away if we processed
+	 * an exception while we were handling the exception from the
+	 * guest, reload k1
+	 */
+
+	uasm_i_move(&p, K1, S1);
+	UASM_i_ADDIU(&p, K1, K1, offsetof(struct kvm_vcpu, arch));
+
+	/*
+	 * Check return value, should tell us if we are returning to the
+	 * host (handle I/O etc)or resuming the guest
+	 */
+	uasm_i_andi(&p, T0, V0, RESUME_HOST);
+	uasm_il_bnez(&p, &r, T0, label_return_to_host);
+	 uasm_i_nop(&p);
+
+	p = kvm_mips_build_ret_to_guest(p);
+
+	uasm_l_return_to_host(&l, p);
+	p = kvm_mips_build_ret_to_host(p);
+
+	uasm_resolve_relocs(relocs, labels);
+
+	return p;
+}
+
+/**
+ * kvm_mips_build_ret_to_guest() - Assemble code to return to the guest.
+ * @addr:	Address to start writing code.
+ *
+ * Assemble the code to handle return from the guest exit handler
+ * (kvm_mips_handle_exit()) back to the guest.
+ *
+ * Returns:	Next address after end of written function.
+ */
+static void *kvm_mips_build_ret_to_guest(void *addr)
+{
+	u32 *p = addr;
+
+	/* Put the saved pointer to vcpu (s1) back into the scratch register */
+	UASM_i_MTC0(&p, S1, scratch_vcpu[0], scratch_vcpu[1]);
+
+	/* Load up the Guest EBASE to minimize the window where BEV is set */
+	UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, guest_ebase), K1);
+
+	/* Switch EBASE back to the one used by KVM */
+	uasm_i_mfc0(&p, V1, C0_STATUS);
+	uasm_i_lui(&p, AT, ST0_BEV >> 16);
+	uasm_i_or(&p, K0, V1, AT);
+	uasm_i_mtc0(&p, K0, C0_STATUS);
+	uasm_i_ehb(&p);
+	build_set_exc_base(&p, T0);
+
+	/* Setup status register for running guest in UM */
+	uasm_i_ori(&p, V1, V1, ST0_EXL | KSU_USER | ST0_IE);
+	UASM_i_LA(&p, AT, ~(ST0_CU0 | ST0_MX));
+	uasm_i_and(&p, V1, V1, AT);
+	uasm_i_mtc0(&p, V1, C0_STATUS);
+	uasm_i_ehb(&p);
+
+	p = kvm_mips_build_enter_guest(p);
+
+	return p;
+}
+
+/**
+ * kvm_mips_build_ret_to_host() - Assemble code to return to the host.
+ * @addr:	Address to start writing code.
+ *
+ * Assemble the code to handle return from the guest exit handler
+ * (kvm_mips_handle_exit()) back to the host, i.e. to the caller of the vcpu_run
+ * function generated by kvm_mips_build_vcpu_run().
+ *
+ * Returns:	Next address after end of written function.
+ */
+static void *kvm_mips_build_ret_to_host(void *addr)
+{
+	u32 *p = addr;
+	unsigned int i;
+
+	/* EBASE is already pointing to Linux */
+	UASM_i_LW(&p, K1, offsetof(struct kvm_vcpu_arch, host_stack), K1);
+	UASM_i_ADDIU(&p, K1, K1, -(int)sizeof(struct pt_regs));
+
+	/*
+	 * r2/v0 is the return code, shift it down by 2 (arithmetic)
+	 * to recover the err code
+	 */
+	uasm_i_sra(&p, K0, V0, 2);
+	uasm_i_move(&p, V0, K0);
+
+	/* Load context saved on the host stack */
+	for (i = 16; i < 31; ++i) {
+		if (i == 24)
+			i = 28;
+		UASM_i_LW(&p, i, offsetof(struct pt_regs, regs[i]), K1);
+	}
+
+	/* Restore RDHWR access */
+	UASM_i_LA_mostly(&p, K0, (long)&hwrena);
+	uasm_i_lw(&p, K0, uasm_rel_lo((long)&hwrena), K0);
+	uasm_i_mtc0(&p, K0, C0_HWRENA);
+
+	/* Restore RA, which is the address we will return to */
+	UASM_i_LW(&p, RA, offsetof(struct pt_regs, regs[RA]), K1);
+	uasm_i_jr(&p, RA);
+	 uasm_i_nop(&p);
+
+	return p;
+}
+
diff --git a/arch/mips/kvm/fpu.S b/arch/mips/kvm/fpu.S
index 531fbf5131c0..16f17c6390dd 100644
--- a/arch/mips/kvm/fpu.S
+++ b/arch/mips/kvm/fpu.S
@@ -14,13 +14,16 @@
 #include <asm/mipsregs.h>
 #include <asm/regdef.h>
 
+/* preprocessor replaces the fp in ".set fp=64" with $30 otherwise */
+#undef fp
+
 	.set	noreorder
 	.set	noat
 
 LEAF(__kvm_save_fpu)
 	.set	push
-	.set	mips64r2
 	SET_HARDFLOAT
+	.set	fp=64
 	mfc0	t0, CP0_STATUS
 	sll     t0, t0, 5			# is Status.FR set?
 	bgez    t0, 1f				# no: skip odd doubles
@@ -63,8 +66,8 @@ LEAF(__kvm_save_fpu)
 
 LEAF(__kvm_restore_fpu)
 	.set	push
-	.set	mips64r2
 	SET_HARDFLOAT
+	.set	fp=64
 	mfc0	t0, CP0_STATUS
 	sll     t0, t0, 5			# is Status.FR set?
 	bgez    t0, 1f				# no: skip odd doubles
diff --git a/arch/mips/kvm/interrupt.c b/arch/mips/kvm/interrupt.c
index 95f790663b0c..ad28dac6b7e9 100644
--- a/arch/mips/kvm/interrupt.c
+++ b/arch/mips/kvm/interrupt.c
@@ -22,12 +22,12 @@
 
 #include "interrupt.h"
 
-void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, uint32_t priority)
+void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority)
 {
 	set_bit(priority, &vcpu->arch.pending_exceptions);
 }
 
-void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, uint32_t priority)
+void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority)
 {
 	clear_bit(priority, &vcpu->arch.pending_exceptions);
 }
@@ -114,10 +114,10 @@ void kvm_mips_dequeue_io_int_cb(struct kvm_vcpu *vcpu,
 
 /* Deliver the interrupt of the corresponding priority, if possible. */
 int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
-			    uint32_t cause)
+			    u32 cause)
 {
 	int allowed = 0;
-	uint32_t exccode;
+	u32 exccode;
 
 	struct kvm_vcpu_arch *arch = &vcpu->arch;
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
@@ -196,12 +196,12 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
 }
 
 int kvm_mips_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority,
-			  uint32_t cause)
+			  u32 cause)
 {
 	return 1;
 }
 
-void kvm_mips_deliver_interrupts(struct kvm_vcpu *vcpu, uint32_t cause)
+void kvm_mips_deliver_interrupts(struct kvm_vcpu *vcpu, u32 cause)
 {
 	unsigned long *pending = &vcpu->arch.pending_exceptions;
 	unsigned long *pending_clr = &vcpu->arch.pending_exceptions_clr;
diff --git a/arch/mips/kvm/interrupt.h b/arch/mips/kvm/interrupt.h
index 2143884709e4..fb118a2c8379 100644
--- a/arch/mips/kvm/interrupt.h
+++ b/arch/mips/kvm/interrupt.h
@@ -28,17 +28,13 @@
 #define MIPS_EXC_MAX                12
 /* XXXSL More to follow */
 
-extern char __kvm_mips_vcpu_run_end[];
-extern char mips32_exception[], mips32_exceptionEnd[];
-extern char mips32_GuestException[], mips32_GuestExceptionEnd[];
-
 #define C_TI        (_ULCAST_(1) << 30)
 
 #define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (0)
 #define KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE   (0)
 
-void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, uint32_t priority);
-void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, uint32_t priority);
+void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority);
+void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority);
 int kvm_mips_pending_timer(struct kvm_vcpu *vcpu);
 
 void kvm_mips_queue_timer_int_cb(struct kvm_vcpu *vcpu);
@@ -48,7 +44,7 @@ void kvm_mips_queue_io_int_cb(struct kvm_vcpu *vcpu,
 void kvm_mips_dequeue_io_int_cb(struct kvm_vcpu *vcpu,
 				struct kvm_mips_interrupt *irq);
 int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
-			    uint32_t cause);
+			    u32 cause);
 int kvm_mips_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority,
-			  uint32_t cause);
-void kvm_mips_deliver_interrupts(struct kvm_vcpu *vcpu, uint32_t cause);
+			  u32 cause);
+void kvm_mips_deliver_interrupts(struct kvm_vcpu *vcpu, u32 cause);
diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S
deleted file mode 100644
index 828fcfc1cd7f..000000000000
--- a/arch/mips/kvm/locore.S
+++ /dev/null
@@ -1,605 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Main entry point for the guest, exception handling.
- *
- * Copyright (C) 2012  MIPS Technologies, Inc.  All rights reserved.
- * Authors: Sanjay Lal <sanjayl@kymasys.com>
- */
-
-#include <asm/asm.h>
-#include <asm/asmmacro.h>
-#include <asm/regdef.h>
-#include <asm/mipsregs.h>
-#include <asm/stackframe.h>
-#include <asm/asm-offsets.h>
-
-#define _C_LABEL(x)     x
-#define MIPSX(name)     mips32_ ## name
-#define CALLFRAME_SIZ   32
-
-/*
- * VECTOR
- *  exception vector entrypoint
- */
-#define VECTOR(x, regmask)      \
-    .ent    _C_LABEL(x),0;      \
-    EXPORT(x);
-
-#define VECTOR_END(x)      \
-    EXPORT(x);
-
-/* Overload, Danger Will Robinson!! */
-#define PT_HOST_USERLOCAL   PT_EPC
-
-#define CP0_DDATA_LO        $28,3
-
-/* Resume Flags */
-#define RESUME_FLAG_HOST        (1<<1)  /* Resume host? */
-
-#define RESUME_GUEST            0
-#define RESUME_HOST             RESUME_FLAG_HOST
-
-/*
- * __kvm_mips_vcpu_run: entry point to the guest
- * a0: run
- * a1: vcpu
- */
-	.set	noreorder
-
-FEXPORT(__kvm_mips_vcpu_run)
-	/* k0/k1 not being used in host kernel context */
-	INT_ADDIU k1, sp, -PT_SIZE
-	LONG_S	$16, PT_R16(k1)
-	LONG_S	$17, PT_R17(k1)
-	LONG_S	$18, PT_R18(k1)
-	LONG_S	$19, PT_R19(k1)
-	LONG_S	$20, PT_R20(k1)
-	LONG_S	$21, PT_R21(k1)
-	LONG_S	$22, PT_R22(k1)
-	LONG_S	$23, PT_R23(k1)
-
-	LONG_S	$28, PT_R28(k1)
-	LONG_S	$29, PT_R29(k1)
-	LONG_S	$30, PT_R30(k1)
-	LONG_S	$31, PT_R31(k1)
-
-	/* Save hi/lo */
-	mflo	v0
-	LONG_S	v0, PT_LO(k1)
-	mfhi	v1
-	LONG_S	v1, PT_HI(k1)
-
-	/* Save host status */
-	mfc0	v0, CP0_STATUS
-	LONG_S	v0, PT_STATUS(k1)
-
-	/* Save DDATA_LO, will be used to store pointer to vcpu */
-	mfc0	v1, CP0_DDATA_LO
-	LONG_S	v1, PT_HOST_USERLOCAL(k1)
-
-	/* DDATA_LO has pointer to vcpu */
-	mtc0	a1, CP0_DDATA_LO
-
-	/* Offset into vcpu->arch */
-	INT_ADDIU k1, a1, VCPU_HOST_ARCH
-
-	/*
-	 * Save the host stack to VCPU, used for exception processing
-	 * when we exit from the Guest
-	 */
-	LONG_S	sp, VCPU_HOST_STACK(k1)
-
-	/* Save the kernel gp as well */
-	LONG_S	gp, VCPU_HOST_GP(k1)
-
-	/*
-	 * Setup status register for running the guest in UM, interrupts
-	 * are disabled
-	 */
-	li	k0, (ST0_EXL | KSU_USER | ST0_BEV)
-	mtc0	k0, CP0_STATUS
-	ehb
-
-	/* load up the new EBASE */
-	LONG_L	k0, VCPU_GUEST_EBASE(k1)
-	mtc0	k0, CP0_EBASE
-
-	/*
-	 * Now that the new EBASE has been loaded, unset BEV, set
-	 * interrupt mask as it was but make sure that timer interrupts
-	 * are enabled
-	 */
-	li	k0, (ST0_EXL | KSU_USER | ST0_IE)
-	andi	v0, v0, ST0_IM
-	or	k0, k0, v0
-	mtc0	k0, CP0_STATUS
-	ehb
-
-	/* Set Guest EPC */
-	LONG_L	t0, VCPU_PC(k1)
-	mtc0	t0, CP0_EPC
-
-FEXPORT(__kvm_mips_load_asid)
-	/* Set the ASID for the Guest Kernel */
-	PTR_L	t0, VCPU_COP0(k1)
-	LONG_L	t0, COP0_STATUS(t0)
-	andi	t0, KSU_USER | ST0_ERL | ST0_EXL
-	xori	t0, KSU_USER
-	bnez	t0, 1f		/* If kernel */
-	 INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID  /* (BD)  */
-	INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID    /* else user */
-1:
-	/* t1: contains the base of the ASID array, need to get the cpu id */
-	LONG_L	t2, TI_CPU($28)             /* smp_processor_id */
-	INT_SLL	t2, t2, 2                   /* x4 */
-	REG_ADDU t3, t1, t2
-	LONG_L	k0, (t3)
-#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
-	li	t3, CPUINFO_SIZE/4
-	mul	t2, t2, t3		/* x sizeof(struct cpuinfo_mips)/4 */
-	LONG_L	t2, (cpu_data + CPUINFO_ASID_MASK)(t2)
-	and	k0, k0, t2
-#else
-	andi	k0, k0, MIPS_ENTRYHI_ASID
-#endif
-	mtc0	k0, CP0_ENTRYHI
-	ehb
-
-	/* Disable RDHWR access */
-	mtc0	zero, CP0_HWRENA
-
-	.set	noat
-	/* Now load up the Guest Context from VCPU */
-	LONG_L	$1, VCPU_R1(k1)
-	LONG_L	$2, VCPU_R2(k1)
-	LONG_L	$3, VCPU_R3(k1)
-
-	LONG_L	$4, VCPU_R4(k1)
-	LONG_L	$5, VCPU_R5(k1)
-	LONG_L	$6, VCPU_R6(k1)
-	LONG_L	$7, VCPU_R7(k1)
-
-	LONG_L	$8, VCPU_R8(k1)
-	LONG_L	$9, VCPU_R9(k1)
-	LONG_L	$10, VCPU_R10(k1)
-	LONG_L	$11, VCPU_R11(k1)
-	LONG_L	$12, VCPU_R12(k1)
-	LONG_L	$13, VCPU_R13(k1)
-	LONG_L	$14, VCPU_R14(k1)
-	LONG_L	$15, VCPU_R15(k1)
-	LONG_L	$16, VCPU_R16(k1)
-	LONG_L	$17, VCPU_R17(k1)
-	LONG_L	$18, VCPU_R18(k1)
-	LONG_L	$19, VCPU_R19(k1)
-	LONG_L	$20, VCPU_R20(k1)
-	LONG_L	$21, VCPU_R21(k1)
-	LONG_L	$22, VCPU_R22(k1)
-	LONG_L	$23, VCPU_R23(k1)
-	LONG_L	$24, VCPU_R24(k1)
-	LONG_L	$25, VCPU_R25(k1)
-
-	/* k0/k1 loaded up later */
-
-	LONG_L	$28, VCPU_R28(k1)
-	LONG_L	$29, VCPU_R29(k1)
-	LONG_L	$30, VCPU_R30(k1)
-	LONG_L	$31, VCPU_R31(k1)
-
-	/* Restore hi/lo */
-	LONG_L	k0, VCPU_LO(k1)
-	mtlo	k0
-
-	LONG_L	k0, VCPU_HI(k1)
-	mthi	k0
-
-FEXPORT(__kvm_mips_load_k0k1)
-	/* Restore the guest's k0/k1 registers */
-	LONG_L	k0, VCPU_R26(k1)
-	LONG_L	k1, VCPU_R27(k1)
-
-	/* Jump to guest */
-	eret
-EXPORT(__kvm_mips_vcpu_run_end)
-
-VECTOR(MIPSX(exception), unknown)
-/* Find out what mode we came from and jump to the proper handler. */
-	mtc0	k0, CP0_ERROREPC	#01: Save guest k0
-	ehb				#02:
-
-	mfc0	k0, CP0_EBASE		#02: Get EBASE
-	INT_SRL	k0, k0, 10		#03: Get rid of CPUNum
-	INT_SLL	k0, k0, 10		#04
-	LONG_S	k1, 0x3000(k0)		#05: Save k1 @ offset 0x3000
-	INT_ADDIU k0, k0, 0x2000	#06: Exception handler is
-					#    installed @ offset 0x2000
-	j	k0			#07: jump to the function
-	 nop				#08: branch delay slot
-VECTOR_END(MIPSX(exceptionEnd))
-.end MIPSX(exception)
-
-/*
- * Generic Guest exception handler. We end up here when the guest
- * does something that causes a trap to kernel mode.
- */
-NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra)
-	/* Get the VCPU pointer from DDTATA_LO */
-	mfc0	k1, CP0_DDATA_LO
-	INT_ADDIU k1, k1, VCPU_HOST_ARCH
-
-	/* Start saving Guest context to VCPU */
-	LONG_S	$0, VCPU_R0(k1)
-	LONG_S	$1, VCPU_R1(k1)
-	LONG_S	$2, VCPU_R2(k1)
-	LONG_S	$3, VCPU_R3(k1)
-	LONG_S	$4, VCPU_R4(k1)
-	LONG_S	$5, VCPU_R5(k1)
-	LONG_S	$6, VCPU_R6(k1)
-	LONG_S	$7, VCPU_R7(k1)
-	LONG_S	$8, VCPU_R8(k1)
-	LONG_S	$9, VCPU_R9(k1)
-	LONG_S	$10, VCPU_R10(k1)
-	LONG_S	$11, VCPU_R11(k1)
-	LONG_S	$12, VCPU_R12(k1)
-	LONG_S	$13, VCPU_R13(k1)
-	LONG_S	$14, VCPU_R14(k1)
-	LONG_S	$15, VCPU_R15(k1)
-	LONG_S	$16, VCPU_R16(k1)
-	LONG_S	$17, VCPU_R17(k1)
-	LONG_S	$18, VCPU_R18(k1)
-	LONG_S	$19, VCPU_R19(k1)
-	LONG_S	$20, VCPU_R20(k1)
-	LONG_S	$21, VCPU_R21(k1)
-	LONG_S	$22, VCPU_R22(k1)
-	LONG_S	$23, VCPU_R23(k1)
-	LONG_S	$24, VCPU_R24(k1)
-	LONG_S	$25, VCPU_R25(k1)
-
-	/* Guest k0/k1 saved later */
-
-	LONG_S	$28, VCPU_R28(k1)
-	LONG_S	$29, VCPU_R29(k1)
-	LONG_S	$30, VCPU_R30(k1)
-	LONG_S	$31, VCPU_R31(k1)
-
-	.set at
-
-	/* We need to save hi/lo and restore them on the way out */
-	mfhi	t0
-	LONG_S	t0, VCPU_HI(k1)
-
-	mflo	t0
-	LONG_S	t0, VCPU_LO(k1)
-
-	/* Finally save guest k0/k1 to VCPU */
-	mfc0	t0, CP0_ERROREPC
-	LONG_S	t0, VCPU_R26(k1)
-
-	/* Get GUEST k1 and save it in VCPU */
-	PTR_LI	t1, ~0x2ff
-	mfc0	t0, CP0_EBASE
-	and	t0, t0, t1
-	LONG_L	t0, 0x3000(t0)
-	LONG_S	t0, VCPU_R27(k1)
-
-	/* Now that context has been saved, we can use other registers */
-
-	/* Restore vcpu */
-	mfc0	a1, CP0_DDATA_LO
-	move	s1, a1
-
-	/* Restore run (vcpu->run) */
-	LONG_L	a0, VCPU_RUN(a1)
-	/* Save pointer to run in s0, will be saved by the compiler */
-	move	s0, a0
-
-	/*
-	 * Save Host level EPC, BadVaddr and Cause to VCPU, useful to
-	 * process the exception
-	 */
-	mfc0	k0,CP0_EPC
-	LONG_S	k0, VCPU_PC(k1)
-
-	mfc0	k0, CP0_BADVADDR
-	LONG_S	k0, VCPU_HOST_CP0_BADVADDR(k1)
-
-	mfc0	k0, CP0_CAUSE
-	LONG_S	k0, VCPU_HOST_CP0_CAUSE(k1)
-
-	mfc0	k0, CP0_ENTRYHI
-	LONG_S	k0, VCPU_HOST_ENTRYHI(k1)
-
-	/* Now restore the host state just enough to run the handlers */
-
-	/* Switch EBASE to the one used by Linux */
-	/* load up the host EBASE */
-	mfc0	v0, CP0_STATUS
-
-	or	k0, v0, ST0_BEV
-
-	mtc0	k0, CP0_STATUS
-	ehb
-
-	LONG_L	k0, VCPU_HOST_EBASE(k1)
-	mtc0	k0,CP0_EBASE
-
-	/*
-	 * If FPU is enabled, save FCR31 and clear it so that later ctc1's don't
-	 * trigger FPE for pending exceptions.
-	 */
-	and	v1, v0, ST0_CU1
-	beqz	v1, 1f
-	 nop
-	.set	push
-	SET_HARDFLOAT
-	cfc1	t0, fcr31
-	sw	t0, VCPU_FCR31(k1)
-	ctc1	zero,fcr31
-	.set	pop
-1:
-
-#ifdef CONFIG_CPU_HAS_MSA
-	/*
-	 * If MSA is enabled, save MSACSR and clear it so that later
-	 * instructions don't trigger MSAFPE for pending exceptions.
-	 */
-	mfc0	t0, CP0_CONFIG3
-	ext	t0, t0, 28, 1 /* MIPS_CONF3_MSAP */
-	beqz	t0, 1f
-	 nop
-	mfc0	t0, CP0_CONFIG5
-	ext	t0, t0, 27, 1 /* MIPS_CONF5_MSAEN */
-	beqz	t0, 1f
-	 nop
-	_cfcmsa	t0, MSA_CSR
-	sw	t0, VCPU_MSA_CSR(k1)
-	_ctcmsa	MSA_CSR, zero
-1:
-#endif
-
-	/* Now that the new EBASE has been loaded, unset BEV and KSU_USER */
-	and	v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE)
-	or	v0, v0, ST0_CU0
-	mtc0	v0, CP0_STATUS
-	ehb
-
-	/* Load up host GP */
-	LONG_L	gp, VCPU_HOST_GP(k1)
-
-	/* Need a stack before we can jump to "C" */
-	LONG_L	sp, VCPU_HOST_STACK(k1)
-
-	/* Saved host state */
-	INT_ADDIU sp, sp, -PT_SIZE
-
-	/*
-	 * XXXKYMA do we need to load the host ASID, maybe not because the
-	 * kernel entries are marked GLOBAL, need to verify
-	 */
-
-	/* Restore host DDATA_LO */
-	LONG_L	k0, PT_HOST_USERLOCAL(sp)
-	mtc0	k0, CP0_DDATA_LO
-
-	/* Restore RDHWR access */
-	PTR_LI	k0, 0x2000000F
-	mtc0	k0, CP0_HWRENA
-
-	/* Jump to handler */
-FEXPORT(__kvm_mips_jump_to_handler)
-	/*
-	 * XXXKYMA: not sure if this is safe, how large is the stack??
-	 * Now jump to the kvm_mips_handle_exit() to see if we can deal
-	 * with this in the kernel
-	 */
-	PTR_LA	t9, kvm_mips_handle_exit
-	jalr.hb	t9
-	 INT_ADDIU sp, sp, -CALLFRAME_SIZ           /* BD Slot */
-
-	/* Return from handler Make sure interrupts are disabled */
-	di
-	ehb
-
-	/*
-	 * XXXKYMA: k0/k1 could have been blown away if we processed
-	 * an exception while we were handling the exception from the
-	 * guest, reload k1
-	 */
-
-	move	k1, s1
-	INT_ADDIU k1, k1, VCPU_HOST_ARCH
-
-	/*
-	 * Check return value, should tell us if we are returning to the
-	 * host (handle I/O etc)or resuming the guest
-	 */
-	andi	t0, v0, RESUME_HOST
-	bnez	t0, __kvm_mips_return_to_host
-	 nop
-
-__kvm_mips_return_to_guest:
-	/* Put the saved pointer to vcpu (s1) back into the DDATA_LO Register */
-	mtc0	s1, CP0_DDATA_LO
-
-	/* Load up the Guest EBASE to minimize the window where BEV is set */
-	LONG_L	t0, VCPU_GUEST_EBASE(k1)
-
-	/* Switch EBASE back to the one used by KVM */
-	mfc0	v1, CP0_STATUS
-	or	k0, v1, ST0_BEV
-	mtc0	k0, CP0_STATUS
-	ehb
-	mtc0	t0, CP0_EBASE
-
-	/* Setup status register for running guest in UM */
-	or	v1, v1, (ST0_EXL | KSU_USER | ST0_IE)
-	and	v1, v1, ~(ST0_CU0 | ST0_MX)
-	mtc0	v1, CP0_STATUS
-	ehb
-
-	/* Set Guest EPC */
-	LONG_L	t0, VCPU_PC(k1)
-	mtc0	t0, CP0_EPC
-
-	/* Set the ASID for the Guest Kernel */
-	PTR_L	t0, VCPU_COP0(k1)
-	LONG_L	t0, COP0_STATUS(t0)
-	andi	t0, KSU_USER | ST0_ERL | ST0_EXL
-	xori	t0, KSU_USER
-	bnez	t0, 1f		/* If kernel */
-	 INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID  /* (BD)  */
-	INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID    /* else user */
-1:
-	/* t1: contains the base of the ASID array, need to get the cpu id  */
-	LONG_L	t2, TI_CPU($28)		/* smp_processor_id */
-	INT_SLL	t2, t2, 2		/* x4 */
-	REG_ADDU t3, t1, t2
-	LONG_L	k0, (t3)
-#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
-	li	t3, CPUINFO_SIZE/4
-	mul	t2, t2, t3		/* x sizeof(struct cpuinfo_mips)/4 */
-	LONG_L	t2, (cpu_data + CPUINFO_ASID_MASK)(t2)
-	and	k0, k0, t2
-#else
-	andi	k0, k0, MIPS_ENTRYHI_ASID
-#endif
-	mtc0	k0, CP0_ENTRYHI
-	ehb
-
-	/* Disable RDHWR access */
-	mtc0	zero, CP0_HWRENA
-
-	.set	noat
-	/* load the guest context from VCPU and return */
-	LONG_L	$0, VCPU_R0(k1)
-	LONG_L	$1, VCPU_R1(k1)
-	LONG_L	$2, VCPU_R2(k1)
-	LONG_L	$3, VCPU_R3(k1)
-	LONG_L	$4, VCPU_R4(k1)
-	LONG_L	$5, VCPU_R5(k1)
-	LONG_L	$6, VCPU_R6(k1)
-	LONG_L	$7, VCPU_R7(k1)
-	LONG_L	$8, VCPU_R8(k1)
-	LONG_L	$9, VCPU_R9(k1)
-	LONG_L	$10, VCPU_R10(k1)
-	LONG_L	$11, VCPU_R11(k1)
-	LONG_L	$12, VCPU_R12(k1)
-	LONG_L	$13, VCPU_R13(k1)
-	LONG_L	$14, VCPU_R14(k1)
-	LONG_L	$15, VCPU_R15(k1)
-	LONG_L	$16, VCPU_R16(k1)
-	LONG_L	$17, VCPU_R17(k1)
-	LONG_L	$18, VCPU_R18(k1)
-	LONG_L	$19, VCPU_R19(k1)
-	LONG_L	$20, VCPU_R20(k1)
-	LONG_L	$21, VCPU_R21(k1)
-	LONG_L	$22, VCPU_R22(k1)
-	LONG_L	$23, VCPU_R23(k1)
-	LONG_L	$24, VCPU_R24(k1)
-	LONG_L	$25, VCPU_R25(k1)
-
-	/* $/k1 loaded later */
-	LONG_L	$28, VCPU_R28(k1)
-	LONG_L	$29, VCPU_R29(k1)
-	LONG_L	$30, VCPU_R30(k1)
-	LONG_L	$31, VCPU_R31(k1)
-
-FEXPORT(__kvm_mips_skip_guest_restore)
-	LONG_L	k0, VCPU_HI(k1)
-	mthi	k0
-
-	LONG_L	k0, VCPU_LO(k1)
-	mtlo	k0
-
-	LONG_L	k0, VCPU_R26(k1)
-	LONG_L	k1, VCPU_R27(k1)
-
-	eret
-	.set	at
-
-__kvm_mips_return_to_host:
-	/* EBASE is already pointing to Linux */
-	LONG_L	k1, VCPU_HOST_STACK(k1)
-	INT_ADDIU k1,k1, -PT_SIZE
-
-	/* Restore host DDATA_LO */
-	LONG_L	k0, PT_HOST_USERLOCAL(k1)
-	mtc0	k0, CP0_DDATA_LO
-
-	/*
-	 * r2/v0 is the return code, shift it down by 2 (arithmetic)
-	 * to recover the err code
-	 */
-	INT_SRA	k0, v0, 2
-	move	$2, k0
-
-	/* Load context saved on the host stack */
-	LONG_L	$16, PT_R16(k1)
-	LONG_L	$17, PT_R17(k1)
-	LONG_L	$18, PT_R18(k1)
-	LONG_L	$19, PT_R19(k1)
-	LONG_L	$20, PT_R20(k1)
-	LONG_L	$21, PT_R21(k1)
-	LONG_L	$22, PT_R22(k1)
-	LONG_L	$23, PT_R23(k1)
-
-	LONG_L	$28, PT_R28(k1)
-	LONG_L	$29, PT_R29(k1)
-	LONG_L	$30, PT_R30(k1)
-
-	LONG_L	k0, PT_HI(k1)
-	mthi	k0
-
-	LONG_L	k0, PT_LO(k1)
-	mtlo	k0
-
-	/* Restore RDHWR access */
-	PTR_LI	k0, 0x2000000F
-	mtc0	k0, CP0_HWRENA
-
-	/* Restore RA, which is the address we will return to */
-	LONG_L	ra, PT_R31(k1)
-	j	ra
-	 nop
-
-VECTOR_END(MIPSX(GuestExceptionEnd))
-.end MIPSX(GuestException)
-
-MIPSX(exceptions):
-	####
-	##### The exception handlers.
-	#####
-	.word _C_LABEL(MIPSX(GuestException))	#  0
-	.word _C_LABEL(MIPSX(GuestException))	#  1
-	.word _C_LABEL(MIPSX(GuestException))	#  2
-	.word _C_LABEL(MIPSX(GuestException))	#  3
-	.word _C_LABEL(MIPSX(GuestException))	#  4
-	.word _C_LABEL(MIPSX(GuestException))	#  5
-	.word _C_LABEL(MIPSX(GuestException))	#  6
-	.word _C_LABEL(MIPSX(GuestException))	#  7
-	.word _C_LABEL(MIPSX(GuestException))	#  8
-	.word _C_LABEL(MIPSX(GuestException))	#  9
-	.word _C_LABEL(MIPSX(GuestException))	# 10
-	.word _C_LABEL(MIPSX(GuestException))	# 11
-	.word _C_LABEL(MIPSX(GuestException))	# 12
-	.word _C_LABEL(MIPSX(GuestException))	# 13
-	.word _C_LABEL(MIPSX(GuestException))	# 14
-	.word _C_LABEL(MIPSX(GuestException))	# 15
-	.word _C_LABEL(MIPSX(GuestException))	# 16
-	.word _C_LABEL(MIPSX(GuestException))	# 17
-	.word _C_LABEL(MIPSX(GuestException))	# 18
-	.word _C_LABEL(MIPSX(GuestException))	# 19
-	.word _C_LABEL(MIPSX(GuestException))	# 20
-	.word _C_LABEL(MIPSX(GuestException))	# 21
-	.word _C_LABEL(MIPSX(GuestException))	# 22
-	.word _C_LABEL(MIPSX(GuestException))	# 23
-	.word _C_LABEL(MIPSX(GuestException))	# 24
-	.word _C_LABEL(MIPSX(GuestException))	# 25
-	.word _C_LABEL(MIPSX(GuestException))	# 26
-	.word _C_LABEL(MIPSX(GuestException))	# 27
-	.word _C_LABEL(MIPSX(GuestException))	# 28
-	.word _C_LABEL(MIPSX(GuestException))	# 29
-	.word _C_LABEL(MIPSX(GuestException))	# 30
-	.word _C_LABEL(MIPSX(GuestException))	# 31
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 44da5259f390..a6ea084b4d9d 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -9,6 +9,7 @@
  * Authors: Sanjay Lal <sanjayl@kymasys.com>
  */
 
+#include <linux/bitops.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kdebug.h>
@@ -147,7 +148,7 @@ void kvm_mips_free_vcpus(struct kvm *kvm)
 	/* Put the pages we reserved for the guest pmap */
 	for (i = 0; i < kvm->arch.guest_pmap_npages; i++) {
 		if (kvm->arch.guest_pmap[i] != KVM_INVALID_PAGE)
-			kvm_mips_release_pfn_clean(kvm->arch.guest_pmap[i]);
+			kvm_release_pfn_clean(kvm->arch.guest_pmap[i]);
 	}
 	kfree(kvm->arch.guest_pmap);
 
@@ -244,10 +245,27 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	}
 }
 
+static inline void dump_handler(const char *symbol, void *start, void *end)
+{
+	u32 *p;
+
+	pr_debug("LEAF(%s)\n", symbol);
+
+	pr_debug("\t.set push\n");
+	pr_debug("\t.set noreorder\n");
+
+	for (p = start; p < (u32 *)end; ++p)
+		pr_debug("\t.word\t0x%08x\t\t# %p\n", *p, p);
+
+	pr_debug("\t.set\tpop\n");
+
+	pr_debug("\tEND(%s)\n", symbol);
+}
+
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
-	int err, size, offset;
-	void *gebase;
+	int err, size;
+	void *gebase, *p, *handler;
 	int i;
 
 	struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
@@ -273,9 +291,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 	else
 		size = 0x4000;
 
-	/* Save Linux EBASE */
-	vcpu->arch.host_ebase = (void *)read_c0_ebase();
-
 	gebase = kzalloc(ALIGN(size, PAGE_SIZE), GFP_KERNEL);
 
 	if (!gebase) {
@@ -285,44 +300,53 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 	kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n",
 		  ALIGN(size, PAGE_SIZE), gebase);
 
+	/*
+	 * Check new ebase actually fits in CP0_EBase. The lack of a write gate
+	 * limits us to the low 512MB of physical address space. If the memory
+	 * we allocate is out of range, just give up now.
+	 */
+	if (!cpu_has_ebase_wg && virt_to_phys(gebase) >= 0x20000000) {
+		kvm_err("CP0_EBase.WG required for guest exception base %pK\n",
+			gebase);
+		err = -ENOMEM;
+		goto out_free_gebase;
+	}
+
 	/* Save new ebase */
 	vcpu->arch.guest_ebase = gebase;
 
-	/* Copy L1 Guest Exception handler to correct offset */
+	/* Build guest exception vectors dynamically in unmapped memory */
+	handler = gebase + 0x2000;
 
 	/* TLB Refill, EXL = 0 */
-	memcpy(gebase, mips32_exception,
-	       mips32_exceptionEnd - mips32_exception);
+	kvm_mips_build_exception(gebase, handler);
 
 	/* General Exception Entry point */
-	memcpy(gebase + 0x180, mips32_exception,
-	       mips32_exceptionEnd - mips32_exception);
+	kvm_mips_build_exception(gebase + 0x180, handler);
 
 	/* For vectored interrupts poke the exception code @ all offsets 0-7 */
 	for (i = 0; i < 8; i++) {
 		kvm_debug("L1 Vectored handler @ %p\n",
 			  gebase + 0x200 + (i * VECTORSPACING));
-		memcpy(gebase + 0x200 + (i * VECTORSPACING), mips32_exception,
-		       mips32_exceptionEnd - mips32_exception);
+		kvm_mips_build_exception(gebase + 0x200 + i * VECTORSPACING,
+					 handler);
 	}
 
-	/* General handler, relocate to unmapped space for sanity's sake */
-	offset = 0x2000;
-	kvm_debug("Installing KVM Exception handlers @ %p, %#x bytes\n",
-		  gebase + offset,
-		  mips32_GuestExceptionEnd - mips32_GuestException);
+	/* General exit handler */
+	p = handler;
+	p = kvm_mips_build_exit(p);
 
-	memcpy(gebase + offset, mips32_GuestException,
-	       mips32_GuestExceptionEnd - mips32_GuestException);
+	/* Guest entry routine */
+	vcpu->arch.vcpu_run = p;
+	p = kvm_mips_build_vcpu_run(p);
 
-#ifdef MODULE
-	offset += mips32_GuestExceptionEnd - mips32_GuestException;
-	memcpy(gebase + offset, (char *)__kvm_mips_vcpu_run,
-	       __kvm_mips_vcpu_run_end - (char *)__kvm_mips_vcpu_run);
-	vcpu->arch.vcpu_run = gebase + offset;
-#else
-	vcpu->arch.vcpu_run = __kvm_mips_vcpu_run;
-#endif
+	/* Dump the generated code */
+	pr_debug("#include <asm/asm.h>\n");
+	pr_debug("#include <asm/regdef.h>\n");
+	pr_debug("\n");
+	dump_handler("kvm_vcpu_run", vcpu->arch.vcpu_run, p);
+	dump_handler("kvm_gen_exc", gebase + 0x180, gebase + 0x200);
+	dump_handler("kvm_exit", gebase + 0x2000, vcpu->arch.vcpu_run);
 
 	/* Invalidate the icache for these ranges */
 	local_flush_icache_range((unsigned long)gebase,
@@ -408,17 +432,19 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	kvm_mips_deliver_interrupts(vcpu,
 				    kvm_read_c0_guest_cause(vcpu->arch.cop0));
 
-	__kvm_guest_enter();
+	guest_enter_irqoff();
 
 	/* Disable hardware page table walking while in guest */
 	htw_stop();
 
+	trace_kvm_enter(vcpu);
 	r = vcpu->arch.vcpu_run(run, vcpu);
+	trace_kvm_out(vcpu);
 
 	/* Re-enable HTW before enabling interrupts */
 	htw_start();
 
-	__kvm_guest_exit();
+	guest_exit_irqoff();
 	local_irq_enable();
 
 	if (vcpu->sigset_active)
@@ -507,8 +533,10 @@ static u64 kvm_mips_get_one_regs[] = {
 	KVM_REG_MIPS_R30,
 	KVM_REG_MIPS_R31,
 
+#ifndef CONFIG_CPU_MIPSR6
 	KVM_REG_MIPS_HI,
 	KVM_REG_MIPS_LO,
+#endif
 	KVM_REG_MIPS_PC,
 
 	KVM_REG_MIPS_CP0_INDEX,
@@ -539,6 +567,104 @@ static u64 kvm_mips_get_one_regs[] = {
 	KVM_REG_MIPS_COUNT_HZ,
 };
 
+static u64 kvm_mips_get_one_regs_fpu[] = {
+	KVM_REG_MIPS_FCR_IR,
+	KVM_REG_MIPS_FCR_CSR,
+};
+
+static u64 kvm_mips_get_one_regs_msa[] = {
+	KVM_REG_MIPS_MSA_IR,
+	KVM_REG_MIPS_MSA_CSR,
+};
+
+static u64 kvm_mips_get_one_regs_kscratch[] = {
+	KVM_REG_MIPS_CP0_KSCRATCH1,
+	KVM_REG_MIPS_CP0_KSCRATCH2,
+	KVM_REG_MIPS_CP0_KSCRATCH3,
+	KVM_REG_MIPS_CP0_KSCRATCH4,
+	KVM_REG_MIPS_CP0_KSCRATCH5,
+	KVM_REG_MIPS_CP0_KSCRATCH6,
+};
+
+static unsigned long kvm_mips_num_regs(struct kvm_vcpu *vcpu)
+{
+	unsigned long ret;
+
+	ret = ARRAY_SIZE(kvm_mips_get_one_regs);
+	if (kvm_mips_guest_can_have_fpu(&vcpu->arch)) {
+		ret += ARRAY_SIZE(kvm_mips_get_one_regs_fpu) + 48;
+		/* odd doubles */
+		if (boot_cpu_data.fpu_id & MIPS_FPIR_F64)
+			ret += 16;
+	}
+	if (kvm_mips_guest_can_have_msa(&vcpu->arch))
+		ret += ARRAY_SIZE(kvm_mips_get_one_regs_msa) + 32;
+	ret += __arch_hweight8(vcpu->arch.kscratch_enabled);
+	ret += kvm_mips_callbacks->num_regs(vcpu);
+
+	return ret;
+}
+
+static int kvm_mips_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices)
+{
+	u64 index;
+	unsigned int i;
+
+	if (copy_to_user(indices, kvm_mips_get_one_regs,
+			 sizeof(kvm_mips_get_one_regs)))
+		return -EFAULT;
+	indices += ARRAY_SIZE(kvm_mips_get_one_regs);
+
+	if (kvm_mips_guest_can_have_fpu(&vcpu->arch)) {
+		if (copy_to_user(indices, kvm_mips_get_one_regs_fpu,
+				 sizeof(kvm_mips_get_one_regs_fpu)))
+			return -EFAULT;
+		indices += ARRAY_SIZE(kvm_mips_get_one_regs_fpu);
+
+		for (i = 0; i < 32; ++i) {
+			index = KVM_REG_MIPS_FPR_32(i);
+			if (copy_to_user(indices, &index, sizeof(index)))
+				return -EFAULT;
+			++indices;
+
+			/* skip odd doubles if no F64 */
+			if (i & 1 && !(boot_cpu_data.fpu_id & MIPS_FPIR_F64))
+				continue;
+
+			index = KVM_REG_MIPS_FPR_64(i);
+			if (copy_to_user(indices, &index, sizeof(index)))
+				return -EFAULT;
+			++indices;
+		}
+	}
+
+	if (kvm_mips_guest_can_have_msa(&vcpu->arch)) {
+		if (copy_to_user(indices, kvm_mips_get_one_regs_msa,
+				 sizeof(kvm_mips_get_one_regs_msa)))
+			return -EFAULT;
+		indices += ARRAY_SIZE(kvm_mips_get_one_regs_msa);
+
+		for (i = 0; i < 32; ++i) {
+			index = KVM_REG_MIPS_VEC_128(i);
+			if (copy_to_user(indices, &index, sizeof(index)))
+				return -EFAULT;
+			++indices;
+		}
+	}
+
+	for (i = 0; i < 6; ++i) {
+		if (!(vcpu->arch.kscratch_enabled & BIT(i + 2)))
+			continue;
+
+		if (copy_to_user(indices, &kvm_mips_get_one_regs_kscratch[i],
+				 sizeof(kvm_mips_get_one_regs_kscratch[i])))
+			return -EFAULT;
+		++indices;
+	}
+
+	return kvm_mips_callbacks->copy_reg_indices(vcpu, indices);
+}
+
 static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
 			    const struct kvm_one_reg *reg)
 {
@@ -554,12 +680,14 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
 	case KVM_REG_MIPS_R0 ... KVM_REG_MIPS_R31:
 		v = (long)vcpu->arch.gprs[reg->id - KVM_REG_MIPS_R0];
 		break;
+#ifndef CONFIG_CPU_MIPSR6
 	case KVM_REG_MIPS_HI:
 		v = (long)vcpu->arch.hi;
 		break;
 	case KVM_REG_MIPS_LO:
 		v = (long)vcpu->arch.lo;
 		break;
+#endif
 	case KVM_REG_MIPS_PC:
 		v = (long)vcpu->arch.pc;
 		break;
@@ -688,17 +816,37 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
 	case KVM_REG_MIPS_CP0_ERROREPC:
 		v = (long)kvm_read_c0_guest_errorepc(cop0);
 		break;
+	case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6:
+		idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2;
+		if (!(vcpu->arch.kscratch_enabled & BIT(idx)))
+			return -EINVAL;
+		switch (idx) {
+		case 2:
+			v = (long)kvm_read_c0_guest_kscratch1(cop0);
+			break;
+		case 3:
+			v = (long)kvm_read_c0_guest_kscratch2(cop0);
+			break;
+		case 4:
+			v = (long)kvm_read_c0_guest_kscratch3(cop0);
+			break;
+		case 5:
+			v = (long)kvm_read_c0_guest_kscratch4(cop0);
+			break;
+		case 6:
+			v = (long)kvm_read_c0_guest_kscratch5(cop0);
+			break;
+		case 7:
+			v = (long)kvm_read_c0_guest_kscratch6(cop0);
+			break;
+		}
+		break;
 	/* registers to be handled specially */
-	case KVM_REG_MIPS_CP0_COUNT:
-	case KVM_REG_MIPS_COUNT_CTL:
-	case KVM_REG_MIPS_COUNT_RESUME:
-	case KVM_REG_MIPS_COUNT_HZ:
+	default:
 		ret = kvm_mips_callbacks->get_one_reg(vcpu, reg, &v);
 		if (ret)
 			return ret;
 		break;
-	default:
-		return -EINVAL;
 	}
 	if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) {
 		u64 __user *uaddr64 = (u64 __user *)(long)reg->addr;
@@ -755,12 +903,14 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
 	case KVM_REG_MIPS_R1 ... KVM_REG_MIPS_R31:
 		vcpu->arch.gprs[reg->id - KVM_REG_MIPS_R0] = v;
 		break;
+#ifndef CONFIG_CPU_MIPSR6
 	case KVM_REG_MIPS_HI:
 		vcpu->arch.hi = v;
 		break;
 	case KVM_REG_MIPS_LO:
 		vcpu->arch.lo = v;
 		break;
+#endif
 	case KVM_REG_MIPS_PC:
 		vcpu->arch.pc = v;
 		break;
@@ -859,22 +1009,34 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
 	case KVM_REG_MIPS_CP0_ERROREPC:
 		kvm_write_c0_guest_errorepc(cop0, v);
 		break;
+	case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6:
+		idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2;
+		if (!(vcpu->arch.kscratch_enabled & BIT(idx)))
+			return -EINVAL;
+		switch (idx) {
+		case 2:
+			kvm_write_c0_guest_kscratch1(cop0, v);
+			break;
+		case 3:
+			kvm_write_c0_guest_kscratch2(cop0, v);
+			break;
+		case 4:
+			kvm_write_c0_guest_kscratch3(cop0, v);
+			break;
+		case 5:
+			kvm_write_c0_guest_kscratch4(cop0, v);
+			break;
+		case 6:
+			kvm_write_c0_guest_kscratch5(cop0, v);
+			break;
+		case 7:
+			kvm_write_c0_guest_kscratch6(cop0, v);
+			break;
+		}
+		break;
 	/* registers to be handled specially */
-	case KVM_REG_MIPS_CP0_COUNT:
-	case KVM_REG_MIPS_CP0_COMPARE:
-	case KVM_REG_MIPS_CP0_CAUSE:
-	case KVM_REG_MIPS_CP0_CONFIG:
-	case KVM_REG_MIPS_CP0_CONFIG1:
-	case KVM_REG_MIPS_CP0_CONFIG2:
-	case KVM_REG_MIPS_CP0_CONFIG3:
-	case KVM_REG_MIPS_CP0_CONFIG4:
-	case KVM_REG_MIPS_CP0_CONFIG5:
-	case KVM_REG_MIPS_COUNT_CTL:
-	case KVM_REG_MIPS_COUNT_RESUME:
-	case KVM_REG_MIPS_COUNT_HZ:
-		return kvm_mips_callbacks->set_one_reg(vcpu, reg, v);
 	default:
-		return -EINVAL;
+		return kvm_mips_callbacks->set_one_reg(vcpu, reg, v);
 	}
 	return 0;
 }
@@ -927,23 +1089,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
 	}
 	case KVM_GET_REG_LIST: {
 		struct kvm_reg_list __user *user_list = argp;
-		u64 __user *reg_dest;
 		struct kvm_reg_list reg_list;
 		unsigned n;
 
 		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
 			return -EFAULT;
 		n = reg_list.n;
-		reg_list.n = ARRAY_SIZE(kvm_mips_get_one_regs);
+		reg_list.n = kvm_mips_num_regs(vcpu);
 		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
 			return -EFAULT;
 		if (n < reg_list.n)
 			return -E2BIG;
-		reg_dest = user_list->reg;
-		if (copy_to_user(reg_dest, kvm_mips_get_one_regs,
-				 sizeof(kvm_mips_get_one_regs)))
-			return -EFAULT;
-		return 0;
+		return kvm_mips_copy_reg_indices(vcpu, user_list->reg);
 	}
 	case KVM_NMI:
 		/* Treat the NMI as a CPU reset */
@@ -1222,7 +1379,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 
 static void kvm_mips_set_c0_status(void)
 {
-	uint32_t status = read_c0_status();
+	u32 status = read_c0_status();
 
 	if (cpu_has_dsp)
 		status |= (ST0_MX);
@@ -1236,9 +1393,9 @@ static void kvm_mips_set_c0_status(void)
  */
 int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
-	uint32_t cause = vcpu->arch.host_cp0_cause;
-	uint32_t exccode = (cause >> CAUSEB_EXCCODE) & 0x1f;
-	uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
+	u32 cause = vcpu->arch.host_cp0_cause;
+	u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f;
+	u32 __user *opc = (u32 __user *) vcpu->arch.pc;
 	unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
 	enum emulation_result er = EMULATE_DONE;
 	int ret = RESUME_GUEST;
@@ -1260,6 +1417,7 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
 	kvm_debug("kvm_mips_handle_exit: cause: %#x, PC: %p, kvm_run: %p, kvm_vcpu: %p\n",
 			cause, opc, run, vcpu);
+	trace_kvm_exit(vcpu, exccode);
 
 	/*
 	 * Do a privilege check, if in UM most of these exit conditions end up
@@ -1279,7 +1437,6 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		kvm_debug("[%d]EXCCODE_INT @ %p\n", vcpu->vcpu_id, opc);
 
 		++vcpu->stat.int_exits;
-		trace_kvm_exit(vcpu, INT_EXITS);
 
 		if (need_resched())
 			cond_resched();
@@ -1291,7 +1448,6 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		kvm_debug("EXCCODE_CPU: @ PC: %p\n", opc);
 
 		++vcpu->stat.cop_unusable_exits;
-		trace_kvm_exit(vcpu, COP_UNUSABLE_EXITS);
 		ret = kvm_mips_callbacks->handle_cop_unusable(vcpu);
 		/* XXXKYMA: Might need to return to user space */
 		if (run->exit_reason == KVM_EXIT_IRQ_WINDOW_OPEN)
@@ -1300,7 +1456,6 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
 	case EXCCODE_MOD:
 		++vcpu->stat.tlbmod_exits;
-		trace_kvm_exit(vcpu, TLBMOD_EXITS);
 		ret = kvm_mips_callbacks->handle_tlb_mod(vcpu);
 		break;
 
@@ -1310,7 +1465,6 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			  badvaddr);
 
 		++vcpu->stat.tlbmiss_st_exits;
-		trace_kvm_exit(vcpu, TLBMISS_ST_EXITS);
 		ret = kvm_mips_callbacks->handle_tlb_st_miss(vcpu);
 		break;
 
@@ -1319,61 +1473,51 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			  cause, opc, badvaddr);
 
 		++vcpu->stat.tlbmiss_ld_exits;
-		trace_kvm_exit(vcpu, TLBMISS_LD_EXITS);
 		ret = kvm_mips_callbacks->handle_tlb_ld_miss(vcpu);
 		break;
 
 	case EXCCODE_ADES:
 		++vcpu->stat.addrerr_st_exits;
-		trace_kvm_exit(vcpu, ADDRERR_ST_EXITS);
 		ret = kvm_mips_callbacks->handle_addr_err_st(vcpu);
 		break;
 
 	case EXCCODE_ADEL:
 		++vcpu->stat.addrerr_ld_exits;
-		trace_kvm_exit(vcpu, ADDRERR_LD_EXITS);
 		ret = kvm_mips_callbacks->handle_addr_err_ld(vcpu);
 		break;
 
 	case EXCCODE_SYS:
 		++vcpu->stat.syscall_exits;
-		trace_kvm_exit(vcpu, SYSCALL_EXITS);
 		ret = kvm_mips_callbacks->handle_syscall(vcpu);
 		break;
 
 	case EXCCODE_RI:
 		++vcpu->stat.resvd_inst_exits;
-		trace_kvm_exit(vcpu, RESVD_INST_EXITS);
 		ret = kvm_mips_callbacks->handle_res_inst(vcpu);
 		break;
 
 	case EXCCODE_BP:
 		++vcpu->stat.break_inst_exits;
-		trace_kvm_exit(vcpu, BREAK_INST_EXITS);
 		ret = kvm_mips_callbacks->handle_break(vcpu);
 		break;
 
 	case EXCCODE_TR:
 		++vcpu->stat.trap_inst_exits;
-		trace_kvm_exit(vcpu, TRAP_INST_EXITS);
 		ret = kvm_mips_callbacks->handle_trap(vcpu);
 		break;
 
 	case EXCCODE_MSAFPE:
 		++vcpu->stat.msa_fpe_exits;
-		trace_kvm_exit(vcpu, MSA_FPE_EXITS);
 		ret = kvm_mips_callbacks->handle_msa_fpe(vcpu);
 		break;
 
 	case EXCCODE_FPE:
 		++vcpu->stat.fpe_exits;
-		trace_kvm_exit(vcpu, FPE_EXITS);
 		ret = kvm_mips_callbacks->handle_fpe(vcpu);
 		break;
 
 	case EXCCODE_MSADIS:
 		++vcpu->stat.msa_disabled_exits;
-		trace_kvm_exit(vcpu, MSA_DISABLED_EXITS);
 		ret = kvm_mips_callbacks->handle_msa_disabled(vcpu);
 		break;
 
@@ -1400,11 +1544,13 @@ skip_emul:
 			run->exit_reason = KVM_EXIT_INTR;
 			ret = (-EINTR << 2) | RESUME_HOST;
 			++vcpu->stat.signal_exits;
-			trace_kvm_exit(vcpu, SIGNAL_EXITS);
+			trace_kvm_exit(vcpu, KVM_TRACE_EXIT_SIGNAL);
 		}
 	}
 
 	if (ret == RESUME_GUEST) {
+		trace_kvm_reenter(vcpu);
+
 		/*
 		 * If FPU / MSA are enabled (i.e. the guest's FPU / MSA context
 		 * is live), restore FCR31 / MSACSR.
@@ -1450,7 +1596,7 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu)
 	 * not to clobber the status register directly via the commpage.
 	 */
 	if (cpu_has_msa && sr & ST0_CU1 && !(sr & ST0_FR) &&
-	    vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA)
+	    vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA)
 		kvm_lose_fpu(vcpu);
 
 	/*
@@ -1465,9 +1611,12 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu)
 	enable_fpu_hazard();
 
 	/* If guest FPU state not active, restore it now */
-	if (!(vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)) {
+	if (!(vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU)) {
 		__kvm_restore_fpu(&vcpu->arch);
-		vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU;
+		vcpu->arch.aux_inuse |= KVM_MIPS_AUX_FPU;
+		trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU);
+	} else {
+		trace_kvm_aux(vcpu, KVM_TRACE_AUX_ENABLE, KVM_TRACE_AUX_FPU);
 	}
 
 	preempt_enable();
@@ -1494,8 +1643,8 @@ void kvm_own_msa(struct kvm_vcpu *vcpu)
 		 * interacts with MSA state, so play it safe and save it first.
 		 */
 		if (!(sr & ST0_FR) &&
-		    (vcpu->arch.fpu_inuse & (KVM_MIPS_FPU_FPU |
-				KVM_MIPS_FPU_MSA)) == KVM_MIPS_FPU_FPU)
+		    (vcpu->arch.aux_inuse & (KVM_MIPS_AUX_FPU |
+				KVM_MIPS_AUX_MSA)) == KVM_MIPS_AUX_FPU)
 			kvm_lose_fpu(vcpu);
 
 		change_c0_status(ST0_CU1 | ST0_FR, sr);
@@ -1509,22 +1658,26 @@ void kvm_own_msa(struct kvm_vcpu *vcpu)
 	set_c0_config5(MIPS_CONF5_MSAEN);
 	enable_fpu_hazard();
 
-	switch (vcpu->arch.fpu_inuse & (KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA)) {
-	case KVM_MIPS_FPU_FPU:
+	switch (vcpu->arch.aux_inuse & (KVM_MIPS_AUX_FPU | KVM_MIPS_AUX_MSA)) {
+	case KVM_MIPS_AUX_FPU:
 		/*
 		 * Guest FPU state already loaded, only restore upper MSA state
 		 */
 		__kvm_restore_msa_upper(&vcpu->arch);
-		vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_MSA;
+		vcpu->arch.aux_inuse |= KVM_MIPS_AUX_MSA;
+		trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_MSA);
 		break;
 	case 0:
 		/* Neither FPU or MSA already active, restore full MSA state */
 		__kvm_restore_msa(&vcpu->arch);
-		vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_MSA;
+		vcpu->arch.aux_inuse |= KVM_MIPS_AUX_MSA;
 		if (kvm_mips_guest_has_fpu(&vcpu->arch))
-			vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU;
+			vcpu->arch.aux_inuse |= KVM_MIPS_AUX_FPU;
+		trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE,
+			      KVM_TRACE_AUX_FPU_MSA);
 		break;
 	default:
+		trace_kvm_aux(vcpu, KVM_TRACE_AUX_ENABLE, KVM_TRACE_AUX_MSA);
 		break;
 	}
 
@@ -1536,13 +1689,15 @@ void kvm_own_msa(struct kvm_vcpu *vcpu)
 void kvm_drop_fpu(struct kvm_vcpu *vcpu)
 {
 	preempt_disable();
-	if (cpu_has_msa && vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) {
+	if (cpu_has_msa && vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) {
 		disable_msa();
-		vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_MSA;
+		trace_kvm_aux(vcpu, KVM_TRACE_AUX_DISCARD, KVM_TRACE_AUX_MSA);
+		vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_MSA;
 	}
-	if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
+	if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) {
 		clear_c0_status(ST0_CU1 | ST0_FR);
-		vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU;
+		trace_kvm_aux(vcpu, KVM_TRACE_AUX_DISCARD, KVM_TRACE_AUX_FPU);
+		vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_FPU;
 	}
 	preempt_enable();
 }
@@ -1558,25 +1713,27 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu)
 	 */
 
 	preempt_disable();
-	if (cpu_has_msa && vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) {
+	if (cpu_has_msa && vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) {
 		set_c0_config5(MIPS_CONF5_MSAEN);
 		enable_fpu_hazard();
 
 		__kvm_save_msa(&vcpu->arch);
+		trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU_MSA);
 
 		/* Disable MSA & FPU */
 		disable_msa();
-		if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
+		if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) {
 			clear_c0_status(ST0_CU1 | ST0_FR);
 			disable_fpu_hazard();
 		}
-		vcpu->arch.fpu_inuse &= ~(KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA);
-	} else if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
+		vcpu->arch.aux_inuse &= ~(KVM_MIPS_AUX_FPU | KVM_MIPS_AUX_MSA);
+	} else if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) {
 		set_c0_status(ST0_CU1);
 		enable_fpu_hazard();
 
 		__kvm_save_fpu(&vcpu->arch);
-		vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU;
+		vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_FPU;
+		trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU);
 
 		/* Disable FPU */
 		clear_c0_status(ST0_CU1 | ST0_FR);
@@ -1638,6 +1795,10 @@ static int __init kvm_mips_init(void)
 {
 	int ret;
 
+	ret = kvm_mips_entry_setup();
+	if (ret)
+		return ret;
+
 	ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
 
 	if (ret)
@@ -1645,18 +1806,6 @@ static int __init kvm_mips_init(void)
 
 	register_die_notifier(&kvm_mips_csr_die_notifier);
 
-	/*
-	 * On MIPS, kernel modules are executed from "mapped space", which
-	 * requires TLBs. The TLB handling code is statically linked with
-	 * the rest of the kernel (tlb.c) to avoid the possibility of
-	 * double faulting. The issue is that the TLB code references
-	 * routines that are part of the the KVM module, which are only
-	 * available once the module is loaded.
-	 */
-	kvm_mips_gfn_to_pfn = gfn_to_pfn;
-	kvm_mips_release_pfn_clean = kvm_release_pfn_clean;
-	kvm_mips_is_error_pfn = is_error_pfn;
-
 	return 0;
 }
 
@@ -1664,10 +1813,6 @@ static void __exit kvm_mips_exit(void)
 {
 	kvm_exit();
 
-	kvm_mips_gfn_to_pfn = NULL;
-	kvm_mips_release_pfn_clean = NULL;
-	kvm_mips_is_error_pfn = NULL;
-
 	unregister_die_notifier(&kvm_mips_csr_die_notifier);
 }
 
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
new file mode 100644
index 000000000000..57319ee57c4f
--- /dev/null
+++ b/arch/mips/kvm/mmu.c
@@ -0,0 +1,375 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * KVM/MIPS MMU handling in the KVM module.
+ *
+ * Copyright (C) 2012  MIPS Technologies, Inc.  All rights reserved.
+ * Authors: Sanjay Lal <sanjayl@kymasys.com>
+ */
+
+#include <linux/highmem.h>
+#include <linux/kvm_host.h>
+#include <asm/mmu_context.h>
+
+static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu)
+{
+	int cpu = smp_processor_id();
+
+	return vcpu->arch.guest_kernel_asid[cpu] &
+			cpu_asid_mask(&cpu_data[cpu]);
+}
+
+static u32 kvm_mips_get_user_asid(struct kvm_vcpu *vcpu)
+{
+	int cpu = smp_processor_id();
+
+	return vcpu->arch.guest_user_asid[cpu] &
+			cpu_asid_mask(&cpu_data[cpu]);
+}
+
+static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn)
+{
+	int srcu_idx, err = 0;
+	kvm_pfn_t pfn;
+
+	if (kvm->arch.guest_pmap[gfn] != KVM_INVALID_PAGE)
+		return 0;
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	pfn = gfn_to_pfn(kvm, gfn);
+
+	if (is_error_pfn(pfn)) {
+		kvm_err("Couldn't get pfn for gfn %#llx!\n", gfn);
+		err = -EFAULT;
+		goto out;
+	}
+
+	kvm->arch.guest_pmap[gfn] = pfn;
+out:
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	return err;
+}
+
+/* Translate guest KSEG0 addresses to Host PA */
+unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu,
+						    unsigned long gva)
+{
+	gfn_t gfn;
+	unsigned long offset = gva & ~PAGE_MASK;
+	struct kvm *kvm = vcpu->kvm;
+
+	if (KVM_GUEST_KSEGX(gva) != KVM_GUEST_KSEG0) {
+		kvm_err("%s/%p: Invalid gva: %#lx\n", __func__,
+			__builtin_return_address(0), gva);
+		return KVM_INVALID_PAGE;
+	}
+
+	gfn = (KVM_GUEST_CPHYSADDR(gva) >> PAGE_SHIFT);
+
+	if (gfn >= kvm->arch.guest_pmap_npages) {
+		kvm_err("%s: Invalid gfn: %#llx, GVA: %#lx\n", __func__, gfn,
+			gva);
+		return KVM_INVALID_PAGE;
+	}
+
+	if (kvm_mips_map_page(vcpu->kvm, gfn) < 0)
+		return KVM_INVALID_ADDR;
+
+	return (kvm->arch.guest_pmap[gfn] << PAGE_SHIFT) + offset;
+}
+
+/* XXXKYMA: Must be called with interrupts disabled */
+int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
+				    struct kvm_vcpu *vcpu)
+{
+	gfn_t gfn;
+	kvm_pfn_t pfn0, pfn1;
+	unsigned long vaddr = 0;
+	unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0;
+	struct kvm *kvm = vcpu->kvm;
+	const int flush_dcache_mask = 0;
+	int ret;
+
+	if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) {
+		kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr);
+		kvm_mips_dump_host_tlbs();
+		return -1;
+	}
+
+	gfn = (KVM_GUEST_CPHYSADDR(badvaddr) >> PAGE_SHIFT);
+	if (gfn >= kvm->arch.guest_pmap_npages) {
+		kvm_err("%s: Invalid gfn: %#llx, BadVaddr: %#lx\n", __func__,
+			gfn, badvaddr);
+		kvm_mips_dump_host_tlbs();
+		return -1;
+	}
+	vaddr = badvaddr & (PAGE_MASK << 1);
+
+	if (kvm_mips_map_page(vcpu->kvm, gfn) < 0)
+		return -1;
+
+	if (kvm_mips_map_page(vcpu->kvm, gfn ^ 0x1) < 0)
+		return -1;
+
+	pfn0 = kvm->arch.guest_pmap[gfn & ~0x1];
+	pfn1 = kvm->arch.guest_pmap[gfn | 0x1];
+
+	entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) |
+		((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
+		ENTRYLO_D | ENTRYLO_V;
+	entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) |
+		((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
+		ENTRYLO_D | ENTRYLO_V;
+
+	preempt_disable();
+	entryhi = (vaddr | kvm_mips_get_kernel_asid(vcpu));
+	ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
+				      flush_dcache_mask);
+	preempt_enable();
+
+	return ret;
+}
+
+int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
+					 struct kvm_mips_tlb *tlb)
+{
+	unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0;
+	struct kvm *kvm = vcpu->kvm;
+	kvm_pfn_t pfn0, pfn1;
+	int ret;
+
+	if ((tlb->tlb_hi & VPN2_MASK) == 0) {
+		pfn0 = 0;
+		pfn1 = 0;
+	} else {
+		if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo[0])
+					   >> PAGE_SHIFT) < 0)
+			return -1;
+
+		if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo[1])
+					   >> PAGE_SHIFT) < 0)
+			return -1;
+
+		pfn0 = kvm->arch.guest_pmap[
+			mips3_tlbpfn_to_paddr(tlb->tlb_lo[0]) >> PAGE_SHIFT];
+		pfn1 = kvm->arch.guest_pmap[
+			mips3_tlbpfn_to_paddr(tlb->tlb_lo[1]) >> PAGE_SHIFT];
+	}
+
+	/* Get attributes from the Guest TLB */
+	entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) |
+		((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
+		(tlb->tlb_lo[0] & ENTRYLO_D) |
+		(tlb->tlb_lo[0] & ENTRYLO_V);
+	entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) |
+		((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
+		(tlb->tlb_lo[1] & ENTRYLO_D) |
+		(tlb->tlb_lo[1] & ENTRYLO_V);
+
+	kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
+		  tlb->tlb_lo[0], tlb->tlb_lo[1]);
+
+	preempt_disable();
+	entryhi = (tlb->tlb_hi & VPN2_MASK) | (KVM_GUEST_KERNEL_MODE(vcpu) ?
+					       kvm_mips_get_kernel_asid(vcpu) :
+					       kvm_mips_get_user_asid(vcpu));
+	ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
+				      tlb->tlb_mask);
+	preempt_enable();
+
+	return ret;
+}
+
+void kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu,
+			     struct kvm_vcpu *vcpu)
+{
+	unsigned long asid = asid_cache(cpu);
+
+	asid += cpu_asid_inc();
+	if (!(asid & cpu_asid_mask(&cpu_data[cpu]))) {
+		if (cpu_has_vtag_icache)
+			flush_icache_all();
+
+		kvm_local_flush_tlb_all();      /* start new asid cycle */
+
+		if (!asid)      /* fix version if needed */
+			asid = asid_first_version(cpu);
+	}
+
+	cpu_context(cpu, mm) = asid_cache(cpu) = asid;
+}
+
+/**
+ * kvm_mips_migrate_count() - Migrate timer.
+ * @vcpu:	Virtual CPU.
+ *
+ * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it
+ * if it was running prior to being cancelled.
+ *
+ * Must be called when the VCPU is migrated to a different CPU to ensure that
+ * timer expiry during guest execution interrupts the guest and causes the
+ * interrupt to be delivered in a timely manner.
+ */
+static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu)
+{
+	if (hrtimer_cancel(&vcpu->arch.comparecount_timer))
+		hrtimer_restart(&vcpu->arch.comparecount_timer);
+}
+
+/* Restore ASID once we are scheduled back after preemption */
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	unsigned long asid_mask = cpu_asid_mask(&cpu_data[cpu]);
+	unsigned long flags;
+	int newasid = 0;
+
+	kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu);
+
+	/* Allocate new kernel and user ASIDs if needed */
+
+	local_irq_save(flags);
+
+	if ((vcpu->arch.guest_kernel_asid[cpu] ^ asid_cache(cpu)) &
+						asid_version_mask(cpu)) {
+		kvm_get_new_mmu_context(&vcpu->arch.guest_kernel_mm, cpu, vcpu);
+		vcpu->arch.guest_kernel_asid[cpu] =
+		    vcpu->arch.guest_kernel_mm.context.asid[cpu];
+		kvm_get_new_mmu_context(&vcpu->arch.guest_user_mm, cpu, vcpu);
+		vcpu->arch.guest_user_asid[cpu] =
+		    vcpu->arch.guest_user_mm.context.asid[cpu];
+		newasid++;
+
+		kvm_debug("[%d]: cpu_context: %#lx\n", cpu,
+			  cpu_context(cpu, current->mm));
+		kvm_debug("[%d]: Allocated new ASID for Guest Kernel: %#x\n",
+			  cpu, vcpu->arch.guest_kernel_asid[cpu]);
+		kvm_debug("[%d]: Allocated new ASID for Guest User: %#x\n", cpu,
+			  vcpu->arch.guest_user_asid[cpu]);
+	}
+
+	if (vcpu->arch.last_sched_cpu != cpu) {
+		kvm_debug("[%d->%d]KVM VCPU[%d] switch\n",
+			  vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
+		/*
+		 * Migrate the timer interrupt to the current CPU so that it
+		 * always interrupts the guest and synchronously triggers a
+		 * guest timer interrupt.
+		 */
+		kvm_mips_migrate_count(vcpu);
+	}
+
+	if (!newasid) {
+		/*
+		 * If we preempted while the guest was executing, then reload
+		 * the pre-empted ASID
+		 */
+		if (current->flags & PF_VCPU) {
+			write_c0_entryhi(vcpu->arch.
+					 preempt_entryhi & asid_mask);
+			ehb();
+		}
+	} else {
+		/* New ASIDs were allocated for the VM */
+
+		/*
+		 * Were we in guest context? If so then the pre-empted ASID is
+		 * no longer valid, we need to set it to what it should be based
+		 * on the mode of the Guest (Kernel/User)
+		 */
+		if (current->flags & PF_VCPU) {
+			if (KVM_GUEST_KERNEL_MODE(vcpu))
+				write_c0_entryhi(vcpu->arch.
+						 guest_kernel_asid[cpu] &
+						 asid_mask);
+			else
+				write_c0_entryhi(vcpu->arch.
+						 guest_user_asid[cpu] &
+						 asid_mask);
+			ehb();
+		}
+	}
+
+	/* restore guest state to registers */
+	kvm_mips_callbacks->vcpu_set_regs(vcpu);
+
+	local_irq_restore(flags);
+
+}
+
+/* ASID can change if another task is scheduled during preemption */
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	unsigned long flags;
+	int cpu;
+
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+
+	vcpu->arch.preempt_entryhi = read_c0_entryhi();
+	vcpu->arch.last_sched_cpu = cpu;
+
+	/* save guest state in registers */
+	kvm_mips_callbacks->vcpu_get_regs(vcpu);
+
+	if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) &
+	     asid_version_mask(cpu))) {
+		kvm_debug("%s: Dropping MMU Context:  %#lx\n", __func__,
+			  cpu_context(cpu, current->mm));
+		drop_mmu_context(current->mm, cpu);
+	}
+	write_c0_entryhi(cpu_asid(cpu, current->mm));
+	ehb();
+
+	local_irq_restore(flags);
+}
+
+u32 kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu)
+{
+	struct mips_coproc *cop0 = vcpu->arch.cop0;
+	unsigned long paddr, flags, vpn2, asid;
+	unsigned long va = (unsigned long)opc;
+	void *vaddr;
+	u32 inst;
+	int index;
+
+	if (KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0 ||
+	    KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) {
+		local_irq_save(flags);
+		index = kvm_mips_host_tlb_lookup(vcpu, va);
+		if (index >= 0) {
+			inst = *(opc);
+		} else {
+			vpn2 = va & VPN2_MASK;
+			asid = kvm_read_c0_guest_entryhi(cop0) &
+						KVM_ENTRYHI_ASID;
+			index = kvm_mips_guest_tlb_lookup(vcpu, vpn2 | asid);
+			if (index < 0) {
+				kvm_err("%s: get_user_failed for %p, vcpu: %p, ASID: %#lx\n",
+					__func__, opc, vcpu, read_c0_entryhi());
+				kvm_mips_dump_host_tlbs();
+				kvm_mips_dump_guest_tlbs(vcpu);
+				local_irq_restore(flags);
+				return KVM_INVALID_INST;
+			}
+			kvm_mips_handle_mapped_seg_tlb_fault(vcpu,
+							     &vcpu->arch.
+							     guest_tlb[index]);
+			inst = *(opc);
+		}
+		local_irq_restore(flags);
+	} else if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) {
+		paddr = kvm_mips_translate_guest_kseg0_to_hpa(vcpu, va);
+		vaddr = kmap_atomic(pfn_to_page(PHYS_PFN(paddr)));
+		vaddr += paddr & ~PAGE_MASK;
+		inst = *(u32 *)vaddr;
+		kunmap_atomic(vaddr);
+	} else {
+		kvm_err("%s: illegal address: %p\n", __func__, opc);
+		return KVM_INVALID_INST;
+	}
+
+	return inst;
+}
diff --git a/arch/mips/kvm/stats.c b/arch/mips/kvm/stats.c
index 888bb67070ac..53f851a61554 100644
--- a/arch/mips/kvm/stats.c
+++ b/arch/mips/kvm/stats.c
@@ -11,27 +11,6 @@
 
 #include <linux/kvm_host.h>
 
-char *kvm_mips_exit_types_str[MAX_KVM_MIPS_EXIT_TYPES] = {
-	"WAIT",
-	"CACHE",
-	"Signal",
-	"Interrupt",
-	"COP0/1 Unusable",
-	"TLB Mod",
-	"TLB Miss (LD)",
-	"TLB Miss (ST)",
-	"Address Err (ST)",
-	"Address Error (LD)",
-	"System Call",
-	"Reserved Inst",
-	"Break Inst",
-	"Trap Inst",
-	"MSA FPE",
-	"FPE",
-	"MSA Disabled",
-	"D-Cache Flushes",
-};
-
 char *kvm_cop0_str[N_MIPS_COPROC_REGS] = {
 	"Index",
 	"Random",
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index ed021ae7867a..254377d8e0b9 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -14,7 +14,7 @@
 #include <linux/smp.h>
 #include <linux/mm.h>
 #include <linux/delay.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/kvm_host.h>
 #include <linux/srcu.h>
 
@@ -24,6 +24,7 @@
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
+#include <asm/tlbdebug.h>
 
 #undef CONFIG_MIPS_MT
 #include <asm/r4kcache.h>
@@ -32,22 +33,10 @@
 #define KVM_GUEST_PC_TLB    0
 #define KVM_GUEST_SP_TLB    1
 
-#define PRIx64 "llx"
-
 atomic_t kvm_mips_instance;
 EXPORT_SYMBOL_GPL(kvm_mips_instance);
 
-/* These function pointers are initialized once the KVM module is loaded */
-kvm_pfn_t (*kvm_mips_gfn_to_pfn)(struct kvm *kvm, gfn_t gfn);
-EXPORT_SYMBOL_GPL(kvm_mips_gfn_to_pfn);
-
-void (*kvm_mips_release_pfn_clean)(kvm_pfn_t pfn);
-EXPORT_SYMBOL_GPL(kvm_mips_release_pfn_clean);
-
-bool (*kvm_mips_is_error_pfn)(kvm_pfn_t pfn);
-EXPORT_SYMBOL_GPL(kvm_mips_is_error_pfn);
-
-uint32_t kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu)
+static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu)
 {
 	int cpu = smp_processor_id();
 
@@ -55,7 +44,7 @@ uint32_t kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu)
 			cpu_asid_mask(&cpu_data[cpu]);
 }
 
-uint32_t kvm_mips_get_user_asid(struct kvm_vcpu *vcpu)
+static u32 kvm_mips_get_user_asid(struct kvm_vcpu *vcpu)
 {
 	int cpu = smp_processor_id();
 
@@ -63,7 +52,7 @@ uint32_t kvm_mips_get_user_asid(struct kvm_vcpu *vcpu)
 			cpu_asid_mask(&cpu_data[cpu]);
 }
 
-inline uint32_t kvm_mips_get_commpage_asid(struct kvm_vcpu *vcpu)
+inline u32 kvm_mips_get_commpage_asid(struct kvm_vcpu *vcpu)
 {
 	return vcpu->kvm->arch.commpage_tlb;
 }
@@ -72,50 +61,15 @@ inline uint32_t kvm_mips_get_commpage_asid(struct kvm_vcpu *vcpu)
 
 void kvm_mips_dump_host_tlbs(void)
 {
-	unsigned long old_entryhi;
-	unsigned long old_pagemask;
-	struct kvm_mips_tlb tlb;
 	unsigned long flags;
-	int i;
 
 	local_irq_save(flags);
 
-	old_entryhi = read_c0_entryhi();
-	old_pagemask = read_c0_pagemask();
-
 	kvm_info("HOST TLBs:\n");
-	kvm_info("ASID: %#lx\n", read_c0_entryhi() &
-		 cpu_asid_mask(&current_cpu_data));
-
-	for (i = 0; i < current_cpu_data.tlbsize; i++) {
-		write_c0_index(i);
-		mtc0_tlbw_hazard();
-
-		tlb_read();
-		tlbw_use_hazard();
+	dump_tlb_regs();
+	pr_info("\n");
+	dump_tlb_all();
 
-		tlb.tlb_hi = read_c0_entryhi();
-		tlb.tlb_lo0 = read_c0_entrylo0();
-		tlb.tlb_lo1 = read_c0_entrylo1();
-		tlb.tlb_mask = read_c0_pagemask();
-
-		kvm_info("TLB%c%3d Hi 0x%08lx ",
-			 (tlb.tlb_lo0 | tlb.tlb_lo1) & MIPS3_PG_V ? ' ' : '*',
-			 i, tlb.tlb_hi);
-		kvm_info("Lo0=0x%09" PRIx64 " %c%c attr %lx ",
-			 (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo0),
-			 (tlb.tlb_lo0 & MIPS3_PG_D) ? 'D' : ' ',
-			 (tlb.tlb_lo0 & MIPS3_PG_G) ? 'G' : ' ',
-			 (tlb.tlb_lo0 >> 3) & 7);
-		kvm_info("Lo1=0x%09" PRIx64 " %c%c attr %lx sz=%lx\n",
-			 (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo1),
-			 (tlb.tlb_lo1 & MIPS3_PG_D) ? 'D' : ' ',
-			 (tlb.tlb_lo1 & MIPS3_PG_G) ? 'G' : ' ',
-			 (tlb.tlb_lo1 >> 3) & 7, tlb.tlb_mask);
-	}
-	write_c0_entryhi(old_entryhi);
-	write_c0_pagemask(old_pagemask);
-	mtc0_tlbw_hazard();
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(kvm_mips_dump_host_tlbs);
@@ -132,74 +86,24 @@ void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu)
 	for (i = 0; i < KVM_MIPS_GUEST_TLB_SIZE; i++) {
 		tlb = vcpu->arch.guest_tlb[i];
 		kvm_info("TLB%c%3d Hi 0x%08lx ",
-			 (tlb.tlb_lo0 | tlb.tlb_lo1) & MIPS3_PG_V ? ' ' : '*',
+			 (tlb.tlb_lo[0] | tlb.tlb_lo[1]) & ENTRYLO_V
+							? ' ' : '*',
 			 i, tlb.tlb_hi);
-		kvm_info("Lo0=0x%09" PRIx64 " %c%c attr %lx ",
-			 (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo0),
-			 (tlb.tlb_lo0 & MIPS3_PG_D) ? 'D' : ' ',
-			 (tlb.tlb_lo0 & MIPS3_PG_G) ? 'G' : ' ',
-			 (tlb.tlb_lo0 >> 3) & 7);
-		kvm_info("Lo1=0x%09" PRIx64 " %c%c attr %lx sz=%lx\n",
-			 (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo1),
-			 (tlb.tlb_lo1 & MIPS3_PG_D) ? 'D' : ' ',
-			 (tlb.tlb_lo1 & MIPS3_PG_G) ? 'G' : ' ',
-			 (tlb.tlb_lo1 >> 3) & 7, tlb.tlb_mask);
+		kvm_info("Lo0=0x%09llx %c%c attr %lx ",
+			 (u64) mips3_tlbpfn_to_paddr(tlb.tlb_lo[0]),
+			 (tlb.tlb_lo[0] & ENTRYLO_D) ? 'D' : ' ',
+			 (tlb.tlb_lo[0] & ENTRYLO_G) ? 'G' : ' ',
+			 (tlb.tlb_lo[0] & ENTRYLO_C) >> ENTRYLO_C_SHIFT);
+		kvm_info("Lo1=0x%09llx %c%c attr %lx sz=%lx\n",
+			 (u64) mips3_tlbpfn_to_paddr(tlb.tlb_lo[1]),
+			 (tlb.tlb_lo[1] & ENTRYLO_D) ? 'D' : ' ',
+			 (tlb.tlb_lo[1] & ENTRYLO_G) ? 'G' : ' ',
+			 (tlb.tlb_lo[1] & ENTRYLO_C) >> ENTRYLO_C_SHIFT,
+			 tlb.tlb_mask);
 	}
 }
 EXPORT_SYMBOL_GPL(kvm_mips_dump_guest_tlbs);
 
-static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn)
-{
-	int srcu_idx, err = 0;
-	kvm_pfn_t pfn;
-
-	if (kvm->arch.guest_pmap[gfn] != KVM_INVALID_PAGE)
-		return 0;
-
-	srcu_idx = srcu_read_lock(&kvm->srcu);
-	pfn = kvm_mips_gfn_to_pfn(kvm, gfn);
-
-	if (kvm_mips_is_error_pfn(pfn)) {
-		kvm_err("Couldn't get pfn for gfn %#" PRIx64 "!\n", gfn);
-		err = -EFAULT;
-		goto out;
-	}
-
-	kvm->arch.guest_pmap[gfn] = pfn;
-out:
-	srcu_read_unlock(&kvm->srcu, srcu_idx);
-	return err;
-}
-
-/* Translate guest KSEG0 addresses to Host PA */
-unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu,
-						    unsigned long gva)
-{
-	gfn_t gfn;
-	uint32_t offset = gva & ~PAGE_MASK;
-	struct kvm *kvm = vcpu->kvm;
-
-	if (KVM_GUEST_KSEGX(gva) != KVM_GUEST_KSEG0) {
-		kvm_err("%s/%p: Invalid gva: %#lx\n", __func__,
-			__builtin_return_address(0), gva);
-		return KVM_INVALID_PAGE;
-	}
-
-	gfn = (KVM_GUEST_CPHYSADDR(gva) >> PAGE_SHIFT);
-
-	if (gfn >= kvm->arch.guest_pmap_npages) {
-		kvm_err("%s: Invalid gfn: %#llx, GVA: %#lx\n", __func__, gfn,
-			gva);
-		return KVM_INVALID_PAGE;
-	}
-
-	if (kvm_mips_map_page(vcpu->kvm, gfn) < 0)
-		return KVM_INVALID_ADDR;
-
-	return (kvm->arch.guest_pmap[gfn] << PAGE_SHIFT) + offset;
-}
-EXPORT_SYMBOL_GPL(kvm_mips_translate_guest_kseg0_to_hpa);
-
 /* XXXKYMA: Must be called with interrupts disabled */
 /* set flush_dcache_mask == 0 if no dcache flush required */
 int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi,
@@ -243,12 +147,12 @@ int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi,
 
 	/* Flush D-cache */
 	if (flush_dcache_mask) {
-		if (entrylo0 & MIPS3_PG_V) {
+		if (entrylo0 & ENTRYLO_V) {
 			++vcpu->stat.flush_dcache_exits;
 			flush_data_cache_page((entryhi & VPN2_MASK) &
 					      ~flush_dcache_mask);
 		}
-		if (entrylo1 & MIPS3_PG_V) {
+		if (entrylo1 & ENTRYLO_V) {
 			++vcpu->stat.flush_dcache_exits;
 			flush_data_cache_page(((entryhi & VPN2_MASK) &
 					       ~flush_dcache_mask) |
@@ -259,96 +163,35 @@ int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi,
 	/* Restore old ASID */
 	write_c0_entryhi(old_entryhi);
 	mtc0_tlbw_hazard();
-	tlbw_use_hazard();
 	local_irq_restore(flags);
 	return 0;
 }
-
-/* XXXKYMA: Must be called with interrupts disabled */
-int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
-				    struct kvm_vcpu *vcpu)
-{
-	gfn_t gfn;
-	kvm_pfn_t pfn0, pfn1;
-	unsigned long vaddr = 0;
-	unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0;
-	int even;
-	struct kvm *kvm = vcpu->kvm;
-	const int flush_dcache_mask = 0;
-	int ret;
-
-	if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) {
-		kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr);
-		kvm_mips_dump_host_tlbs();
-		return -1;
-	}
-
-	gfn = (KVM_GUEST_CPHYSADDR(badvaddr) >> PAGE_SHIFT);
-	if (gfn >= kvm->arch.guest_pmap_npages) {
-		kvm_err("%s: Invalid gfn: %#llx, BadVaddr: %#lx\n", __func__,
-			gfn, badvaddr);
-		kvm_mips_dump_host_tlbs();
-		return -1;
-	}
-	even = !(gfn & 0x1);
-	vaddr = badvaddr & (PAGE_MASK << 1);
-
-	if (kvm_mips_map_page(vcpu->kvm, gfn) < 0)
-		return -1;
-
-	if (kvm_mips_map_page(vcpu->kvm, gfn ^ 0x1) < 0)
-		return -1;
-
-	if (even) {
-		pfn0 = kvm->arch.guest_pmap[gfn];
-		pfn1 = kvm->arch.guest_pmap[gfn ^ 0x1];
-	} else {
-		pfn0 = kvm->arch.guest_pmap[gfn ^ 0x1];
-		pfn1 = kvm->arch.guest_pmap[gfn];
-	}
-
-	entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) |
-		   (1 << 2) | (0x1 << 1);
-	entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) |
-		   (1 << 2) | (0x1 << 1);
-
-	preempt_disable();
-	entryhi = (vaddr | kvm_mips_get_kernel_asid(vcpu));
-	ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
-				      flush_dcache_mask);
-	preempt_enable();
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(kvm_mips_handle_kseg0_tlb_fault);
+EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_write);
 
 int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
 	struct kvm_vcpu *vcpu)
 {
-	kvm_pfn_t pfn0, pfn1;
+	kvm_pfn_t pfn;
 	unsigned long flags, old_entryhi = 0, vaddr = 0;
-	unsigned long entrylo0 = 0, entrylo1 = 0;
+	unsigned long entrylo[2] = { 0, 0 };
+	unsigned int pair_idx;
 
-	pfn0 = CPHYSADDR(vcpu->arch.kseg0_commpage) >> PAGE_SHIFT;
-	pfn1 = 0;
-	entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) |
-		   (1 << 2) | (0x1 << 1);
-	entrylo1 = 0;
+	pfn = PFN_DOWN(virt_to_phys(vcpu->arch.kseg0_commpage));
+	pair_idx = (badvaddr >> PAGE_SHIFT) & 1;
+	entrylo[pair_idx] = mips3_paddr_to_tlbpfn(pfn << PAGE_SHIFT) |
+		((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
+		ENTRYLO_D | ENTRYLO_V;
 
 	local_irq_save(flags);
 
 	old_entryhi = read_c0_entryhi();
 	vaddr = badvaddr & (PAGE_MASK << 1);
 	write_c0_entryhi(vaddr | kvm_mips_get_kernel_asid(vcpu));
-	mtc0_tlbw_hazard();
-	write_c0_entrylo0(entrylo0);
-	mtc0_tlbw_hazard();
-	write_c0_entrylo1(entrylo1);
-	mtc0_tlbw_hazard();
+	write_c0_entrylo0(entrylo[0]);
+	write_c0_entrylo1(entrylo[1]);
 	write_c0_index(kvm_mips_get_commpage_asid(vcpu));
 	mtc0_tlbw_hazard();
 	tlb_write_indexed();
-	mtc0_tlbw_hazard();
 	tlbw_use_hazard();
 
 	kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0 (R): 0x%08lx, entrylo1(R): 0x%08lx\n",
@@ -358,68 +201,12 @@ int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
 	/* Restore old ASID */
 	write_c0_entryhi(old_entryhi);
 	mtc0_tlbw_hazard();
-	tlbw_use_hazard();
 	local_irq_restore(flags);
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_mips_handle_commpage_tlb_fault);
 
-int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
-					 struct kvm_mips_tlb *tlb,
-					 unsigned long *hpa0,
-					 unsigned long *hpa1)
-{
-	unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0;
-	struct kvm *kvm = vcpu->kvm;
-	kvm_pfn_t pfn0, pfn1;
-	int ret;
-
-	if ((tlb->tlb_hi & VPN2_MASK) == 0) {
-		pfn0 = 0;
-		pfn1 = 0;
-	} else {
-		if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo0)
-					   >> PAGE_SHIFT) < 0)
-			return -1;
-
-		if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo1)
-					   >> PAGE_SHIFT) < 0)
-			return -1;
-
-		pfn0 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb->tlb_lo0)
-					    >> PAGE_SHIFT];
-		pfn1 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb->tlb_lo1)
-					    >> PAGE_SHIFT];
-	}
-
-	if (hpa0)
-		*hpa0 = pfn0 << PAGE_SHIFT;
-
-	if (hpa1)
-		*hpa1 = pfn1 << PAGE_SHIFT;
-
-	/* Get attributes from the Guest TLB */
-	entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) |
-		   (tlb->tlb_lo0 & MIPS3_PG_D) | (tlb->tlb_lo0 & MIPS3_PG_V);
-	entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) |
-		   (tlb->tlb_lo1 & MIPS3_PG_D) | (tlb->tlb_lo1 & MIPS3_PG_V);
-
-	kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
-		  tlb->tlb_lo0, tlb->tlb_lo1);
-
-	preempt_disable();
-	entryhi = (tlb->tlb_hi & VPN2_MASK) | (KVM_GUEST_KERNEL_MODE(vcpu) ?
-					       kvm_mips_get_kernel_asid(vcpu) :
-					       kvm_mips_get_user_asid(vcpu));
-	ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
-				      tlb->tlb_mask);
-	preempt_enable();
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(kvm_mips_handle_mapped_seg_tlb_fault);
-
 int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi)
 {
 	int i;
@@ -435,7 +222,7 @@ int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi)
 	}
 
 	kvm_debug("%s: entryhi: %#lx, index: %d lo0: %#lx, lo1: %#lx\n",
-		  __func__, entryhi, index, tlb[i].tlb_lo0, tlb[i].tlb_lo1);
+		  __func__, entryhi, index, tlb[i].tlb_lo[0], tlb[i].tlb_lo[1]);
 
 	return index;
 }
@@ -467,7 +254,6 @@ int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr)
 	/* Restore old ASID */
 	write_c0_entryhi(old_entryhi);
 	mtc0_tlbw_hazard();
-	tlbw_use_hazard();
 
 	local_irq_restore(flags);
 
@@ -498,21 +284,16 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
 
 	if (idx > 0) {
 		write_c0_entryhi(UNIQUE_ENTRYHI(idx));
-		mtc0_tlbw_hazard();
-
 		write_c0_entrylo0(0);
-		mtc0_tlbw_hazard();
-
 		write_c0_entrylo1(0);
 		mtc0_tlbw_hazard();
 
 		tlb_write_indexed();
-		mtc0_tlbw_hazard();
+		tlbw_use_hazard();
 	}
 
 	write_c0_entryhi(old_entryhi);
 	mtc0_tlbw_hazard();
-	tlbw_use_hazard();
 
 	local_irq_restore(flags);
 
@@ -540,61 +321,39 @@ void kvm_mips_flush_host_tlb(int skip_kseg0)
 	/* Blast 'em all away. */
 	for (entry = 0; entry < maxentry; entry++) {
 		write_c0_index(entry);
-		mtc0_tlbw_hazard();
 
 		if (skip_kseg0) {
+			mtc0_tlbr_hazard();
 			tlb_read();
-			tlbw_use_hazard();
+			tlb_read_hazard();
 
 			entryhi = read_c0_entryhi();
 
 			/* Don't blow away guest kernel entries */
 			if (KVM_GUEST_KSEGX(entryhi) == KVM_GUEST_KSEG0)
 				continue;
+
+			write_c0_pagemask(old_pagemask);
 		}
 
 		/* Make sure all entries differ. */
 		write_c0_entryhi(UNIQUE_ENTRYHI(entry));
-		mtc0_tlbw_hazard();
 		write_c0_entrylo0(0);
-		mtc0_tlbw_hazard();
 		write_c0_entrylo1(0);
 		mtc0_tlbw_hazard();
 
 		tlb_write_indexed();
-		mtc0_tlbw_hazard();
+		tlbw_use_hazard();
 	}
 
-	tlbw_use_hazard();
-
 	write_c0_entryhi(old_entryhi);
 	write_c0_pagemask(old_pagemask);
 	mtc0_tlbw_hazard();
-	tlbw_use_hazard();
 
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(kvm_mips_flush_host_tlb);
 
-void kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu,
-			     struct kvm_vcpu *vcpu)
-{
-	unsigned long asid = asid_cache(cpu);
-
-	asid += cpu_asid_inc();
-	if (!(asid & cpu_asid_mask(&cpu_data[cpu]))) {
-		if (cpu_has_vtag_icache)
-			flush_icache_all();
-
-		kvm_local_flush_tlb_all();      /* start new asid cycle */
-
-		if (!asid)      /* fix version if needed */
-			asid = asid_first_version(cpu);
-	}
-
-	cpu_context(cpu, mm) = asid_cache(cpu) = asid;
-}
-
 void kvm_local_flush_tlb_all(void)
 {
 	unsigned long flags;
@@ -614,185 +373,12 @@ void kvm_local_flush_tlb_all(void)
 		write_c0_index(entry);
 		mtc0_tlbw_hazard();
 		tlb_write_indexed();
+		tlbw_use_hazard();
 		entry++;
 	}
-	tlbw_use_hazard();
 	write_c0_entryhi(old_ctx);
 	mtc0_tlbw_hazard();
 
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(kvm_local_flush_tlb_all);
-
-/**
- * kvm_mips_migrate_count() - Migrate timer.
- * @vcpu:	Virtual CPU.
- *
- * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it
- * if it was running prior to being cancelled.
- *
- * Must be called when the VCPU is migrated to a different CPU to ensure that
- * timer expiry during guest execution interrupts the guest and causes the
- * interrupt to be delivered in a timely manner.
- */
-static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu)
-{
-	if (hrtimer_cancel(&vcpu->arch.comparecount_timer))
-		hrtimer_restart(&vcpu->arch.comparecount_timer);
-}
-
-/* Restore ASID once we are scheduled back after preemption */
-void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-{
-	unsigned long asid_mask = cpu_asid_mask(&cpu_data[cpu]);
-	unsigned long flags;
-	int newasid = 0;
-
-	kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu);
-
-	/* Allocate new kernel and user ASIDs if needed */
-
-	local_irq_save(flags);
-
-	if ((vcpu->arch.guest_kernel_asid[cpu] ^ asid_cache(cpu)) &
-						asid_version_mask(cpu)) {
-		kvm_get_new_mmu_context(&vcpu->arch.guest_kernel_mm, cpu, vcpu);
-		vcpu->arch.guest_kernel_asid[cpu] =
-		    vcpu->arch.guest_kernel_mm.context.asid[cpu];
-		kvm_get_new_mmu_context(&vcpu->arch.guest_user_mm, cpu, vcpu);
-		vcpu->arch.guest_user_asid[cpu] =
-		    vcpu->arch.guest_user_mm.context.asid[cpu];
-		newasid++;
-
-		kvm_debug("[%d]: cpu_context: %#lx\n", cpu,
-			  cpu_context(cpu, current->mm));
-		kvm_debug("[%d]: Allocated new ASID for Guest Kernel: %#x\n",
-			  cpu, vcpu->arch.guest_kernel_asid[cpu]);
-		kvm_debug("[%d]: Allocated new ASID for Guest User: %#x\n", cpu,
-			  vcpu->arch.guest_user_asid[cpu]);
-	}
-
-	if (vcpu->arch.last_sched_cpu != cpu) {
-		kvm_debug("[%d->%d]KVM VCPU[%d] switch\n",
-			  vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
-		/*
-		 * Migrate the timer interrupt to the current CPU so that it
-		 * always interrupts the guest and synchronously triggers a
-		 * guest timer interrupt.
-		 */
-		kvm_mips_migrate_count(vcpu);
-	}
-
-	if (!newasid) {
-		/*
-		 * If we preempted while the guest was executing, then reload
-		 * the pre-empted ASID
-		 */
-		if (current->flags & PF_VCPU) {
-			write_c0_entryhi(vcpu->arch.
-					 preempt_entryhi & asid_mask);
-			ehb();
-		}
-	} else {
-		/* New ASIDs were allocated for the VM */
-
-		/*
-		 * Were we in guest context? If so then the pre-empted ASID is
-		 * no longer valid, we need to set it to what it should be based
-		 * on the mode of the Guest (Kernel/User)
-		 */
-		if (current->flags & PF_VCPU) {
-			if (KVM_GUEST_KERNEL_MODE(vcpu))
-				write_c0_entryhi(vcpu->arch.
-						 guest_kernel_asid[cpu] &
-						 asid_mask);
-			else
-				write_c0_entryhi(vcpu->arch.
-						 guest_user_asid[cpu] &
-						 asid_mask);
-			ehb();
-		}
-	}
-
-	/* restore guest state to registers */
-	kvm_mips_callbacks->vcpu_set_regs(vcpu);
-
-	local_irq_restore(flags);
-
-}
-EXPORT_SYMBOL_GPL(kvm_arch_vcpu_load);
-
-/* ASID can change if another task is scheduled during preemption */
-void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
-{
-	unsigned long flags;
-	uint32_t cpu;
-
-	local_irq_save(flags);
-
-	cpu = smp_processor_id();
-
-	vcpu->arch.preempt_entryhi = read_c0_entryhi();
-	vcpu->arch.last_sched_cpu = cpu;
-
-	/* save guest state in registers */
-	kvm_mips_callbacks->vcpu_get_regs(vcpu);
-
-	if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) &
-	     asid_version_mask(cpu))) {
-		kvm_debug("%s: Dropping MMU Context:  %#lx\n", __func__,
-			  cpu_context(cpu, current->mm));
-		drop_mmu_context(current->mm, cpu);
-	}
-	write_c0_entryhi(cpu_asid(cpu, current->mm));
-	ehb();
-
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(kvm_arch_vcpu_put);
-
-uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu)
-{
-	struct mips_coproc *cop0 = vcpu->arch.cop0;
-	unsigned long paddr, flags, vpn2, asid;
-	uint32_t inst;
-	int index;
-
-	if (KVM_GUEST_KSEGX((unsigned long) opc) < KVM_GUEST_KSEG0 ||
-	    KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) {
-		local_irq_save(flags);
-		index = kvm_mips_host_tlb_lookup(vcpu, (unsigned long) opc);
-		if (index >= 0) {
-			inst = *(opc);
-		} else {
-			vpn2 = (unsigned long) opc & VPN2_MASK;
-			asid = kvm_read_c0_guest_entryhi(cop0) &
-						KVM_ENTRYHI_ASID;
-			index = kvm_mips_guest_tlb_lookup(vcpu, vpn2 | asid);
-			if (index < 0) {
-				kvm_err("%s: get_user_failed for %p, vcpu: %p, ASID: %#lx\n",
-					__func__, opc, vcpu, read_c0_entryhi());
-				kvm_mips_dump_host_tlbs();
-				local_irq_restore(flags);
-				return KVM_INVALID_INST;
-			}
-			kvm_mips_handle_mapped_seg_tlb_fault(vcpu,
-							     &vcpu->arch.
-							     guest_tlb[index],
-							     NULL, NULL);
-			inst = *(opc);
-		}
-		local_irq_restore(flags);
-	} else if (KVM_GUEST_KSEGX(opc) == KVM_GUEST_KSEG0) {
-		paddr =
-		    kvm_mips_translate_guest_kseg0_to_hpa(vcpu,
-							  (unsigned long) opc);
-		inst = *(uint32_t *) CKSEG0ADDR(paddr);
-	} else {
-		kvm_err("%s: illegal address: %p\n", __func__, opc);
-		return KVM_INVALID_INST;
-	}
-
-	return inst;
-}
-EXPORT_SYMBOL_GPL(kvm_get_inst);
diff --git a/arch/mips/kvm/trace.h b/arch/mips/kvm/trace.h
index bd6437f67dc0..c858cf168078 100644
--- a/arch/mips/kvm/trace.h
+++ b/arch/mips/kvm/trace.h
@@ -17,8 +17,75 @@
 #define TRACE_INCLUDE_PATH .
 #define TRACE_INCLUDE_FILE trace
 
-/* Tracepoints for VM eists */
-extern char *kvm_mips_exit_types_str[MAX_KVM_MIPS_EXIT_TYPES];
+/*
+ * Tracepoints for VM enters
+ */
+DECLARE_EVENT_CLASS(kvm_transition,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+	TP_ARGS(vcpu),
+	TP_STRUCT__entry(
+		__field(unsigned long, pc)
+	),
+
+	TP_fast_assign(
+		__entry->pc = vcpu->arch.pc;
+	),
+
+	TP_printk("PC: 0x%08lx",
+		  __entry->pc)
+);
+
+DEFINE_EVENT(kvm_transition, kvm_enter,
+	     TP_PROTO(struct kvm_vcpu *vcpu),
+	     TP_ARGS(vcpu));
+
+DEFINE_EVENT(kvm_transition, kvm_reenter,
+	     TP_PROTO(struct kvm_vcpu *vcpu),
+	     TP_ARGS(vcpu));
+
+DEFINE_EVENT(kvm_transition, kvm_out,
+	     TP_PROTO(struct kvm_vcpu *vcpu),
+	     TP_ARGS(vcpu));
+
+/* The first 32 exit reasons correspond to Cause.ExcCode */
+#define KVM_TRACE_EXIT_INT		 0
+#define KVM_TRACE_EXIT_TLBMOD		 1
+#define KVM_TRACE_EXIT_TLBMISS_LD	 2
+#define KVM_TRACE_EXIT_TLBMISS_ST	 3
+#define KVM_TRACE_EXIT_ADDRERR_LD	 4
+#define KVM_TRACE_EXIT_ADDRERR_ST	 5
+#define KVM_TRACE_EXIT_SYSCALL		 8
+#define KVM_TRACE_EXIT_BREAK_INST	 9
+#define KVM_TRACE_EXIT_RESVD_INST	10
+#define KVM_TRACE_EXIT_COP_UNUSABLE	11
+#define KVM_TRACE_EXIT_TRAP_INST	13
+#define KVM_TRACE_EXIT_MSA_FPE		14
+#define KVM_TRACE_EXIT_FPE		15
+#define KVM_TRACE_EXIT_MSA_DISABLED	21
+/* Further exit reasons */
+#define KVM_TRACE_EXIT_WAIT		32
+#define KVM_TRACE_EXIT_CACHE		33
+#define KVM_TRACE_EXIT_SIGNAL		34
+
+/* Tracepoints for VM exits */
+#define kvm_trace_symbol_exit_types				\
+	{ KVM_TRACE_EXIT_INT,		"Interrupt" },		\
+	{ KVM_TRACE_EXIT_TLBMOD,	"TLB Mod" },		\
+	{ KVM_TRACE_EXIT_TLBMISS_LD,	"TLB Miss (LD)" },	\
+	{ KVM_TRACE_EXIT_TLBMISS_ST,	"TLB Miss (ST)" },	\
+	{ KVM_TRACE_EXIT_ADDRERR_LD,	"Address Error (LD)" },	\
+	{ KVM_TRACE_EXIT_ADDRERR_ST,	"Address Err (ST)" },	\
+	{ KVM_TRACE_EXIT_SYSCALL,	"System Call" },	\
+	{ KVM_TRACE_EXIT_BREAK_INST,	"Break Inst" },		\
+	{ KVM_TRACE_EXIT_RESVD_INST,	"Reserved Inst" },	\
+	{ KVM_TRACE_EXIT_COP_UNUSABLE,	"COP0/1 Unusable" },	\
+	{ KVM_TRACE_EXIT_TRAP_INST,	"Trap Inst" },		\
+	{ KVM_TRACE_EXIT_MSA_FPE,	"MSA FPE" },		\
+	{ KVM_TRACE_EXIT_FPE,		"FPE" },		\
+	{ KVM_TRACE_EXIT_MSA_DISABLED,	"MSA Disabled" },	\
+	{ KVM_TRACE_EXIT_WAIT,		"WAIT" },		\
+	{ KVM_TRACE_EXIT_CACHE,		"CACHE" },		\
+	{ KVM_TRACE_EXIT_SIGNAL,	"Signal" }
 
 TRACE_EVENT(kvm_exit,
 	    TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason),
@@ -34,10 +101,173 @@ TRACE_EVENT(kvm_exit,
 	    ),
 
 	    TP_printk("[%s]PC: 0x%08lx",
-		      kvm_mips_exit_types_str[__entry->reason],
+		      __print_symbolic(__entry->reason,
+				       kvm_trace_symbol_exit_types),
 		      __entry->pc)
 );
 
+#define KVM_TRACE_MFC0		0
+#define KVM_TRACE_MTC0		1
+#define KVM_TRACE_DMFC0		2
+#define KVM_TRACE_DMTC0		3
+#define KVM_TRACE_RDHWR		4
+
+#define KVM_TRACE_HWR_COP0	0
+#define KVM_TRACE_HWR_HWR	1
+
+#define KVM_TRACE_COP0(REG, SEL)	((KVM_TRACE_HWR_COP0 << 8) |	\
+					 ((REG) << 3) | (SEL))
+#define KVM_TRACE_HWR(REG, SEL)		((KVM_TRACE_HWR_HWR  << 8) |	\
+					 ((REG) << 3) | (SEL))
+
+#define kvm_trace_symbol_hwr_ops				\
+	{ KVM_TRACE_MFC0,		"MFC0" },		\
+	{ KVM_TRACE_MTC0,		"MTC0" },		\
+	{ KVM_TRACE_DMFC0,		"DMFC0" },		\
+	{ KVM_TRACE_DMTC0,		"DMTC0" },		\
+	{ KVM_TRACE_RDHWR,		"RDHWR" }
+
+#define kvm_trace_symbol_hwr_cop				\
+	{ KVM_TRACE_HWR_COP0,		"COP0" },		\
+	{ KVM_TRACE_HWR_HWR,		"HWR" }
+
+#define kvm_trace_symbol_hwr_regs				\
+	{ KVM_TRACE_COP0( 0, 0),	"Index" },		\
+	{ KVM_TRACE_COP0( 2, 0),	"EntryLo0" },		\
+	{ KVM_TRACE_COP0( 3, 0),	"EntryLo1" },		\
+	{ KVM_TRACE_COP0( 4, 0),	"Context" },		\
+	{ KVM_TRACE_COP0( 4, 2),	"UserLocal" },		\
+	{ KVM_TRACE_COP0( 5, 0),	"PageMask" },		\
+	{ KVM_TRACE_COP0( 6, 0),	"Wired" },		\
+	{ KVM_TRACE_COP0( 7, 0),	"HWREna" },		\
+	{ KVM_TRACE_COP0( 8, 0),	"BadVAddr" },		\
+	{ KVM_TRACE_COP0( 9, 0),	"Count" },		\
+	{ KVM_TRACE_COP0(10, 0),	"EntryHi" },		\
+	{ KVM_TRACE_COP0(11, 0),	"Compare" },		\
+	{ KVM_TRACE_COP0(12, 0),	"Status" },		\
+	{ KVM_TRACE_COP0(12, 1),	"IntCtl" },		\
+	{ KVM_TRACE_COP0(12, 2),	"SRSCtl" },		\
+	{ KVM_TRACE_COP0(13, 0),	"Cause" },		\
+	{ KVM_TRACE_COP0(14, 0),	"EPC" },		\
+	{ KVM_TRACE_COP0(15, 0),	"PRId" },		\
+	{ KVM_TRACE_COP0(15, 1),	"EBase" },		\
+	{ KVM_TRACE_COP0(16, 0),	"Config" },		\
+	{ KVM_TRACE_COP0(16, 1),	"Config1" },		\
+	{ KVM_TRACE_COP0(16, 2),	"Config2" },		\
+	{ KVM_TRACE_COP0(16, 3),	"Config3" },		\
+	{ KVM_TRACE_COP0(16, 4),	"Config4" },		\
+	{ KVM_TRACE_COP0(16, 5),	"Config5" },		\
+	{ KVM_TRACE_COP0(16, 7),	"Config7" },		\
+	{ KVM_TRACE_COP0(26, 0),	"ECC" },		\
+	{ KVM_TRACE_COP0(30, 0),	"ErrorEPC" },		\
+	{ KVM_TRACE_COP0(31, 2),	"KScratch1" },		\
+	{ KVM_TRACE_COP0(31, 3),	"KScratch2" },		\
+	{ KVM_TRACE_COP0(31, 4),	"KScratch3" },		\
+	{ KVM_TRACE_COP0(31, 5),	"KScratch4" },		\
+	{ KVM_TRACE_COP0(31, 6),	"KScratch5" },		\
+	{ KVM_TRACE_COP0(31, 7),	"KScratch6" },		\
+	{ KVM_TRACE_HWR( 0, 0),		"CPUNum" },		\
+	{ KVM_TRACE_HWR( 1, 0),		"SYNCI_Step" },		\
+	{ KVM_TRACE_HWR( 2, 0),		"CC" },			\
+	{ KVM_TRACE_HWR( 3, 0),		"CCRes" },		\
+	{ KVM_TRACE_HWR(29, 0),		"ULR" }
+
+TRACE_EVENT(kvm_hwr,
+	    TP_PROTO(struct kvm_vcpu *vcpu, unsigned int op, unsigned int reg,
+		     unsigned long val),
+	    TP_ARGS(vcpu, op, reg, val),
+	    TP_STRUCT__entry(
+			__field(unsigned long, val)
+			__field(u16, reg)
+			__field(u8, op)
+	    ),
+
+	    TP_fast_assign(
+			__entry->val = val;
+			__entry->reg = reg;
+			__entry->op = op;
+	    ),
+
+	    TP_printk("%s %s (%s:%u:%u) 0x%08lx",
+		      __print_symbolic(__entry->op,
+				       kvm_trace_symbol_hwr_ops),
+		      __print_symbolic(__entry->reg,
+				       kvm_trace_symbol_hwr_regs),
+		      __print_symbolic(__entry->reg >> 8,
+				       kvm_trace_symbol_hwr_cop),
+		      (__entry->reg >> 3) & 0x1f,
+		      __entry->reg & 0x7,
+		      __entry->val)
+);
+
+#define KVM_TRACE_AUX_RESTORE		0
+#define KVM_TRACE_AUX_SAVE		1
+#define KVM_TRACE_AUX_ENABLE		2
+#define KVM_TRACE_AUX_DISABLE		3
+#define KVM_TRACE_AUX_DISCARD		4
+
+#define KVM_TRACE_AUX_FPU		1
+#define KVM_TRACE_AUX_MSA		2
+#define KVM_TRACE_AUX_FPU_MSA		3
+
+#define kvm_trace_symbol_aux_op		\
+	{ KVM_TRACE_AUX_RESTORE, "restore" },	\
+	{ KVM_TRACE_AUX_SAVE,    "save" },	\
+	{ KVM_TRACE_AUX_ENABLE,  "enable" },	\
+	{ KVM_TRACE_AUX_DISABLE, "disable" },	\
+	{ KVM_TRACE_AUX_DISCARD, "discard" }
+
+#define kvm_trace_symbol_aux_state		\
+	{ KVM_TRACE_AUX_FPU,     "FPU" },	\
+	{ KVM_TRACE_AUX_MSA,     "MSA" },	\
+	{ KVM_TRACE_AUX_FPU_MSA, "FPU & MSA" }
+
+TRACE_EVENT(kvm_aux,
+	    TP_PROTO(struct kvm_vcpu *vcpu, unsigned int op,
+		     unsigned int state),
+	    TP_ARGS(vcpu, op, state),
+	    TP_STRUCT__entry(
+			__field(unsigned long, pc)
+			__field(u8, op)
+			__field(u8, state)
+	    ),
+
+	    TP_fast_assign(
+			__entry->pc = vcpu->arch.pc;
+			__entry->op = op;
+			__entry->state = state;
+	    ),
+
+	    TP_printk("%s %s PC: 0x%08lx",
+		      __print_symbolic(__entry->op,
+				       kvm_trace_symbol_aux_op),
+		      __print_symbolic(__entry->state,
+				       kvm_trace_symbol_aux_state),
+		      __entry->pc)
+);
+
+TRACE_EVENT(kvm_asid_change,
+	    TP_PROTO(struct kvm_vcpu *vcpu, unsigned int old_asid,
+		     unsigned int new_asid),
+	    TP_ARGS(vcpu, old_asid, new_asid),
+	    TP_STRUCT__entry(
+			__field(unsigned long, pc)
+			__field(u8, old_asid)
+			__field(u8, new_asid)
+	    ),
+
+	    TP_fast_assign(
+			__entry->pc = vcpu->arch.pc;
+			__entry->old_asid = old_asid;
+			__entry->new_asid = new_asid;
+	    ),
+
+	    TP_printk("PC: 0x%08lx old: 0x%02x new: 0x%02x",
+		      __entry->pc,
+		      __entry->old_asid,
+		      __entry->new_asid)
+);
+
 #endif /* _TRACE_KVM_H */
 
 /* This part must be outside protection */
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index 6ba0fafcecbc..091553942bcb 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
@@ -21,7 +21,7 @@
 static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva)
 {
 	gpa_t gpa;
-	uint32_t kseg = KSEGX(gva);
+	gva_t kseg = KSEGX(gva);
 
 	if ((kseg == CKSEG0) || (kseg == CKSEG1))
 		gpa = CPHYSADDR(gva);
@@ -40,8 +40,8 @@ static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	struct kvm_run *run = vcpu->run;
-	uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
-	unsigned long cause = vcpu->arch.host_cp0_cause;
+	u32 __user *opc = (u32 __user *) vcpu->arch.pc;
+	u32 cause = vcpu->arch.host_cp0_cause;
 	enum emulation_result er = EMULATE_DONE;
 	int ret = RESUME_GUEST;
 
@@ -87,15 +87,15 @@ static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
 static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
-	uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
+	u32 __user *opc = (u32 __user *) vcpu->arch.pc;
 	unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
-	unsigned long cause = vcpu->arch.host_cp0_cause;
+	u32 cause = vcpu->arch.host_cp0_cause;
 	enum emulation_result er = EMULATE_DONE;
 	int ret = RESUME_GUEST;
 
 	if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0
 	    || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) {
-		kvm_debug("USER/KSEG23 ADDR TLB MOD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n",
+		kvm_debug("USER/KSEG23 ADDR TLB MOD fault: cause %#x, PC: %p, BadVaddr: %#lx\n",
 			  cause, opc, badvaddr);
 		er = kvm_mips_handle_tlbmod(cause, opc, run, vcpu);
 
@@ -111,14 +111,14 @@ static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu)
 		 * when we are not using HIGHMEM. Need to address this in a
 		 * HIGHMEM kernel
 		 */
-		kvm_err("TLB MOD fault not handled, cause %#lx, PC: %p, BadVaddr: %#lx\n",
+		kvm_err("TLB MOD fault not handled, cause %#x, PC: %p, BadVaddr: %#lx\n",
 			cause, opc, badvaddr);
 		kvm_mips_dump_host_tlbs();
 		kvm_arch_vcpu_dump_regs(vcpu);
 		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		ret = RESUME_HOST;
 	} else {
-		kvm_err("Illegal TLB Mod fault address , cause %#lx, PC: %p, BadVaddr: %#lx\n",
+		kvm_err("Illegal TLB Mod fault address , cause %#x, PC: %p, BadVaddr: %#lx\n",
 			cause, opc, badvaddr);
 		kvm_mips_dump_host_tlbs();
 		kvm_arch_vcpu_dump_regs(vcpu);
@@ -128,59 +128,12 @@ static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu)
 	return ret;
 }
 
-static int kvm_trap_emul_handle_tlb_st_miss(struct kvm_vcpu *vcpu)
-{
-	struct kvm_run *run = vcpu->run;
-	uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
-	unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
-	unsigned long cause = vcpu->arch.host_cp0_cause;
-	enum emulation_result er = EMULATE_DONE;
-	int ret = RESUME_GUEST;
-
-	if (((badvaddr & PAGE_MASK) == KVM_GUEST_COMMPAGE_ADDR)
-	    && KVM_GUEST_KERNEL_MODE(vcpu)) {
-		if (kvm_mips_handle_commpage_tlb_fault(badvaddr, vcpu) < 0) {
-			run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-			ret = RESUME_HOST;
-		}
-	} else if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0
-		   || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) {
-		kvm_debug("USER ADDR TLB LD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n",
-			  cause, opc, badvaddr);
-		er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu);
-		if (er == EMULATE_DONE)
-			ret = RESUME_GUEST;
-		else {
-			run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-			ret = RESUME_HOST;
-		}
-	} else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) {
-		/*
-		 * All KSEG0 faults are handled by KVM, as the guest kernel does
-		 * not expect to ever get them
-		 */
-		if (kvm_mips_handle_kseg0_tlb_fault
-		    (vcpu->arch.host_cp0_badvaddr, vcpu) < 0) {
-			run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-			ret = RESUME_HOST;
-		}
-	} else {
-		kvm_err("Illegal TLB LD fault address , cause %#lx, PC: %p, BadVaddr: %#lx\n",
-			cause, opc, badvaddr);
-		kvm_mips_dump_host_tlbs();
-		kvm_arch_vcpu_dump_regs(vcpu);
-		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-		ret = RESUME_HOST;
-	}
-	return ret;
-}
-
-static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu)
+static int kvm_trap_emul_handle_tlb_miss(struct kvm_vcpu *vcpu, bool store)
 {
 	struct kvm_run *run = vcpu->run;
-	uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
+	u32 __user *opc = (u32 __user *) vcpu->arch.pc;
 	unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
-	unsigned long cause = vcpu->arch.host_cp0_cause;
+	u32 cause = vcpu->arch.host_cp0_cause;
 	enum emulation_result er = EMULATE_DONE;
 	int ret = RESUME_GUEST;
 
@@ -192,8 +145,8 @@ static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu)
 		}
 	} else if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0
 		   || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) {
-		kvm_debug("USER ADDR TLB ST fault: PC: %#lx, BadVaddr: %#lx\n",
-			  vcpu->arch.pc, badvaddr);
+		kvm_debug("USER ADDR TLB %s fault: cause %#x, PC: %p, BadVaddr: %#lx\n",
+			  store ? "ST" : "LD", cause, opc, badvaddr);
 
 		/*
 		 * User Address (UA) fault, this could happen if
@@ -213,14 +166,18 @@ static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu)
 			ret = RESUME_HOST;
 		}
 	} else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) {
+		/*
+		 * All KSEG0 faults are handled by KVM, as the guest kernel does
+		 * not expect to ever get them
+		 */
 		if (kvm_mips_handle_kseg0_tlb_fault
 		    (vcpu->arch.host_cp0_badvaddr, vcpu) < 0) {
 			run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 			ret = RESUME_HOST;
 		}
 	} else {
-		kvm_err("Illegal TLB ST fault address , cause %#lx, PC: %p, BadVaddr: %#lx\n",
-			cause, opc, badvaddr);
+		kvm_err("Illegal TLB %s fault address , cause %#x, PC: %p, BadVaddr: %#lx\n",
+			store ? "ST" : "LD", cause, opc, badvaddr);
 		kvm_mips_dump_host_tlbs();
 		kvm_arch_vcpu_dump_regs(vcpu);
 		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@@ -229,12 +186,22 @@ static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu)
 	return ret;
 }
 
+static int kvm_trap_emul_handle_tlb_st_miss(struct kvm_vcpu *vcpu)
+{
+	return kvm_trap_emul_handle_tlb_miss(vcpu, true);
+}
+
+static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu)
+{
+	return kvm_trap_emul_handle_tlb_miss(vcpu, false);
+}
+
 static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
-	uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
+	u32 __user *opc = (u32 __user *) vcpu->arch.pc;
 	unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
-	unsigned long cause = vcpu->arch.host_cp0_cause;
+	u32 cause = vcpu->arch.host_cp0_cause;
 	enum emulation_result er = EMULATE_DONE;
 	int ret = RESUME_GUEST;
 
@@ -251,7 +218,7 @@ static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu)
 			ret = RESUME_HOST;
 		}
 	} else {
-		kvm_err("Address Error (STORE): cause %#lx, PC: %p, BadVaddr: %#lx\n",
+		kvm_err("Address Error (STORE): cause %#x, PC: %p, BadVaddr: %#lx\n",
 			cause, opc, badvaddr);
 		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		ret = RESUME_HOST;
@@ -262,9 +229,9 @@ static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu)
 static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
-	uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
+	u32 __user *opc = (u32 __user *) vcpu->arch.pc;
 	unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
-	unsigned long cause = vcpu->arch.host_cp0_cause;
+	u32 cause = vcpu->arch.host_cp0_cause;
 	enum emulation_result er = EMULATE_DONE;
 	int ret = RESUME_GUEST;
 
@@ -280,7 +247,7 @@ static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu)
 			ret = RESUME_HOST;
 		}
 	} else {
-		kvm_err("Address Error (LOAD): cause %#lx, PC: %p, BadVaddr: %#lx\n",
+		kvm_err("Address Error (LOAD): cause %#x, PC: %p, BadVaddr: %#lx\n",
 			cause, opc, badvaddr);
 		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		ret = RESUME_HOST;
@@ -292,8 +259,8 @@ static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu)
 static int kvm_trap_emul_handle_syscall(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
-	uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
-	unsigned long cause = vcpu->arch.host_cp0_cause;
+	u32 __user *opc = (u32 __user *) vcpu->arch.pc;
+	u32 cause = vcpu->arch.host_cp0_cause;
 	enum emulation_result er = EMULATE_DONE;
 	int ret = RESUME_GUEST;
 
@@ -310,8 +277,8 @@ static int kvm_trap_emul_handle_syscall(struct kvm_vcpu *vcpu)
 static int kvm_trap_emul_handle_res_inst(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
-	uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
-	unsigned long cause = vcpu->arch.host_cp0_cause;
+	u32 __user *opc = (u32 __user *) vcpu->arch.pc;
+	u32 cause = vcpu->arch.host_cp0_cause;
 	enum emulation_result er = EMULATE_DONE;
 	int ret = RESUME_GUEST;
 
@@ -328,8 +295,8 @@ static int kvm_trap_emul_handle_res_inst(struct kvm_vcpu *vcpu)
 static int kvm_trap_emul_handle_break(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
-	uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
-	unsigned long cause = vcpu->arch.host_cp0_cause;
+	u32 __user *opc = (u32 __user *) vcpu->arch.pc;
+	u32 cause = vcpu->arch.host_cp0_cause;
 	enum emulation_result er = EMULATE_DONE;
 	int ret = RESUME_GUEST;
 
@@ -346,8 +313,8 @@ static int kvm_trap_emul_handle_break(struct kvm_vcpu *vcpu)
 static int kvm_trap_emul_handle_trap(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
-	uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc;
-	unsigned long cause = vcpu->arch.host_cp0_cause;
+	u32 __user *opc = (u32 __user *)vcpu->arch.pc;
+	u32 cause = vcpu->arch.host_cp0_cause;
 	enum emulation_result er = EMULATE_DONE;
 	int ret = RESUME_GUEST;
 
@@ -364,8 +331,8 @@ static int kvm_trap_emul_handle_trap(struct kvm_vcpu *vcpu)
 static int kvm_trap_emul_handle_msa_fpe(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
-	uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc;
-	unsigned long cause = vcpu->arch.host_cp0_cause;
+	u32 __user *opc = (u32 __user *)vcpu->arch.pc;
+	u32 cause = vcpu->arch.host_cp0_cause;
 	enum emulation_result er = EMULATE_DONE;
 	int ret = RESUME_GUEST;
 
@@ -382,8 +349,8 @@ static int kvm_trap_emul_handle_msa_fpe(struct kvm_vcpu *vcpu)
 static int kvm_trap_emul_handle_fpe(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
-	uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc;
-	unsigned long cause = vcpu->arch.host_cp0_cause;
+	u32 __user *opc = (u32 __user *)vcpu->arch.pc;
+	u32 cause = vcpu->arch.host_cp0_cause;
 	enum emulation_result er = EMULATE_DONE;
 	int ret = RESUME_GUEST;
 
@@ -407,8 +374,8 @@ static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	struct kvm_run *run = vcpu->run;
-	uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
-	unsigned long cause = vcpu->arch.host_cp0_cause;
+	u32 __user *opc = (u32 __user *) vcpu->arch.pc;
+	u32 cause = vcpu->arch.host_cp0_cause;
 	enum emulation_result er = EMULATE_DONE;
 	int ret = RESUME_GUEST;
 
@@ -451,24 +418,41 @@ static int kvm_trap_emul_vm_init(struct kvm *kvm)
 
 static int kvm_trap_emul_vcpu_init(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.kscratch_enabled = 0xfc;
+
 	return 0;
 }
 
 static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
-	uint32_t config1;
+	u32 config, config1;
 	int vcpu_id = vcpu->vcpu_id;
 
 	/*
 	 * Arch specific stuff, set up config registers properly so that the
-	 * guest will come up as expected, for now we simulate a MIPS 24kc
+	 * guest will come up as expected
 	 */
+#ifndef CONFIG_CPU_MIPSR6
+	/* r2-r5, simulate a MIPS 24kc */
 	kvm_write_c0_guest_prid(cop0, 0x00019300);
-	/* Have config1, Cacheable, noncoherent, write-back, write allocate */
-	kvm_write_c0_guest_config(cop0, MIPS_CONF_M | (0x3 << CP0C0_K0) |
-				  (0x1 << CP0C0_AR) |
-				  (MMU_TYPE_R4000 << CP0C0_MT));
+#else
+	/* r6+, simulate a generic QEMU machine */
+	kvm_write_c0_guest_prid(cop0, 0x00010000);
+#endif
+	/*
+	 * Have config1, Cacheable, noncoherent, write-back, write allocate.
+	 * Endianness, arch revision & virtually tagged icache should match
+	 * host.
+	 */
+	config = read_c0_config() & MIPS_CONF_AR;
+	config |= MIPS_CONF_M | CONF_CM_CACHABLE_NONCOHERENT | MIPS_CONF_MT_TLB;
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	config |= CONF_BE;
+#endif
+	if (cpu_has_vtag_icache)
+		config |= MIPS_CONF_VI;
+	kvm_write_c0_guest_config(cop0, config);
 
 	/* Read the cache characteristics from the host Config1 Register */
 	config1 = (read_c0_config1() & ~0x7f);
@@ -478,9 +462,8 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
 	config1 |= ((KVM_MIPS_GUEST_TLB_SIZE - 1) << 25);
 
 	/* We unset some bits that we aren't emulating */
-	config1 &=
-	    ~((1 << CP0C1_C2) | (1 << CP0C1_MD) | (1 << CP0C1_PC) |
-	      (1 << CP0C1_WR) | (1 << CP0C1_CA));
+	config1 &= ~(MIPS_CONF1_C2 | MIPS_CONF1_MD | MIPS_CONF1_PC |
+		     MIPS_CONF1_WR | MIPS_CONF1_CA);
 	kvm_write_c0_guest_config1(cop0, config1);
 
 	/* Have config3, no tertiary/secondary caches implemented */
@@ -511,6 +494,17 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static unsigned long kvm_trap_emul_num_regs(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+static int kvm_trap_emul_copy_reg_indices(struct kvm_vcpu *vcpu,
+					  u64 __user *indices)
+{
+	return 0;
+}
+
 static int kvm_trap_emul_get_one_reg(struct kvm_vcpu *vcpu,
 				     const struct kvm_one_reg *reg,
 				     s64 *v)
@@ -660,6 +654,8 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
 	.dequeue_io_int = kvm_mips_dequeue_io_int_cb,
 	.irq_deliver = kvm_mips_irq_deliver_cb,
 	.irq_clear = kvm_mips_irq_clear_cb,
+	.num_regs = kvm_trap_emul_num_regs,
+	.copy_reg_indices = kvm_trap_emul_copy_reg_indices,
 	.get_one_reg = kvm_trap_emul_get_one_reg,
 	.set_one_reg = kvm_trap_emul_set_one_reg,
 	.vcpu_get_regs = kvm_trap_emul_vcpu_get_regs,
diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
index ff17669e30a3..8ac0e5994ed2 100644
--- a/arch/mips/lantiq/irq.c
+++ b/arch/mips/lantiq/irq.c
@@ -66,7 +66,7 @@ int gic_present;
 #endif
 
 static int exin_avail;
-static struct resource ltq_eiu_irq[MAX_EIU];
+static u32 ltq_eiu_irq[MAX_EIU];
 static void __iomem *ltq_icu_membase[MAX_IM];
 static void __iomem *ltq_eiu_membase;
 static struct irq_domain *ltq_domain;
@@ -75,7 +75,7 @@ static int ltq_perfcount_irq;
 int ltq_eiu_get_irq(int exin)
 {
 	if (exin < exin_avail)
-		return ltq_eiu_irq[exin].start;
+		return ltq_eiu_irq[exin];
 	return -1;
 }
 
@@ -125,8 +125,8 @@ static int ltq_eiu_settype(struct irq_data *d, unsigned int type)
 {
 	int i;
 
-	for (i = 0; i < MAX_EIU; i++) {
-		if (d->hwirq == ltq_eiu_irq[i].start) {
+	for (i = 0; i < exin_avail; i++) {
+		if (d->hwirq == ltq_eiu_irq[i]) {
 			int val = 0;
 			int edge = 0;
 
@@ -173,8 +173,8 @@ static unsigned int ltq_startup_eiu_irq(struct irq_data *d)
 	int i;
 
 	ltq_enable_irq(d);
-	for (i = 0; i < MAX_EIU; i++) {
-		if (d->hwirq == ltq_eiu_irq[i].start) {
+	for (i = 0; i < exin_avail; i++) {
+		if (d->hwirq == ltq_eiu_irq[i]) {
 			/* by default we are low level triggered */
 			ltq_eiu_settype(d, IRQF_TRIGGER_LOW);
 			/* clear all pending */
@@ -195,8 +195,8 @@ static void ltq_shutdown_eiu_irq(struct irq_data *d)
 	int i;
 
 	ltq_disable_irq(d);
-	for (i = 0; i < MAX_EIU; i++) {
-		if (d->hwirq == ltq_eiu_irq[i].start) {
+	for (i = 0; i < exin_avail; i++) {
+		if (d->hwirq == ltq_eiu_irq[i]) {
 			/* disable */
 			ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_INEN) & ~BIT(i),
 				LTQ_EIU_EXIN_INEN);
@@ -206,7 +206,7 @@ static void ltq_shutdown_eiu_irq(struct irq_data *d)
 }
 
 static struct irq_chip ltq_irq_type = {
-	"icu",
+	.name = "icu",
 	.irq_enable = ltq_enable_irq,
 	.irq_disable = ltq_disable_irq,
 	.irq_unmask = ltq_enable_irq,
@@ -216,7 +216,7 @@ static struct irq_chip ltq_irq_type = {
 };
 
 static struct irq_chip ltq_eiu_type = {
-	"eiu",
+	.name = "eiu",
 	.irq_startup = ltq_startup_eiu_irq,
 	.irq_shutdown = ltq_shutdown_eiu_irq,
 	.irq_enable = ltq_enable_irq,
@@ -341,10 +341,10 @@ static int icu_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
 		return 0;
 
 	for (i = 0; i < exin_avail; i++)
-		if (hw == ltq_eiu_irq[i].start)
+		if (hw == ltq_eiu_irq[i])
 			chip = &ltq_eiu_type;
 
-	irq_set_chip_and_handler(hw, chip, handle_level_irq);
+	irq_set_chip_and_handler(irq, chip, handle_level_irq);
 
 	return 0;
 }
@@ -439,14 +439,15 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent)
 	eiu_node = of_find_compatible_node(NULL, NULL, "lantiq,eiu-xway");
 	if (eiu_node && !of_address_to_resource(eiu_node, 0, &res)) {
 		/* find out how many external irq sources we have */
-		exin_avail = of_irq_count(eiu_node);
+		exin_avail = of_property_count_u32_elems(eiu_node,
+							 "lantiq,eiu-irqs");
 
 		if (exin_avail > MAX_EIU)
 			exin_avail = MAX_EIU;
 
-		ret = of_irq_to_resource_table(eiu_node,
+		ret = of_property_read_u32_array(eiu_node, "lantiq,eiu-irqs",
 						ltq_eiu_irq, exin_avail);
-		if (ret != exin_avail)
+		if (ret)
 			panic("failed to load external irq resources");
 
 		if (!request_mem_region(res.start, resource_size(&res),
diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c
index 5f693ac77a0d..4cbb000e778e 100644
--- a/arch/mips/lantiq/prom.c
+++ b/arch/mips/lantiq/prom.c
@@ -74,8 +74,8 @@ void __init plat_mem_setup(void)
 
 	set_io_port_base((unsigned long) KSEG1);
 
-	if (fw_arg0 == -2) /* UHI interface */
-		dtb = (void *)fw_arg1;
+	if (fw_passed_dtb) /* UHI interface */
+		dtb = (void *)fw_passed_dtb;
 	else if (__dtb_start != __dtb_end)
 		dtb = (void *)__dtb_start;
 	else
diff --git a/arch/mips/loongson64/common/dma-swiotlb.c b/arch/mips/loongson64/common/dma-swiotlb.c
index 4ffa6fc81c8f..1a80b6f73ab2 100644
--- a/arch/mips/loongson64/common/dma-swiotlb.c
+++ b/arch/mips/loongson64/common/dma-swiotlb.c
@@ -10,7 +10,7 @@
 #include <dma-coherence.h>
 
 static void *loongson_dma_alloc_coherent(struct device *dev, size_t size,
-		dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs)
+		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
 	void *ret;
 
@@ -41,7 +41,7 @@ static void *loongson_dma_alloc_coherent(struct device *dev, size_t size,
 }
 
 static void loongson_dma_free_coherent(struct device *dev, size_t size,
-		void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs)
+		void *vaddr, dma_addr_t dma_handle, unsigned long attrs)
 {
 	swiotlb_free_coherent(dev, size, vaddr, dma_handle);
 }
@@ -49,7 +49,7 @@ static void loongson_dma_free_coherent(struct device *dev, size_t size,
 static dma_addr_t loongson_dma_map_page(struct device *dev, struct page *page,
 				unsigned long offset, size_t size,
 				enum dma_data_direction dir,
-				struct dma_attrs *attrs)
+				unsigned long attrs)
 {
 	dma_addr_t daddr = swiotlb_map_page(dev, page, offset, size,
 					dir, attrs);
@@ -59,9 +59,9 @@ static dma_addr_t loongson_dma_map_page(struct device *dev, struct page *page,
 
 static int loongson_dma_map_sg(struct device *dev, struct scatterlist *sg,
 				int nents, enum dma_data_direction dir,
-				struct dma_attrs *attrs)
+				unsigned long attrs)
 {
-	int r = swiotlb_map_sg_attrs(dev, sg, nents, dir, NULL);
+	int r = swiotlb_map_sg_attrs(dev, sg, nents, dir, 0);
 	mb();
 
 	return r;
diff --git a/arch/mips/loongson64/loongson-3/hpet.c b/arch/mips/loongson64/loongson-3/hpet.c
index 249039af66c4..4788bea62a6a 100644
--- a/arch/mips/loongson64/loongson-3/hpet.c
+++ b/arch/mips/loongson64/loongson-3/hpet.c
@@ -13,8 +13,8 @@
 #define SMBUS_PCI_REG64		0x64
 #define SMBUS_PCI_REGB4		0xb4
 
-#define HPET_MIN_CYCLES		64
-#define HPET_MIN_PROG_DELTA	(HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
+#define HPET_MIN_CYCLES		16
+#define HPET_MIN_PROG_DELTA	(HPET_MIN_CYCLES * 12)
 
 static DEFINE_SPINLOCK(hpet_lock);
 DEFINE_PER_CPU(struct clock_event_device, hpet_clockevent_device);
@@ -157,14 +157,14 @@ static int hpet_tick_resume(struct clock_event_device *evt)
 static int hpet_next_event(unsigned long delta,
 		struct clock_event_device *evt)
 {
-	unsigned int cnt;
-	int res;
+	u32 cnt;
+	s32 res;
 
 	cnt = hpet_read(HPET_COUNTER);
-	cnt += delta;
+	cnt += (u32) delta;
 	hpet_write(HPET_T0_CMP, cnt);
 
-	res = (int)(cnt - hpet_read(HPET_COUNTER));
+	res = (s32)(cnt - hpet_read(HPET_COUNTER));
 
 	return res < HPET_MIN_CYCLES ? -ETIME : 0;
 }
@@ -230,7 +230,7 @@ void __init setup_hpet_timer(void)
 
 	cd = &per_cpu(hpet_clockevent_device, cpu);
 	cd->name = "hpet";
-	cd->rating = 320;
+	cd->rating = 100;
 	cd->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
 	cd->set_state_shutdown = hpet_set_state_shutdown;
 	cd->set_state_periodic = hpet_set_state_periodic;
diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c
index e59759af63d9..2fec6f753a35 100644
--- a/arch/mips/loongson64/loongson-3/smp.c
+++ b/arch/mips/loongson64/loongson-3/smp.c
@@ -417,6 +417,7 @@ static int loongson3_cpu_disable(void)
 		return -EBUSY;
 
 	set_cpu_online(cpu, false);
+	calculate_cpu_foreign_map();
 	cpumask_clear_cpu(cpu, &cpu_callin_map);
 	local_irq_save(flags);
 	fixup_irqs();
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index d96e912b9d44..36775d20b0e7 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -434,8 +434,8 @@ static int microMIPS32_to_MIPS32(union mips_instruction *insn_ptr)
  * a single subroutine should be used across both
  * modules.
  */
-static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
-			 unsigned long *contpc)
+int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
+		  unsigned long *contpc)
 {
 	union mips_instruction insn = (union mips_instruction)dec_insn.insn;
 	unsigned int fcr31;
@@ -627,8 +627,8 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
 				dec_insn.pc_inc +
 				dec_insn.next_pc_inc;
 		return 1;
-	case cbcond0_op:
-	case cbcond1_op:
+	case pop10_op:
+	case pop30_op:
 		if (!cpu_has_mips_r6)
 			break;
 		if (insn.i_format.rt && !insn.i_format.rs)
@@ -683,14 +683,14 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
 			dec_insn.next_pc_inc;
 
 		return 1;
-	case beqzcjic_op:
+	case pop66_op:
 		if (!cpu_has_mips_r6)
 			break;
 		*contpc = regs->cp0_epc + dec_insn.pc_inc +
 			dec_insn.next_pc_inc;
 
 		return 1;
-	case bnezcjialc_op:
+	case pop76_op:
 		if (!cpu_has_mips_r6)
 			break;
 		if (!insn.i_format.rs)
@@ -784,10 +784,10 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
  */
 static inline int cop1_64bit(struct pt_regs *xcp)
 {
-	if (config_enabled(CONFIG_64BIT) && !config_enabled(CONFIG_MIPS32_O32))
+	if (IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_MIPS32_O32))
 		return 1;
-	else if (config_enabled(CONFIG_32BIT) &&
-		 !config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT))
+	else if (IS_ENABLED(CONFIG_32BIT) &&
+		 !IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
 		return 0;
 
 	return !test_thread_flag(TIF_32BIT_FPREGS);
@@ -1268,7 +1268,7 @@ branch_common:
 						 * instruction in the dslot.
 						 */
 						sig = mips_dsemul(xcp, ir,
-								  contpc);
+								  bcpc, contpc);
 						if (sig < 0)
 							break;
 						if (sig)
@@ -1323,7 +1323,7 @@ branch_common:
 				 * Single step the non-cp1
 				 * instruction in the dslot
 				 */
-				sig = mips_dsemul(xcp, ir, contpc);
+				sig = mips_dsemul(xcp, ir, bcpc, contpc);
 				if (sig < 0)
 					break;
 				if (sig)
diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c
index 47074887e64c..72a4642eee2c 100644
--- a/arch/mips/math-emu/dsemul.c
+++ b/arch/mips/math-emu/dsemul.c
@@ -1,3 +1,6 @@
+#include <linux/err.h>
+#include <linux/slab.h>
+
 #include <asm/branch.h>
 #include <asm/cacheflush.h>
 #include <asm/fpu_emulator.h>
@@ -5,43 +8,211 @@
 #include <asm/mipsregs.h>
 #include <asm/uaccess.h>
 
-#include "ieee754.h"
-
-/*
- * Emulate the arbitrary instruction ir at xcp->cp0_epc.  Required when
- * we have to emulate the instruction in a COP1 branch delay slot.  Do
- * not change cp0_epc due to the instruction
+/**
+ * struct emuframe - The 'emulation' frame structure
+ * @emul:	The instruction to 'emulate'.
+ * @badinst:	A break instruction to cause a return to the kernel.
  *
- * According to the spec:
- * 1) it shouldn't be a branch :-)
- * 2) it can be a COP instruction :-(
- * 3) if we are tring to run a protected memory space we must take
- *    special care on memory access instructions :-(
- */
-
-/*
- * "Trampoline" return routine to catch exception following
- *  execution of delay-slot instruction execution.
+ * This structure defines the frames placed within the delay slot emulation
+ * page in response to a call to mips_dsemul(). Each thread may be allocated
+ * only one frame at any given time. The kernel stores within it the
+ * instruction to be 'emulated' followed by a break instruction, then
+ * executes the frame in user mode. The break causes a trap to the kernel
+ * which leads to do_dsemulret() being called unless the instruction in
+ * @emul causes a trap itself, is a branch, or a signal is delivered to
+ * the thread. In these cases the allocated frame will either be reused by
+ * a subsequent delay slot 'emulation', or be freed during signal delivery or
+ * upon thread exit.
+ *
+ * This approach is used because:
+ *
+ * - Actually emulating all instructions isn't feasible. We would need to
+ *   be able to handle instructions from all revisions of the MIPS ISA,
+ *   all ASEs & all vendor instruction set extensions. This would be a
+ *   whole lot of work & continual maintenance burden as new instructions
+ *   are introduced, and in the case of some vendor extensions may not
+ *   even be possible. Thus we need to take the approach of actually
+ *   executing the instruction.
+ *
+ * - We must execute the instruction within user context. If we were to
+ *   execute the instruction in kernel mode then it would have access to
+ *   kernel resources without very careful checks, leaving us with a
+ *   high potential for security or stability issues to arise.
+ *
+ * - We used to place the frame on the users stack, but this requires
+ *   that the stack be executable. This is bad for security so the
+ *   per-process page is now used instead.
+ *
+ * - The instruction in @emul may be something entirely invalid for a
+ *   delay slot. The user may (intentionally or otherwise) place a branch
+ *   in a delay slot, or a kernel mode instruction, or something else
+ *   which generates an exception. Thus we can't rely upon the break in
+ *   @badinst always being hit. For this reason we track the index of the
+ *   frame allocated to each thread, allowing us to clean it up at later
+ *   points such as signal delivery or thread exit.
+ *
+ * - The user may generate a fake struct emuframe if they wish, invoking
+ *   the BRK_MEMU break instruction themselves. We must therefore not
+ *   trust that BRK_MEMU means there's actually a valid frame allocated
+ *   to the thread, and must not allow the user to do anything they
+ *   couldn't already.
  */
-
 struct emuframe {
 	mips_instruction	emul;
 	mips_instruction	badinst;
-	mips_instruction	cookie;
-	unsigned long		epc;
 };
 
-/*
- * Set up an emulation frame for instruction IR, from a delay slot of
- * a branch jumping to CPC.  Return 0 if successful, -1 if no emulation
- * required, otherwise a signal number causing a frame setup failure.
- */
-int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
+static const int emupage_frame_count = PAGE_SIZE / sizeof(struct emuframe);
+
+static inline __user struct emuframe *dsemul_page(void)
+{
+	return (__user struct emuframe *)STACK_TOP;
+}
+
+static int alloc_emuframe(void)
+{
+	mm_context_t *mm_ctx = &current->mm->context;
+	int idx;
+
+retry:
+	spin_lock(&mm_ctx->bd_emupage_lock);
+
+	/* Ensure we have an allocation bitmap */
+	if (!mm_ctx->bd_emupage_allocmap) {
+		mm_ctx->bd_emupage_allocmap =
+			kcalloc(BITS_TO_LONGS(emupage_frame_count),
+					      sizeof(unsigned long),
+				GFP_ATOMIC);
+
+		if (!mm_ctx->bd_emupage_allocmap) {
+			idx = BD_EMUFRAME_NONE;
+			goto out_unlock;
+		}
+	}
+
+	/* Attempt to allocate a single bit/frame */
+	idx = bitmap_find_free_region(mm_ctx->bd_emupage_allocmap,
+				      emupage_frame_count, 0);
+	if (idx < 0) {
+		/*
+		 * Failed to allocate a frame. We'll wait until one becomes
+		 * available. We unlock the page so that other threads actually
+		 * get the opportunity to free their frames, which means
+		 * technically the result of bitmap_full may be incorrect.
+		 * However the worst case is that we repeat all this and end up
+		 * back here again.
+		 */
+		spin_unlock(&mm_ctx->bd_emupage_lock);
+		if (!wait_event_killable(mm_ctx->bd_emupage_queue,
+			!bitmap_full(mm_ctx->bd_emupage_allocmap,
+				     emupage_frame_count)))
+			goto retry;
+
+		/* Received a fatal signal - just give in */
+		return BD_EMUFRAME_NONE;
+	}
+
+	/* Success! */
+	pr_debug("allocate emuframe %d to %d\n", idx, current->pid);
+out_unlock:
+	spin_unlock(&mm_ctx->bd_emupage_lock);
+	return idx;
+}
+
+static void free_emuframe(int idx, struct mm_struct *mm)
+{
+	mm_context_t *mm_ctx = &mm->context;
+
+	spin_lock(&mm_ctx->bd_emupage_lock);
+
+	pr_debug("free emuframe %d from %d\n", idx, current->pid);
+	bitmap_clear(mm_ctx->bd_emupage_allocmap, idx, 1);
+
+	/* If some thread is waiting for a frame, now's its chance */
+	wake_up(&mm_ctx->bd_emupage_queue);
+
+	spin_unlock(&mm_ctx->bd_emupage_lock);
+}
+
+static bool within_emuframe(struct pt_regs *regs)
+{
+	unsigned long base = (unsigned long)dsemul_page();
+
+	if (regs->cp0_epc < base)
+		return false;
+	if (regs->cp0_epc >= (base + PAGE_SIZE))
+		return false;
+
+	return true;
+}
+
+bool dsemul_thread_cleanup(struct task_struct *tsk)
+{
+	int fr_idx;
+
+	/* Clear any allocated frame, retrieving its index */
+	fr_idx = atomic_xchg(&tsk->thread.bd_emu_frame, BD_EMUFRAME_NONE);
+
+	/* If no frame was allocated, we're done */
+	if (fr_idx == BD_EMUFRAME_NONE)
+		return false;
+
+	task_lock(tsk);
+
+	/* Free the frame that this thread had allocated */
+	if (tsk->mm)
+		free_emuframe(fr_idx, tsk->mm);
+
+	task_unlock(tsk);
+	return true;
+}
+
+bool dsemul_thread_rollback(struct pt_regs *regs)
+{
+	struct emuframe __user *fr;
+	int fr_idx;
+
+	/* Do nothing if we're not executing from a frame */
+	if (!within_emuframe(regs))
+		return false;
+
+	/* Find the frame being executed */
+	fr_idx = atomic_read(&current->thread.bd_emu_frame);
+	if (fr_idx == BD_EMUFRAME_NONE)
+		return false;
+	fr = &dsemul_page()[fr_idx];
+
+	/*
+	 * If the PC is at the emul instruction, roll back to the branch. If
+	 * PC is at the badinst (break) instruction, we've already emulated the
+	 * instruction so progress to the continue PC. If it's anything else
+	 * then something is amiss & the user has branched into some other area
+	 * of the emupage - we'll free the allocated frame anyway.
+	 */
+	if (msk_isa16_mode(regs->cp0_epc) == (unsigned long)&fr->emul)
+		regs->cp0_epc = current->thread.bd_emu_branch_pc;
+	else if (msk_isa16_mode(regs->cp0_epc) == (unsigned long)&fr->badinst)
+		regs->cp0_epc = current->thread.bd_emu_cont_pc;
+
+	atomic_set(&current->thread.bd_emu_frame, BD_EMUFRAME_NONE);
+	free_emuframe(fr_idx, current->mm);
+	return true;
+}
+
+void dsemul_mm_cleanup(struct mm_struct *mm)
+{
+	mm_context_t *mm_ctx = &mm->context;
+
+	kfree(mm_ctx->bd_emupage_allocmap);
+}
+
+int mips_dsemul(struct pt_regs *regs, mips_instruction ir,
+		unsigned long branch_pc, unsigned long cont_pc)
 {
 	int isa16 = get_isa16_mode(regs->cp0_epc);
 	mips_instruction break_math;
 	struct emuframe __user *fr;
-	int err;
+	int err, fr_idx;
 
 	/* NOP is easy */
 	if (ir == 0)
@@ -68,30 +239,20 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
 		}
 	}
 
-	pr_debug("dsemul %lx %lx\n", regs->cp0_epc, cpc);
+	pr_debug("dsemul 0x%08lx cont at 0x%08lx\n", regs->cp0_epc, cont_pc);
 
-	/*
-	 * The strategy is to push the instruction onto the user stack
-	 * and put a trap after it which we can catch and jump to
-	 * the required address any alternative apart from full
-	 * instruction emulation!!.
-	 *
-	 * Algorithmics used a system call instruction, and
-	 * borrowed that vector.  MIPS/Linux version is a bit
-	 * more heavyweight in the interests of portability and
-	 * multiprocessor support.  For Linux we use a BREAK 514
-	 * instruction causing a breakpoint exception.
-	 */
-	break_math = BREAK_MATH(isa16);
-
-	/* Ensure that the two instructions are in the same cache line */
-	fr = (struct emuframe __user *)
-		((regs->regs[29] - sizeof(struct emuframe)) & ~0x7);
-
-	/* Verify that the stack pointer is not completely insane */
-	if (unlikely(!access_ok(VERIFY_WRITE, fr, sizeof(struct emuframe))))
+	/* Allocate a frame if we don't already have one */
+	fr_idx = atomic_read(&current->thread.bd_emu_frame);
+	if (fr_idx == BD_EMUFRAME_NONE)
+		fr_idx = alloc_emuframe();
+	if (fr_idx == BD_EMUFRAME_NONE)
 		return SIGBUS;
+	fr = &dsemul_page()[fr_idx];
+
+	/* Retrieve the appropriately encoded break instruction */
+	break_math = BREAK_MATH(isa16);
 
+	/* Write the instructions to the frame */
 	if (isa16) {
 		err = __put_user(ir >> 16,
 				 (u16 __user *)(&fr->emul));
@@ -106,84 +267,36 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
 		err |= __put_user(break_math, &fr->badinst);
 	}
 
-	err |= __put_user((mips_instruction)BD_COOKIE, &fr->cookie);
-	err |= __put_user(cpc, &fr->epc);
-
 	if (unlikely(err)) {
 		MIPS_FPU_EMU_INC_STATS(errors);
+		free_emuframe(fr_idx, current->mm);
 		return SIGBUS;
 	}
 
+	/* Record the PC of the branch, PC to continue from & frame index */
+	current->thread.bd_emu_branch_pc = branch_pc;
+	current->thread.bd_emu_cont_pc = cont_pc;
+	atomic_set(&current->thread.bd_emu_frame, fr_idx);
+
+	/* Change user register context to execute the frame */
 	regs->cp0_epc = (unsigned long)&fr->emul | isa16;
 
+	/* Ensure the icache observes our newly written frame */
 	flush_cache_sigtramp((unsigned long)&fr->emul);
 
 	return 0;
 }
 
-int do_dsemulret(struct pt_regs *xcp)
+bool do_dsemulret(struct pt_regs *xcp)
 {
-	int isa16 = get_isa16_mode(xcp->cp0_epc);
-	struct emuframe __user *fr;
-	unsigned long epc;
-	u32 insn, cookie;
-	int err = 0;
-	u16 instr[2];
-
-	fr = (struct emuframe __user *)
-		(msk_isa16_mode(xcp->cp0_epc) - sizeof(mips_instruction));
-
-	/*
-	 * If we can't even access the area, something is very wrong, but we'll
-	 * leave that to the default handling
-	 */
-	if (!access_ok(VERIFY_READ, fr, sizeof(struct emuframe)))
-		return 0;
-
-	/*
-	 * Do some sanity checking on the stackframe:
-	 *
-	 *  - Is the instruction pointed to by the EPC an BREAK_MATH?
-	 *  - Is the following memory word the BD_COOKIE?
-	 */
-	if (isa16) {
-		err = __get_user(instr[0],
-				 (u16 __user *)(&fr->badinst));
-		err |= __get_user(instr[1],
-				  (u16 __user *)((long)(&fr->badinst) + 2));
-		insn = (instr[0] << 16) | instr[1];
-	} else {
-		err = __get_user(insn, &fr->badinst);
-	}
-	err |= __get_user(cookie, &fr->cookie);
-
-	if (unlikely(err ||
-		     insn != BREAK_MATH(isa16) || cookie != BD_COOKIE)) {
+	/* Cleanup the allocated frame, returning if there wasn't one */
+	if (!dsemul_thread_cleanup(current)) {
 		MIPS_FPU_EMU_INC_STATS(errors);
-		return 0;
-	}
-
-	/*
-	 * At this point, we are satisfied that it's a BD emulation trap.  Yes,
-	 * a user might have deliberately put two malformed and useless
-	 * instructions in a row in his program, in which case he's in for a
-	 * nasty surprise - the next instruction will be treated as a
-	 * continuation address!  Alas, this seems to be the only way that we
-	 * can handle signals, recursion, and longjmps() in the context of
-	 * emulating the branch delay instruction.
-	 */
-
-	pr_debug("dsemulret\n");
-
-	if (__get_user(epc, &fr->epc)) {		/* Saved EPC */
-		/* This is not a good situation to be in */
-		force_sig(SIGBUS, current);
-
-		return 0;
+		return false;
 	}
 
 	/* Set EPC to return to post-branch instruction */
-	xcp->cp0_epc = epc;
-	MIPS_FPU_EMU_INC_STATS(ds_emul);
-	return 1;
+	xcp->cp0_epc = current->thread.bd_emu_cont_pc;
+	pr_debug("dsemulret to 0x%08lx\n", xcp->cp0_epc);
+	return true;
 }
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index ef7f925dd1b0..cd72805b64a7 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -40,6 +40,51 @@
 #include <asm/mips-cm.h>
 
 /*
+ * Bits describing what cache ops an SMP callback function may perform.
+ *
+ * R4K_HIT   -	Virtual user or kernel address based cache operations. The
+ *		active_mm must be checked before using user addresses, falling
+ *		back to kmap.
+ * R4K_INDEX -	Index based cache operations.
+ */
+
+#define R4K_HIT		BIT(0)
+#define R4K_INDEX	BIT(1)
+
+/**
+ * r4k_op_needs_ipi() - Decide if a cache op needs to be done on every core.
+ * @type:	Type of cache operations (R4K_HIT or R4K_INDEX).
+ *
+ * Decides whether a cache op needs to be performed on every core in the system.
+ * This may change depending on the @type of cache operation, as well as the set
+ * of online CPUs, so preemption should be disabled by the caller to prevent CPU
+ * hotplug from changing the result.
+ *
+ * Returns:	1 if the cache operation @type should be done on every core in
+ *		the system.
+ *		0 if the cache operation @type is globalized and only needs to
+ *		be performed on a simple CPU.
+ */
+static inline bool r4k_op_needs_ipi(unsigned int type)
+{
+	/* The MIPS Coherence Manager (CM) globalizes address-based cache ops */
+	if (type == R4K_HIT && mips_cm_present())
+		return false;
+
+	/*
+	 * Hardware doesn't globalize the required cache ops, so SMP calls may
+	 * be needed, but only if there are foreign CPUs (non-siblings with
+	 * separate caches).
+	 */
+	/* cpu_foreign_map[] undeclared when !CONFIG_SMP */
+#ifdef CONFIG_SMP
+	return !cpumask_empty(&cpu_foreign_map[0]);
+#else
+	return false;
+#endif
+}
+
+/*
  * Special Variant of smp_call_function for use by cache functions:
  *
  *  o No return value
@@ -48,30 +93,17 @@
  *    primary cache.
  *  o doesn't disable interrupts on the local CPU
  */
-static inline void r4k_on_each_cpu(void (*func) (void *info), void *info)
+static inline void r4k_on_each_cpu(unsigned int type,
+				   void (*func)(void *info), void *info)
 {
 	preempt_disable();
-
-	/*
-	 * The Coherent Manager propagates address-based cache ops to other
-	 * cores but not index-based ops. However, r4k_on_each_cpu is used
-	 * in both cases so there is no easy way to tell what kind of op is
-	 * executed to the other cores. The best we can probably do is
-	 * to restrict that call when a CM is not present because both
-	 * CM-based SMP protocols (CMP & CPS) restrict index-based cache ops.
-	 */
-	if (!mips_cm_present())
-		smp_call_function_many(&cpu_foreign_map, func, info, 1);
+	if (r4k_op_needs_ipi(type))
+		smp_call_function_many(&cpu_foreign_map[smp_processor_id()],
+				       func, info, 1);
 	func(info);
 	preempt_enable();
 }
 
-#if defined(CONFIG_MIPS_CMP) || defined(CONFIG_MIPS_CPS)
-#define cpu_has_safe_index_cacheops 0
-#else
-#define cpu_has_safe_index_cacheops 1
-#endif
-
 /*
  * Must die.
  */
@@ -462,22 +494,44 @@ static inline void local_r4k___flush_cache_all(void * args)
 
 static void r4k___flush_cache_all(void)
 {
-	r4k_on_each_cpu(local_r4k___flush_cache_all, NULL);
+	r4k_on_each_cpu(R4K_INDEX, local_r4k___flush_cache_all, NULL);
 }
 
-static inline int has_valid_asid(const struct mm_struct *mm)
+/**
+ * has_valid_asid() - Determine if an mm already has an ASID.
+ * @mm:		Memory map.
+ * @type:	R4K_HIT or R4K_INDEX, type of cache op.
+ *
+ * Determines whether @mm already has an ASID on any of the CPUs which cache ops
+ * of type @type within an r4k_on_each_cpu() call will affect. If
+ * r4k_on_each_cpu() does an SMP call to a single VPE in each core, then the
+ * scope of the operation is confined to sibling CPUs, otherwise all online CPUs
+ * will need to be checked.
+ *
+ * Must be called in non-preemptive context.
+ *
+ * Returns:	1 if the CPUs affected by @type cache ops have an ASID for @mm.
+ *		0 otherwise.
+ */
+static inline int has_valid_asid(const struct mm_struct *mm, unsigned int type)
 {
-#ifdef CONFIG_MIPS_MT_SMP
-	int i;
+	unsigned int i;
+	const cpumask_t *mask = cpu_present_mask;
 
-	for_each_online_cpu(i)
+	/* cpu_sibling_map[] undeclared when !CONFIG_SMP */
+#ifdef CONFIG_SMP
+	/*
+	 * If r4k_on_each_cpu does SMP calls, it does them to a single VPE in
+	 * each foreign core, so we only need to worry about siblings.
+	 * Otherwise we need to worry about all present CPUs.
+	 */
+	if (r4k_op_needs_ipi(type))
+		mask = &cpu_sibling_map[smp_processor_id()];
+#endif
+	for_each_cpu(i, mask)
 		if (cpu_context(i, mm))
 			return 1;
-
 	return 0;
-#else
-	return cpu_context(smp_processor_id(), mm);
-#endif
 }
 
 static void r4k__flush_cache_vmap(void)
@@ -490,12 +544,16 @@ static void r4k__flush_cache_vunmap(void)
 	r4k_blast_dcache();
 }
 
+/*
+ * Note: flush_tlb_range() assumes flush_cache_range() sufficiently flushes
+ * whole caches when vma is executable.
+ */
 static inline void local_r4k_flush_cache_range(void * args)
 {
 	struct vm_area_struct *vma = args;
 	int exec = vma->vm_flags & VM_EXEC;
 
-	if (!(has_valid_asid(vma->vm_mm)))
+	if (!has_valid_asid(vma->vm_mm, R4K_INDEX))
 		return;
 
 	/*
@@ -516,14 +574,14 @@ static void r4k_flush_cache_range(struct vm_area_struct *vma,
 	int exec = vma->vm_flags & VM_EXEC;
 
 	if (cpu_has_dc_aliases || exec)
-		r4k_on_each_cpu(local_r4k_flush_cache_range, vma);
+		r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_range, vma);
 }
 
 static inline void local_r4k_flush_cache_mm(void * args)
 {
 	struct mm_struct *mm = args;
 
-	if (!has_valid_asid(mm))
+	if (!has_valid_asid(mm, R4K_INDEX))
 		return;
 
 	/*
@@ -548,7 +606,7 @@ static void r4k_flush_cache_mm(struct mm_struct *mm)
 	if (!cpu_has_dc_aliases)
 		return;
 
-	r4k_on_each_cpu(local_r4k_flush_cache_mm, mm);
+	r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_mm, mm);
 }
 
 struct flush_cache_page_args {
@@ -573,10 +631,10 @@ static inline void local_r4k_flush_cache_page(void *args)
 	void *vaddr;
 
 	/*
-	 * If ownes no valid ASID yet, cannot possibly have gotten
+	 * If owns no valid ASID yet, cannot possibly have gotten
 	 * this page into the cache.
 	 */
-	if (!has_valid_asid(mm))
+	if (!has_valid_asid(mm, R4K_HIT))
 		return;
 
 	addr &= PAGE_MASK;
@@ -643,7 +701,7 @@ static void r4k_flush_cache_page(struct vm_area_struct *vma,
 	args.addr = addr;
 	args.pfn = pfn;
 
-	r4k_on_each_cpu(local_r4k_flush_cache_page, &args);
+	r4k_on_each_cpu(R4K_HIT, local_r4k_flush_cache_page, &args);
 }
 
 static inline void local_r4k_flush_data_cache_page(void * addr)
@@ -656,18 +714,23 @@ static void r4k_flush_data_cache_page(unsigned long addr)
 	if (in_atomic())
 		local_r4k_flush_data_cache_page((void *)addr);
 	else
-		r4k_on_each_cpu(local_r4k_flush_data_cache_page, (void *) addr);
+		r4k_on_each_cpu(R4K_HIT, local_r4k_flush_data_cache_page,
+				(void *) addr);
 }
 
 struct flush_icache_range_args {
 	unsigned long start;
 	unsigned long end;
+	unsigned int type;
 };
 
-static inline void local_r4k_flush_icache_range(unsigned long start, unsigned long end)
+static inline void __local_r4k_flush_icache_range(unsigned long start,
+						  unsigned long end,
+						  unsigned int type)
 {
 	if (!cpu_has_ic_fills_f_dc) {
-		if (end - start >= dcache_size) {
+		if (type == R4K_INDEX ||
+		    (type & R4K_INDEX && end - start >= dcache_size)) {
 			r4k_blast_dcache();
 		} else {
 			R4600_HIT_CACHEOP_WAR_IMPL;
@@ -675,7 +738,8 @@ static inline void local_r4k_flush_icache_range(unsigned long start, unsigned lo
 		}
 	}
 
-	if (end - start > icache_size)
+	if (type == R4K_INDEX ||
+	    (type & R4K_INDEX && end - start > icache_size))
 		r4k_blast_icache();
 	else {
 		switch (boot_cpu_type()) {
@@ -701,23 +765,52 @@ static inline void local_r4k_flush_icache_range(unsigned long start, unsigned lo
 #endif
 }
 
+static inline void local_r4k_flush_icache_range(unsigned long start,
+						unsigned long end)
+{
+	__local_r4k_flush_icache_range(start, end, R4K_HIT | R4K_INDEX);
+}
+
 static inline void local_r4k_flush_icache_range_ipi(void *args)
 {
 	struct flush_icache_range_args *fir_args = args;
 	unsigned long start = fir_args->start;
 	unsigned long end = fir_args->end;
+	unsigned int type = fir_args->type;
 
-	local_r4k_flush_icache_range(start, end);
+	__local_r4k_flush_icache_range(start, end, type);
 }
 
 static void r4k_flush_icache_range(unsigned long start, unsigned long end)
 {
 	struct flush_icache_range_args args;
+	unsigned long size, cache_size;
 
 	args.start = start;
 	args.end = end;
+	args.type = R4K_HIT | R4K_INDEX;
 
-	r4k_on_each_cpu(local_r4k_flush_icache_range_ipi, &args);
+	/*
+	 * Indexed cache ops require an SMP call.
+	 * Consider if that can or should be avoided.
+	 */
+	preempt_disable();
+	if (r4k_op_needs_ipi(R4K_INDEX) && !r4k_op_needs_ipi(R4K_HIT)) {
+		/*
+		 * If address-based cache ops don't require an SMP call, then
+		 * use them exclusively for small flushes.
+		 */
+		size = start - end;
+		cache_size = icache_size;
+		if (!cpu_has_ic_fills_f_dc) {
+			size *= 2;
+			cache_size += dcache_size;
+		}
+		if (size <= cache_size)
+			args.type &= ~R4K_INDEX;
+	}
+	r4k_on_each_cpu(args.type, local_r4k_flush_icache_range_ipi, &args);
+	preempt_enable();
 	instruction_hazard();
 }
 
@@ -744,7 +837,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 	 * subset property so we have to flush the primary caches
 	 * explicitly
 	 */
-	if (cpu_has_safe_index_cacheops && size >= dcache_size) {
+	if (size >= dcache_size) {
 		r4k_blast_dcache();
 	} else {
 		R4600_HIT_CACHEOP_WAR_IMPL;
@@ -781,7 +874,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
 		return;
 	}
 
-	if (cpu_has_safe_index_cacheops && size >= dcache_size) {
+	if (size >= dcache_size) {
 		r4k_blast_dcache();
 	} else {
 		R4600_HIT_CACHEOP_WAR_IMPL;
@@ -794,25 +887,76 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
 }
 #endif /* CONFIG_DMA_NONCOHERENT || CONFIG_DMA_MAYBE_COHERENT */
 
+struct flush_cache_sigtramp_args {
+	struct mm_struct *mm;
+	struct page *page;
+	unsigned long addr;
+};
+
 /*
  * While we're protected against bad userland addresses we don't care
  * very much about what happens in that case.  Usually a segmentation
  * fault will dump the process later on anyway ...
  */
-static void local_r4k_flush_cache_sigtramp(void * arg)
+static void local_r4k_flush_cache_sigtramp(void *args)
 {
+	struct flush_cache_sigtramp_args *fcs_args = args;
+	unsigned long addr = fcs_args->addr;
+	struct page *page = fcs_args->page;
+	struct mm_struct *mm = fcs_args->mm;
+	int map_coherent = 0;
+	void *vaddr;
+
 	unsigned long ic_lsize = cpu_icache_line_size();
 	unsigned long dc_lsize = cpu_dcache_line_size();
 	unsigned long sc_lsize = cpu_scache_line_size();
-	unsigned long addr = (unsigned long) arg;
+
+	/*
+	 * If owns no valid ASID yet, cannot possibly have gotten
+	 * this page into the cache.
+	 */
+	if (!has_valid_asid(mm, R4K_HIT))
+		return;
+
+	if (mm == current->active_mm) {
+		vaddr = NULL;
+	} else {
+		/*
+		 * Use kmap_coherent or kmap_atomic to do flushes for
+		 * another ASID than the current one.
+		 */
+		map_coherent = (cpu_has_dc_aliases &&
+				page_mapcount(page) &&
+				!Page_dcache_dirty(page));
+		if (map_coherent)
+			vaddr = kmap_coherent(page, addr);
+		else
+			vaddr = kmap_atomic(page);
+		addr = (unsigned long)vaddr + (addr & ~PAGE_MASK);
+	}
 
 	R4600_HIT_CACHEOP_WAR_IMPL;
-	if (dc_lsize)
-		protected_writeback_dcache_line(addr & ~(dc_lsize - 1));
-	if (!cpu_icache_snoops_remote_store && scache_size)
-		protected_writeback_scache_line(addr & ~(sc_lsize - 1));
+	if (!cpu_has_ic_fills_f_dc) {
+		if (dc_lsize)
+			vaddr ? flush_dcache_line(addr & ~(dc_lsize - 1))
+			      : protected_writeback_dcache_line(
+							addr & ~(dc_lsize - 1));
+		if (!cpu_icache_snoops_remote_store && scache_size)
+			vaddr ? flush_scache_line(addr & ~(sc_lsize - 1))
+			      : protected_writeback_scache_line(
+							addr & ~(sc_lsize - 1));
+	}
 	if (ic_lsize)
-		protected_flush_icache_line(addr & ~(ic_lsize - 1));
+		vaddr ? flush_icache_line(addr & ~(ic_lsize - 1))
+		      : protected_flush_icache_line(addr & ~(ic_lsize - 1));
+
+	if (vaddr) {
+		if (map_coherent)
+			kunmap_coherent();
+		else
+			kunmap_atomic(vaddr);
+	}
+
 	if (MIPS4K_ICACHE_REFILL_WAR) {
 		__asm__ __volatile__ (
 			".set push\n\t"
@@ -837,7 +981,23 @@ static void local_r4k_flush_cache_sigtramp(void * arg)
 
 static void r4k_flush_cache_sigtramp(unsigned long addr)
 {
-	r4k_on_each_cpu(local_r4k_flush_cache_sigtramp, (void *) addr);
+	struct flush_cache_sigtramp_args args;
+	int npages;
+
+	down_read(&current->mm->mmap_sem);
+
+	npages = get_user_pages_fast(addr, 1, 0, &args.page);
+	if (npages < 1)
+		goto out;
+
+	args.mm = current->mm;
+	args.addr = addr;
+
+	r4k_on_each_cpu(R4K_HIT, local_r4k_flush_cache_sigtramp, &args);
+
+	put_page(args.page);
+out:
+	up_read(&current->mm->mmap_sem);
 }
 
 static void r4k_flush_icache_all(void)
@@ -851,6 +1011,15 @@ struct flush_kernel_vmap_range_args {
 	int		size;
 };
 
+static inline void local_r4k_flush_kernel_vmap_range_index(void *args)
+{
+	/*
+	 * Aliases only affect the primary caches so don't bother with
+	 * S-caches or T-caches.
+	 */
+	r4k_blast_dcache();
+}
+
 static inline void local_r4k_flush_kernel_vmap_range(void *args)
 {
 	struct flush_kernel_vmap_range_args *vmra = args;
@@ -861,12 +1030,8 @@ static inline void local_r4k_flush_kernel_vmap_range(void *args)
 	 * Aliases only affect the primary caches so don't bother with
 	 * S-caches or T-caches.
 	 */
-	if (cpu_has_safe_index_cacheops && size >= dcache_size)
-		r4k_blast_dcache();
-	else {
-		R4600_HIT_CACHEOP_WAR_IMPL;
-		blast_dcache_range(vaddr, vaddr + size);
-	}
+	R4600_HIT_CACHEOP_WAR_IMPL;
+	blast_dcache_range(vaddr, vaddr + size);
 }
 
 static void r4k_flush_kernel_vmap_range(unsigned long vaddr, int size)
@@ -876,7 +1041,12 @@ static void r4k_flush_kernel_vmap_range(unsigned long vaddr, int size)
 	args.vaddr = (unsigned long) vaddr;
 	args.size = size;
 
-	r4k_on_each_cpu(local_r4k_flush_kernel_vmap_range, &args);
+	if (size >= dcache_size)
+		r4k_on_each_cpu(R4K_INDEX,
+				local_r4k_flush_kernel_vmap_range_index, NULL);
+	else
+		r4k_on_each_cpu(R4K_HIT, local_r4k_flush_kernel_vmap_range,
+				&args);
 }
 
 static inline void rm7k_erratum31(void)
@@ -1206,7 +1376,7 @@ static void probe_pcache(void)
 			      c->icache.linesz;
 		c->icache.waybit = __ffs(icache_size/c->icache.ways);
 
-		if (config & 0x8)		/* VI bit */
+		if (config & MIPS_CONF_VI)
 			c->icache.flags |= MIPS_CACHE_VTAG;
 
 		/*
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index cb557d28cb21..b2eadd6fa9a1 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -131,7 +131,7 @@ static void *mips_dma_alloc_noncoherent(struct device *dev, size_t size,
 }
 
 static void *mips_dma_alloc_coherent(struct device *dev, size_t size,
-	dma_addr_t * dma_handle, gfp_t gfp, struct dma_attrs *attrs)
+	dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
 	void *ret;
 	struct page *page = NULL;
@@ -141,7 +141,7 @@ static void *mips_dma_alloc_coherent(struct device *dev, size_t size,
 	 * XXX: seems like the coherent and non-coherent implementations could
 	 * be consolidated.
 	 */
-	if (dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs))
+	if (attrs & DMA_ATTR_NON_CONSISTENT)
 		return mips_dma_alloc_noncoherent(dev, size, dma_handle, gfp);
 
 	gfp = massage_gfp_flags(dev, gfp);
@@ -176,13 +176,13 @@ static void mips_dma_free_noncoherent(struct device *dev, size_t size,
 }
 
 static void mips_dma_free_coherent(struct device *dev, size_t size, void *vaddr,
-	dma_addr_t dma_handle, struct dma_attrs *attrs)
+	dma_addr_t dma_handle, unsigned long attrs)
 {
 	unsigned long addr = (unsigned long) vaddr;
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	struct page *page = NULL;
 
-	if (dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs)) {
+	if (attrs & DMA_ATTR_NON_CONSISTENT) {
 		mips_dma_free_noncoherent(dev, size, vaddr, dma_handle);
 		return;
 	}
@@ -200,7 +200,7 @@ static void mips_dma_free_coherent(struct device *dev, size_t size, void *vaddr,
 
 static int mips_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 	void *cpu_addr, dma_addr_t dma_addr, size_t size,
-	struct dma_attrs *attrs)
+	unsigned long attrs)
 {
 	unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
@@ -214,7 +214,7 @@ static int mips_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 
 	pfn = page_to_pfn(virt_to_page((void *)addr));
 
-	if (dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
+	if (attrs & DMA_ATTR_WRITE_COMBINE)
 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 	else
 		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
@@ -291,7 +291,7 @@ static inline void __dma_sync(struct page *page,
 }
 
 static void mips_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
-	size_t size, enum dma_data_direction direction, struct dma_attrs *attrs)
+	size_t size, enum dma_data_direction direction, unsigned long attrs)
 {
 	if (cpu_needs_post_dma_flush(dev))
 		__dma_sync(dma_addr_to_page(dev, dma_addr),
@@ -301,7 +301,7 @@ static void mips_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
 }
 
 static int mips_dma_map_sg(struct device *dev, struct scatterlist *sglist,
-	int nents, enum dma_data_direction direction, struct dma_attrs *attrs)
+	int nents, enum dma_data_direction direction, unsigned long attrs)
 {
 	int i;
 	struct scatterlist *sg;
@@ -322,7 +322,7 @@ static int mips_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 
 static dma_addr_t mips_dma_map_page(struct device *dev, struct page *page,
 	unsigned long offset, size_t size, enum dma_data_direction direction,
-	struct dma_attrs *attrs)
+	unsigned long attrs)
 {
 	if (!plat_device_is_coherent(dev))
 		__dma_sync(page, offset, size, direction);
@@ -332,7 +332,7 @@ static dma_addr_t mips_dma_map_page(struct device *dev, struct page *page,
 
 static void mips_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 	int nhwentries, enum dma_data_direction direction,
-	struct dma_attrs *attrs)
+	unsigned long attrs)
 {
 	int i;
 	struct scatterlist *sg;
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 9b58eb5fd0d5..a5509e7dcad2 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -504,7 +504,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 
 void (*free_init_pages_eva)(void *begin, void *end) = NULL;
 
-void __init_refok free_initmem(void)
+void __ref free_initmem(void)
 {
 	prom_free_prom_memory();
 	/*
diff --git a/arch/mips/mm/sc-debugfs.c b/arch/mips/mm/sc-debugfs.c
index 5eefe3281b24..01f1154cdb0c 100644
--- a/arch/mips/mm/sc-debugfs.c
+++ b/arch/mips/mm/sc-debugfs.c
@@ -73,8 +73,8 @@ static int __init sc_debugfs_init(void)
 
 	file = debugfs_create_file("prefetch", S_IRUGO | S_IWUSR, dir,
 				   NULL, &sc_prefetch_fops);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
+	if (!file)
+		return -ENOMEM;
 
 	return 0;
 }
diff --git a/arch/mips/mm/sc-rm7k.c b/arch/mips/mm/sc-rm7k.c
index 9ac1efcfbcc7..78f900c59276 100644
--- a/arch/mips/mm/sc-rm7k.c
+++ b/arch/mips/mm/sc-rm7k.c
@@ -161,7 +161,7 @@ static void rm7k_tc_disable(void)
 	local_irq_save(flags);
 	blast_rm7k_tcache();
 	clear_c0_config(RM7K_CONF_TE);
-	local_irq_save(flags);
+	local_irq_restore(flags);
 }
 
 static void rm7k_sc_disable(void)
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 4004b659ce50..55ce39606cb8 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -888,7 +888,7 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
 		}
 	}
 	if (!did_vmalloc_branch) {
-		if (uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd)) {
+		if (single_insn_swpd) {
 			uasm_il_b(p, r, label_vmalloc_done);
 			uasm_i_lui(p, ptr, uasm_rel_hi(swpd));
 		} else {
@@ -1025,7 +1025,7 @@ static void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep)
 	pte_off_odd += offsetof(pte_t, pte_high);
 #endif
 
-	if (config_enabled(CONFIG_XPA)) {
+	if (IS_ENABLED(CONFIG_XPA)) {
 		uasm_i_lw(p, tmp, pte_off_even, ptep); /* even pte */
 		UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL));
 		UASM_i_MTC0(p, tmp, C0_ENTRYLO0);
@@ -1643,7 +1643,7 @@ iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr,
 	unsigned int hwmode = mode & (_PAGE_VALID | _PAGE_DIRTY);
 	unsigned int swmode = mode & ~hwmode;
 
-	if (config_enabled(CONFIG_XPA) && !cpu_has_64bits) {
+	if (IS_ENABLED(CONFIG_XPA) && !cpu_has_64bits) {
 		uasm_i_lui(p, scratch, swmode >> 16);
 		uasm_i_or(p, pte, pte, scratch);
 		BUG_ON(swmode & 0xffff);
@@ -2432,7 +2432,7 @@ static void config_htw_params(void)
 		pwsize |= ilog2(PTRS_PER_PMD) << MIPS_PWSIZE_MDW_SHIFT;
 
 	/* Set pointer size to size of directory pointers */
-	if (config_enabled(CONFIG_64BIT))
+	if (IS_ENABLED(CONFIG_64BIT))
 		pwsize |= MIPS_PWSIZE_PS_MASK;
 	/* PTEs may be multiple pointers long (e.g. with XPA) */
 	pwsize |= ((PTE_T_LOG2 - PGD_T_LOG2) << MIPS_PWSIZE_PTEW_SHIFT)
@@ -2448,7 +2448,7 @@ static void config_htw_params(void)
 	 * the pwctl fields.
 	 */
 	config = 1 << MIPS_PWCTL_PWEN_SHIFT;
-	if (config_enabled(CONFIG_64BIT))
+	if (IS_ENABLED(CONFIG_64BIT))
 		config |= MIPS_PWCTL_XU_MASK;
 	write_c0_pwctl(config);
 	pr_info("Hardware Page Table Walker enabled\n");
@@ -2522,7 +2522,7 @@ void build_tlb_refill_handler(void)
 	 */
 	static int run_once = 0;
 
-	if (config_enabled(CONFIG_XPA) && !cpu_has_rixi)
+	if (IS_ENABLED(CONFIG_XPA) && !cpu_has_rixi)
 		panic("Kernels supporting XPA currently require CPUs with RIXI");
 
 	output_pgtable_bits_defines();
diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c
index d78178daea4b..277cf52d80e1 100644
--- a/arch/mips/mm/uasm-micromips.c
+++ b/arch/mips/mm/uasm-micromips.c
@@ -53,8 +53,13 @@ static struct insn insn_table_MM[] = {
 	{ insn_bltzl, 0, 0 },
 	{ insn_bne, M(mm_bne32_op, 0, 0, 0, 0, 0), RT | RS | BIMM },
 	{ insn_cache, M(mm_pool32b_op, 0, 0, mm_cache_func, 0, 0), RT | RS | SIMM },
+	{ insn_cfc1, M(mm_pool32f_op, 0, 0, 0, mm_cfc1_op, mm_32f_73_op), RT | RS },
+	{ insn_cfcmsa, M(mm_pool32s_op, 0, msa_cfc_op, 0, 0, mm_32s_elm_op), RD | RE },
+	{ insn_ctc1, M(mm_pool32f_op, 0, 0, 0, mm_ctc1_op, mm_32f_73_op), RT | RS },
+	{ insn_ctcmsa, M(mm_pool32s_op, 0, msa_ctc_op, 0, 0, mm_32s_elm_op), RD | RE },
 	{ insn_daddu, 0, 0 },
 	{ insn_daddiu, 0, 0 },
+	{ insn_di, M(mm_pool32a_op, 0, 0, 0, mm_di_op, mm_pool32axf_op), RS },
 	{ insn_divu, M(mm_pool32a_op, 0, 0, 0, mm_divu_op, mm_pool32axf_op), RT | RS },
 	{ insn_dmfc0, 0, 0 },
 	{ insn_dmtc0, 0, 0 },
@@ -84,6 +89,8 @@ static struct insn insn_table_MM[] = {
 	{ insn_mfhi, M(mm_pool32a_op, 0, 0, 0, mm_mfhi32_op, mm_pool32axf_op), RS },
 	{ insn_mflo, M(mm_pool32a_op, 0, 0, 0, mm_mflo32_op, mm_pool32axf_op), RS },
 	{ insn_mtc0, M(mm_pool32a_op, 0, 0, 0, mm_mtc0_op, mm_pool32axf_op), RT | RS | RD },
+	{ insn_mthi, M(mm_pool32a_op, 0, 0, 0, mm_mthi32_op, mm_pool32axf_op), RS },
+	{ insn_mtlo, M(mm_pool32a_op, 0, 0, 0, mm_mtlo32_op, mm_pool32axf_op), RS },
 	{ insn_mul, M(mm_pool32a_op, 0, 0, 0, 0, mm_mul_op), RT | RS | RD },
 	{ insn_or, M(mm_pool32a_op, 0, 0, 0, 0, mm_or32_op), RT | RS | RD },
 	{ insn_ori, M(mm_ori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM },
@@ -166,13 +173,15 @@ static void build_insn(u32 **buf, enum opcode opc, ...)
 	op = ip->match;
 	va_start(ap, opc);
 	if (ip->fields & RS) {
-		if (opc == insn_mfc0 || opc == insn_mtc0)
+		if (opc == insn_mfc0 || opc == insn_mtc0 ||
+		    opc == insn_cfc1 || opc == insn_ctc1)
 			op |= build_rt(va_arg(ap, u32));
 		else
 			op |= build_rs(va_arg(ap, u32));
 	}
 	if (ip->fields & RT) {
-		if (opc == insn_mfc0 || opc == insn_mtc0)
+		if (opc == insn_mfc0 || opc == insn_mtc0 ||
+		    opc == insn_cfc1 || opc == insn_ctc1)
 			op |= build_rs(va_arg(ap, u32));
 		else
 			op |= build_rt(va_arg(ap, u32));
diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c
index 9c2220a45189..763d3f1edb8a 100644
--- a/arch/mips/mm/uasm-mips.c
+++ b/arch/mips/mm/uasm-mips.c
@@ -65,11 +65,16 @@ static struct insn insn_table[] = {
 #ifndef CONFIG_CPU_MIPSR6
 	{ insn_cache,  M(cache_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 #else
-	{ insn_cache,  M6(cache_op, 0, 0, 0, cache6_op),  RS | RT | SIMM9 },
+	{ insn_cache,  M6(spec3_op, 0, 0, 0, cache6_op),  RS | RT | SIMM9 },
 #endif
+	{ insn_cfc1, M(cop1_op, cfc_op, 0, 0, 0, 0), RT | RD },
+	{ insn_cfcmsa, M(msa_op, 0, msa_cfc_op, 0, 0, msa_elm_op), RD | RE },
+	{ insn_ctc1, M(cop1_op, ctc_op, 0, 0, 0, 0), RT | RD },
+	{ insn_ctcmsa, M(msa_op, 0, msa_ctc_op, 0, 0, msa_elm_op), RD | RE },
 	{ insn_daddiu, M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
 	{ insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD },
 	{ insn_dinsm, M(spec3_op, 0, 0, 0, 0, dinsm_op), RS | RT | RD | RE },
+	{ insn_di, M(cop0_op, mfmc0_op, 0, 12, 0, 0), RT },
 	{ insn_dins, M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE },
 	{ insn_divu, M(spec_op, 0, 0, 0, 0, divu_op), RS | RT },
 	{ insn_dmfc0, M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET},
@@ -114,7 +119,13 @@ static struct insn insn_table[] = {
 	{ insn_mflo,  M(spec_op, 0, 0, 0, 0, mflo_op), RD },
 	{ insn_mtc0,  M(cop0_op, mtc_op, 0, 0, 0, 0),  RT | RD | SET},
 	{ insn_mthc0,  M(cop0_op, mthc0_op, 0, 0, 0, 0),  RT | RD | SET},
+	{ insn_mthi,  M(spec_op, 0, 0, 0, 0, mthi_op), RS },
+	{ insn_mtlo,  M(spec_op, 0, 0, 0, 0, mtlo_op), RS },
+#ifndef CONFIG_CPU_MIPSR6
 	{ insn_mul, M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD},
+#else
+	{ insn_mul, M(spec_op, 0, 0, 0, mult_mul_op, mult_op), RS | RT | RD},
+#endif
 	{ insn_ori,  M(ori_op, 0, 0, 0, 0, 0),	RS | RT | UIMM },
 	{ insn_or,  M(spec_op, 0, 0, 0, 0, or_op),  RS | RT | RD },
 #ifndef CONFIG_CPU_MIPSR6
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index ad718debc35a..a82970442b8a 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -49,18 +49,19 @@ enum opcode {
 	insn_invalid,
 	insn_addiu, insn_addu, insn_and, insn_andi, insn_bbit0, insn_bbit1,
 	insn_beq, insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl,
-	insn_bne, insn_cache, insn_daddiu, insn_daddu, insn_dins, insn_dinsm,
-	insn_divu, insn_dmfc0, insn_dmtc0, insn_drotr, insn_drotr32, insn_dsll,
+	insn_bne, insn_cache, insn_cfc1, insn_cfcmsa, insn_ctc1, insn_ctcmsa,
+	insn_daddiu, insn_daddu, insn_di, insn_dins, insn_dinsm, insn_divu,
+	insn_dmfc0, insn_dmtc0, insn_drotr, insn_drotr32, insn_dsll,
 	insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32, insn_dsubu, insn_eret,
 	insn_ext, insn_ins, insn_j, insn_jal, insn_jalr, insn_jr, insn_lb,
 	insn_ld, insn_ldx, insn_lh, insn_ll, insn_lld, insn_lui, insn_lw,
 	insn_lwx, insn_mfc0, insn_mfhc0, insn_mfhi, insn_mflo, insn_mtc0,
-	insn_mthc0, insn_mul, insn_or, insn_ori, insn_pref, insn_rfe,
-	insn_rotr, insn_sc, insn_scd, insn_sd, insn_sll, insn_sllv, insn_slt,
-	insn_sltiu, insn_sltu, insn_sra, insn_srl, insn_srlv, insn_subu,
-	insn_sw, insn_sync, insn_syscall, insn_tlbp, insn_tlbr, insn_tlbwi,
-	insn_tlbwr, insn_wait, insn_wsbh, insn_xor, insn_xori, insn_yield,
-	insn_lddir, insn_ldpte,
+	insn_mthc0, insn_mthi, insn_mtlo, insn_mul, insn_or, insn_ori,
+	insn_pref, insn_rfe, insn_rotr, insn_sc, insn_scd, insn_sd, insn_sll,
+	insn_sllv, insn_slt, insn_sltiu, insn_sltu, insn_sra, insn_srl,
+	insn_srlv, insn_subu, insn_sw, insn_sync, insn_syscall, insn_tlbp,
+	insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait, insn_wsbh, insn_xor,
+	insn_xori, insn_yield, insn_lddir, insn_ldpte,
 };
 
 struct insn {
@@ -268,10 +269,15 @@ I_u1s2(_bltz)
 I_u1s2(_bltzl)
 I_u1u2s3(_bne)
 I_u2s3u1(_cache)
+I_u1u2(_cfc1)
+I_u2u1(_cfcmsa)
+I_u1u2(_ctc1)
+I_u2u1(_ctcmsa)
 I_u1u2u3(_dmfc0)
 I_u1u2u3(_dmtc0)
 I_u2u1s3(_daddiu)
 I_u3u1u2(_daddu)
+I_u1(_di);
 I_u1u2(_divu)
 I_u2u1u3(_dsll)
 I_u2u1u3(_dsll32)
@@ -301,6 +307,8 @@ I_u1(_mfhi)
 I_u1(_mflo)
 I_u1u2u3(_mtc0)
 I_u1u2u3(_mthc0)
+I_u1(_mthi)
+I_u1(_mtlo)
 I_u3u1u2(_mul)
 I_u2u1u3(_ori)
 I_u3u1u2(_or)
@@ -370,11 +378,7 @@ UASM_EXPORT_SYMBOL(ISAFUNC(uasm_build_label));
 int ISAFUNC(uasm_in_compat_space_p)(long addr)
 {
 	/* Is this address in 32bit compat space? */
-#ifdef CONFIG_64BIT
-	return (((addr) & 0xffffffff00000000L) == 0xffffffff00000000L);
-#else
-	return 1;
-#endif
+	return addr == (int)addr;
 }
 UASM_EXPORT_SYMBOL(ISAFUNC(uasm_in_compat_space_p));
 
diff --git a/arch/mips/mti-malta/malta-dtshim.c b/arch/mips/mti-malta/malta-dtshim.c
index f7133efc5843..151f4882ec8a 100644
--- a/arch/mips/mti-malta/malta-dtshim.c
+++ b/arch/mips/mti-malta/malta-dtshim.c
@@ -31,7 +31,7 @@ static unsigned __init gen_fdt_mem_array(__be32 *mem_array, unsigned long size)
 
 	entries = 1;
 	mem_array[0] = cpu_to_be32(PHYS_OFFSET);
-	if (config_enabled(CONFIG_EVA)) {
+	if (IS_ENABLED(CONFIG_EVA)) {
 		/*
 		 * The current Malta EVA configuration is "special" in that it
 		 * always makes use of addresses in the upper half of the 32 bit
@@ -82,7 +82,7 @@ static void __init append_memory(void *fdt, int root_off)
 		physical_memsize = 32 << 20;
 	}
 
-	if (config_enabled(CONFIG_CPU_BIG_ENDIAN)) {
+	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) {
 		/*
 		 * SOC-it swaps, or perhaps doesn't swap, when DMA'ing
 		 * the last word of physical memory.
diff --git a/arch/mips/mti-malta/malta-memory.c b/arch/mips/mti-malta/malta-memory.c
index d5f8dae6a797..a47556723b85 100644
--- a/arch/mips/mti-malta/malta-memory.c
+++ b/arch/mips/mti-malta/malta-memory.c
@@ -32,7 +32,7 @@ static void free_init_pages_eva_malta(void *begin, void *end)
 
 void __init fw_meminit(void)
 {
-	bool eva = config_enabled(CONFIG_EVA);
+	bool eva = IS_ENABLED(CONFIG_EVA);
 
 	free_init_pages_eva = eva ? free_init_pages_eva_malta : NULL;
 }
diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c
index 33d5ff5069e5..ec5b21678fad 100644
--- a/arch/mips/mti-malta/malta-setup.c
+++ b/arch/mips/mti-malta/malta-setup.c
@@ -261,7 +261,7 @@ void __init plat_mem_setup(void)
 	fdt = malta_dt_shim(fdt);
 	__dt_setup_arch(fdt);
 
-	if (config_enabled(CONFIG_EVA))
+	if (IS_ENABLED(CONFIG_EVA))
 		/* EVA has already been configured in mach-malta/kernel-init.h */
 		pr_info("Enhanced Virtual Addressing (EVA) activated\n");
 
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 1a8c96035716..39e7b472f0d8 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -426,7 +426,7 @@ static inline void emit_load_ptr(unsigned int dst, unsigned int src,
 static inline void emit_load_func(unsigned int reg, ptr imm,
 				  struct jit_ctx *ctx)
 {
-	if (config_enabled(CONFIG_64BIT)) {
+	if (IS_ENABLED(CONFIG_64BIT)) {
 		/* At this point imm is always 64-bit */
 		emit_load_imm(r_tmp, (u64)imm >> 32, ctx);
 		emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
@@ -516,7 +516,7 @@ static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx)
 static inline u16 align_sp(unsigned int num)
 {
 	/* Double word alignment for 32-bit, quadword for 64-bit */
-	unsigned int align = config_enabled(CONFIG_64BIT) ? 16 : 8;
+	unsigned int align = IS_ENABLED(CONFIG_64BIT) ? 16 : 8;
 	num = (num + (align - 1)) & -align;
 	return num;
 }
@@ -1199,7 +1199,7 @@ void bpf_jit_compile(struct bpf_prog *fp)
 
 	memset(&ctx, 0, sizeof(ctx));
 
-	ctx.offsets = kcalloc(fp->len, sizeof(*ctx.offsets), GFP_KERNEL);
+	ctx.offsets = kcalloc(fp->len + 1, sizeof(*ctx.offsets), GFP_KERNEL);
 	if (ctx.offsets == NULL)
 		return;
 
diff --git a/arch/mips/netlogic/common/nlm-dma.c b/arch/mips/netlogic/common/nlm-dma.c
index 3758715d4ab6..0630693bec2a 100644
--- a/arch/mips/netlogic/common/nlm-dma.c
+++ b/arch/mips/netlogic/common/nlm-dma.c
@@ -45,7 +45,7 @@
 static char *nlm_swiotlb;
 
 static void *nlm_dma_alloc_coherent(struct device *dev, size_t size,
-	dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs)
+	dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
 	/* ignore region specifiers */
 	gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
@@ -62,7 +62,7 @@ static void *nlm_dma_alloc_coherent(struct device *dev, size_t size,
 }
 
 static void nlm_dma_free_coherent(struct device *dev, size_t size,
-	void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs)
+	void *vaddr, dma_addr_t dma_handle, unsigned long attrs)
 {
 	swiotlb_free_coherent(dev, size, vaddr, dma_handle);
 }
diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index f1b11f0dea2d..b4c02f29663e 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -112,7 +112,14 @@ static void pcibios_scanbus(struct pci_controller *hose)
 		need_domain_info = 1;
 	}
 
-	if (!pci_has_flag(PCI_PROBE_ONLY)) {
+	/*
+	 * We insert PCI resources into the iomem_resource and
+	 * ioport_resource trees in either pci_bus_claim_resources()
+	 * or pci_bus_assign_resources().
+	 */
+	if (pci_has_flag(PCI_PROBE_ONLY)) {
+		pci_bus_claim_resources(bus);
+	} else {
 		pci_bus_size_bridges(bus);
 		pci_bus_assign_resources(bus);
 	}
@@ -319,6 +326,16 @@ void pcibios_fixup_bus(struct pci_bus *bus)
 EXPORT_SYMBOL(PCIBIOS_MIN_IO);
 EXPORT_SYMBOL(PCIBIOS_MIN_MEM);
 
+void pci_resource_to_user(const struct pci_dev *dev, int bar,
+			  const struct resource *rsrc, resource_size_t *start,
+			  resource_size_t *end)
+{
+	phys_addr_t size = resource_size(rsrc);
+
+	*start = fixup_bigphys_addr(rsrc->start, size);
+	*end = rsrc->start + size;
+}
+
 int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 			enum pci_mmap_state mmap_state, int write_combine)
 {
diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c
index 77ecf32ef3dc..51599710472b 100644
--- a/arch/mips/pic32/pic32mzda/init.c
+++ b/arch/mips/pic32/pic32mzda/init.c
@@ -33,8 +33,8 @@ static ulong get_fdtaddr(void)
 {
 	ulong ftaddr = 0;
 
-	if ((fw_arg0 == -2) && fw_arg1 && !fw_arg2 && !fw_arg3)
-		return (ulong)fw_arg1;
+	if (fw_passed_dtb && !fw_arg2 && !fw_arg3)
+		return (ulong)fw_passed_dtb;
 
 	if (__dtb_start < __dtb_end)
 		ftaddr = (ulong)__dtb_start;
diff --git a/arch/mips/pistachio/init.c b/arch/mips/pistachio/init.c
index c50a670e60d2..1c91cad7988f 100644
--- a/arch/mips/pistachio/init.c
+++ b/arch/mips/pistachio/init.c
@@ -59,29 +59,6 @@ const char *get_system_type(void)
 	return sys_type;
 }
 
-static void __init plat_setup_iocoherency(void)
-{
-	/*
-	 * Kernel has been configured with software coherency
-	 * but we might choose to turn it off and use hardware
-	 * coherency instead.
-	 */
-	if (mips_cm_numiocu() != 0) {
-		/* Nothing special needs to be done to enable coherency */
-		pr_info("CMP IOCU detected\n");
-		hw_coherentio = 1;
-		if (coherentio == 0)
-			pr_info("Hardware DMA cache coherency disabled\n");
-		else
-			pr_info("Hardware DMA cache coherency enabled\n");
-	} else {
-		if (coherentio == 1)
-			pr_info("Hardware DMA cache coherency unsupported, but enabled from command line!\n");
-		else
-			pr_info("Software DMA cache coherency enabled\n");
-	}
-}
-
 void __init *plat_get_fdt(void)
 {
 	if (fw_arg0 != -2)
@@ -92,8 +69,6 @@ void __init *plat_get_fdt(void)
 void __init plat_mem_setup(void)
 {
 	__dt_setup_arch(plat_get_fdt());
-
-	plat_setup_iocoherency();
 }
 
 #define DEFAULT_CPC_BASE_ADDR	0x1bde0000
diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c
index d40edda0ca3b..3c7c9bf57bf3 100644
--- a/arch/mips/ralink/mt7620.c
+++ b/arch/mips/ralink/mt7620.c
@@ -175,7 +175,7 @@ static struct rt2880_pmx_func spi_cs1_grp_mt7628[] = {
 };
 
 static struct rt2880_pmx_func spis_grp_mt7628[] = {
-	FUNC("pwm", 3, 14, 4),
+	FUNC("pwm_uart2", 3, 14, 4),
 	FUNC("util", 2, 14, 4),
 	FUNC("gpio", 1, 14, 4),
 	FUNC("spis", 0, 14, 4),
diff --git a/arch/mips/sgi-ip22/ip22-reset.c b/arch/mips/sgi-ip22/ip22-reset.c
index 063c2dd31e72..2f45b0357021 100644
--- a/arch/mips/sgi-ip22/ip22-reset.c
+++ b/arch/mips/sgi-ip22/ip22-reset.c
@@ -7,7 +7,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
-#include <linux/ds1286.h>
+#include <linux/rtc/ds1286.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c
index fb4b3520cdc6..7ee14f41fc25 100644
--- a/arch/mips/sni/time.c
+++ b/arch/mips/sni/time.c
@@ -8,7 +8,6 @@
 
 #include <asm/sni.h>
 #include <asm/time.h>
-#include <asm-generic/rtc.h>
 
 #define SNI_CLOCK_TICK_RATE	3686400
 #define SNI_COUNTER2_DIV	64
diff --git a/arch/mips/txx9/generic/pci.c b/arch/mips/txx9/generic/pci.c
index a77698ff2b6f..1f6bc9a3036c 100644
--- a/arch/mips/txx9/generic/pci.c
+++ b/arch/mips/txx9/generic/pci.c
@@ -268,7 +268,7 @@ static int txx9_i8259_irq_setup(int irq)
 	return err;
 }
 
-static void __init_refok quirk_slc90e66_bridge(struct pci_dev *dev)
+static void __ref quirk_slc90e66_bridge(struct pci_dev *dev)
 {
 	int irq;	/* PCI/ISA Bridge interrupt */
 	u8 reg_64;
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 9627e81a6cbb..38e3494bfb63 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -236,7 +236,9 @@ source "kernel/Kconfig.hz"
 config MN10300_RTC
 	bool "Using MN10300 RTC"
 	depends on MN10300_PROC_MN103E010 || MN10300_PROC_MN2WS0050
-	select GENERIC_CMOS_UPDATE
+	select RTC_CLASS
+	select RTC_DRV_CMOS
+	select RTC_SYSTOHC
 	default n
 	help
 	  This option enables support for the RTC, thus enabling time to be
diff --git a/arch/mn10300/include/asm/rtc-regs.h b/arch/mn10300/include/asm/rtc-regs.h
index c42deefaec11..c81cacecb6e3 100644
--- a/arch/mn10300/include/asm/rtc-regs.h
+++ b/arch/mn10300/include/asm/rtc-regs.h
@@ -75,9 +75,9 @@
 #define RTC_PORT(x)		0xd8600000
 #define RTC_ALWAYS_BCD		1	/* RTC operates in binary mode */
 
-#define CMOS_READ(addr)		__SYSREG(0xd8600000 + (addr), u8)
+#define CMOS_READ(addr)		__SYSREG(0xd8600000 + (u32)(addr), u8)
 #define CMOS_WRITE(val, addr)	\
-	do { __SYSREG(0xd8600000 + (addr), u8) = val; } while (0)
+	do { __SYSREG(0xd8600000 + (u32)(addr), u8) = val; } while (0)
 
 #define RTC_IRQ			RTIRQ
 
diff --git a/arch/mn10300/include/asm/rtc.h b/arch/mn10300/include/asm/rtc.h
index 6c14bb1d0d9b..07dc87656197 100644
--- a/arch/mn10300/include/asm/rtc.h
+++ b/arch/mn10300/include/asm/rtc.h
@@ -25,6 +25,4 @@ static inline void calibrate_clock(void)
 
 #endif /* !CONFIG_MN10300_RTC */
 
-#include <asm-generic/rtc.h>
-
 #endif /* _ASM_RTC_H */
diff --git a/arch/mn10300/kernel/rtc.c b/arch/mn10300/kernel/rtc.c
index 48d7058b3295..f81f37025072 100644
--- a/arch/mn10300/kernel/rtc.c
+++ b/arch/mn10300/kernel/rtc.c
@@ -12,107 +12,19 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/mc146818rtc.h>
-#include <linux/bcd.h>
-#include <linux/timex.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+
 #include <asm/rtc-regs.h>
 #include <asm/rtc.h>
 
 DEFINE_SPINLOCK(rtc_lock);
 EXPORT_SYMBOL(rtc_lock);
 
-/*
- * Read the current RTC time
- */
-void read_persistent_clock(struct timespec *ts)
-{
-	struct rtc_time tm;
-
-	get_rtc_time(&tm);
-
-	ts->tv_nsec = 0;
-	ts->tv_sec = mktime(tm.tm_year, tm.tm_mon, tm.tm_mday,
-			    tm.tm_hour, tm.tm_min, tm.tm_sec);
-
-	/* if rtc is way off in the past, set something reasonable */
-	if (ts->tv_sec < 0)
-		ts->tv_sec = mktime(2009, 1, 1, 12, 0, 0);
-}
-
-/*
- * In order to set the CMOS clock precisely, set_rtc_mmss has to be called 500
- * ms after the second nowtime has started, because when nowtime is written
- * into the registers of the CMOS clock, it will jump to the next second
- * precisely 500 ms later.  Check the Motorola MC146818A or Dallas DS12887 data
- * sheet for details.
- *
- * BUG: This routine does not handle hour overflow properly; it just
- *      sets the minutes. Usually you'll only notice that after reboot!
- */
-static int set_rtc_mmss(unsigned long nowtime)
-{
-	unsigned char save_control, save_freq_select;
-	int retval = 0;
-	int real_seconds, real_minutes, cmos_minutes;
-
-	/* gets recalled with irq locally disabled */
-	spin_lock(&rtc_lock);
-	save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being
-						* set */
-	CMOS_WRITE(save_control | RTC_SET, RTC_CONTROL);
-
-	save_freq_select = CMOS_READ(RTC_FREQ_SELECT); /* stop and reset
-							* prescaler */
-	CMOS_WRITE(save_freq_select | RTC_DIV_RESET2, RTC_FREQ_SELECT);
-
-	cmos_minutes = CMOS_READ(RTC_MINUTES);
-	if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		cmos_minutes = bcd2bin(cmos_minutes);
-
-	/*
-	 * since we're only adjusting minutes and seconds,
-	 * don't interfere with hour overflow. This avoids
-	 * messing with unknown time zones but requires your
-	 * RTC not to be off by more than 15 minutes
-	 */
-	real_seconds = nowtime % 60;
-	real_minutes = nowtime / 60;
-	if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1)
-		/* correct for half hour time zone */
-		real_minutes += 30;
-	real_minutes %= 60;
-
-	if (abs(real_minutes - cmos_minutes) < 30) {
-		if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-			real_seconds = bin2bcd(real_seconds);
-			real_minutes = bin2bcd(real_minutes);
-		}
-		CMOS_WRITE(real_seconds, RTC_SECONDS);
-		CMOS_WRITE(real_minutes, RTC_MINUTES);
-	} else {
-		printk_once(KERN_NOTICE
-		       "set_rtc_mmss: can't update from %d to %d\n",
-		       cmos_minutes, real_minutes);
-		retval = -1;
-	}
-
-	/* The following flags have to be released exactly in this order,
-	 * otherwise the DS12887 (popular MC146818A clone with integrated
-	 * battery and quartz) will not reset the oscillator and will not
-	 * update precisely 500 ms later. You won't find this mentioned in
-	 * the Dallas Semiconductor data sheets, but who believes data
-	 * sheets anyway ...                           -- Markus Kuhn
-	 */
-	CMOS_WRITE(save_control, RTC_CONTROL);
-	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-	spin_unlock(&rtc_lock);
-
-	return retval;
-}
-
-int update_persistent_clock(struct timespec now)
-{
-	return set_rtc_mmss(now.tv_sec);
-}
+static const __initdata struct resource res[] = {
+	DEFINE_RES_IO(RTC_PORT(0), RTC_IO_EXTENT),
+	DEFINE_RES_IRQ(RTC_IRQ),
+};
 
 /*
  * calibrate the TSC clock against the RTC
@@ -129,4 +41,6 @@ void __init calibrate_clock(void)
 	RTCRA |= RTCRA_DVR;
 	RTCRA &= ~RTCRA_DVR;
 	RTCRB &= ~RTCRB_SET;
+
+	platform_device_register_simple("rtc_cmos", -1, res, ARRAY_SIZE(res));
 }
diff --git a/arch/mn10300/mm/dma-alloc.c b/arch/mn10300/mm/dma-alloc.c
index 8842394cb49a..4f4b9029f0ea 100644
--- a/arch/mn10300/mm/dma-alloc.c
+++ b/arch/mn10300/mm/dma-alloc.c
@@ -21,7 +21,7 @@
 static unsigned long pci_sram_allocated = 0xbc000000;
 
 static void *mn10300_dma_alloc(struct device *dev, size_t size,
-		dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs)
+		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
 	unsigned long addr;
 	void *ret;
@@ -63,7 +63,7 @@ done:
 }
 
 static void mn10300_dma_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_handle, struct dma_attrs *attrs)
+		dma_addr_t dma_handle, unsigned long attrs)
 {
 	unsigned long addr = (unsigned long) vaddr & ~0x20000000;
 
@@ -75,7 +75,7 @@ static void mn10300_dma_free(struct device *dev, size_t size, void *vaddr,
 
 static int mn10300_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 		int nents, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -92,7 +92,7 @@ static int mn10300_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 
 static dma_addr_t mn10300_dma_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size,
-		enum dma_data_direction direction, struct dma_attrs *attrs)
+		enum dma_data_direction direction, unsigned long attrs)
 {
 	return page_to_bus(page) + offset;
 }
diff --git a/arch/mn10300/proc-mn103e010/proc-init.c b/arch/mn10300/proc-mn103e010/proc-init.c
index 27b97980dca4..102d86a6ae56 100644
--- a/arch/mn10300/proc-mn103e010/proc-init.c
+++ b/arch/mn10300/proc-mn103e010/proc-init.c
@@ -9,7 +9,10 @@
  * 2 of the Licence, or (at your option) any later version.
  */
 #include <linux/kernel.h>
+#include <linux/irq.h>
+#include <asm/cacheflush.h>
 #include <asm/fpu.h>
+#include <asm/irq.h>
 #include <asm/rtc.h>
 #include <asm/busctl-regs.h>
 
diff --git a/arch/mn10300/proc-mn2ws0050/proc-init.c b/arch/mn10300/proc-mn2ws0050/proc-init.c
index ee6d03dbc8d8..950cc8dbb284 100644
--- a/arch/mn10300/proc-mn2ws0050/proc-init.c
+++ b/arch/mn10300/proc-mn2ws0050/proc-init.c
@@ -14,6 +14,7 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 
+#include <asm/cacheflush.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
diff --git a/arch/nios2/mm/dma-mapping.c b/arch/nios2/mm/dma-mapping.c
index 90422c367ed3..d800fad87896 100644
--- a/arch/nios2/mm/dma-mapping.c
+++ b/arch/nios2/mm/dma-mapping.c
@@ -59,7 +59,7 @@ static inline void __dma_sync_for_cpu(void *vaddr, size_t size,
 }
 
 static void *nios2_dma_alloc(struct device *dev, size_t size,
-		dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs)
+		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
 	void *ret;
 
@@ -84,7 +84,7 @@ static void *nios2_dma_alloc(struct device *dev, size_t size,
 }
 
 static void nios2_dma_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_handle, struct dma_attrs *attrs)
+		dma_addr_t dma_handle, unsigned long attrs)
 {
 	unsigned long addr = (unsigned long) CAC_ADDR((unsigned long) vaddr);
 
@@ -93,7 +93,7 @@ static void nios2_dma_free(struct device *dev, size_t size, void *vaddr,
 
 static int nios2_dma_map_sg(struct device *dev, struct scatterlist *sg,
 		int nents, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	int i;
 
@@ -113,7 +113,7 @@ static int nios2_dma_map_sg(struct device *dev, struct scatterlist *sg,
 static dma_addr_t nios2_dma_map_page(struct device *dev, struct page *page,
 			unsigned long offset, size_t size,
 			enum dma_data_direction direction,
-			struct dma_attrs *attrs)
+			unsigned long attrs)
 {
 	void *addr = page_address(page) + offset;
 
@@ -123,14 +123,14 @@ static dma_addr_t nios2_dma_map_page(struct device *dev, struct page *page,
 
 static void nios2_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
 		size_t size, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	__dma_sync_for_cpu(phys_to_virt(dma_address), size, direction);
 }
 
 static void nios2_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
 		int nhwentries, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	void *addr;
 	int i;
diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c
index e75c75d249d6..c92fe4234009 100644
--- a/arch/nios2/mm/init.c
+++ b/arch/nios2/mm/init.c
@@ -89,7 +89,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
 }
 #endif
 
-void __init_refok free_initmem(void)
+void __ref free_initmem(void)
 {
 	free_initmem_default(-1);
 }
diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
index 0b77ddb1ee07..140c99140649 100644
--- a/arch/openrisc/kernel/dma.c
+++ b/arch/openrisc/kernel/dma.c
@@ -22,7 +22,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/dma-debug.h>
 #include <linux/export.h>
-#include <linux/dma-attrs.h>
 
 #include <asm/cpuinfo.h>
 #include <asm/spr_defs.h>
@@ -83,7 +82,7 @@ page_clear_nocache(pte_t *pte, unsigned long addr,
 static void *
 or1k_dma_alloc(struct device *dev, size_t size,
 	       dma_addr_t *dma_handle, gfp_t gfp,
-	       struct dma_attrs *attrs)
+	       unsigned long attrs)
 {
 	unsigned long va;
 	void *page;
@@ -101,7 +100,7 @@ or1k_dma_alloc(struct device *dev, size_t size,
 
 	va = (unsigned long)page;
 
-	if (!dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs)) {
+	if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) {
 		/*
 		 * We need to iterate through the pages, clearing the dcache for
 		 * them and setting the cache-inhibit bit.
@@ -117,7 +116,7 @@ or1k_dma_alloc(struct device *dev, size_t size,
 
 static void
 or1k_dma_free(struct device *dev, size_t size, void *vaddr,
-	      dma_addr_t dma_handle, struct dma_attrs *attrs)
+	      dma_addr_t dma_handle, unsigned long attrs)
 {
 	unsigned long va = (unsigned long)vaddr;
 	struct mm_walk walk = {
@@ -125,7 +124,7 @@ or1k_dma_free(struct device *dev, size_t size, void *vaddr,
 		.mm = &init_mm
 	};
 
-	if (!dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs)) {
+	if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) {
 		/* walk_page_range shouldn't be able to fail here */
 		WARN_ON(walk_page_range(va, va + size, &walk));
 	}
@@ -137,7 +136,7 @@ static dma_addr_t
 or1k_map_page(struct device *dev, struct page *page,
 	      unsigned long offset, size_t size,
 	      enum dma_data_direction dir,
-	      struct dma_attrs *attrs)
+	      unsigned long attrs)
 {
 	unsigned long cl;
 	dma_addr_t addr = page_to_phys(page) + offset;
@@ -170,7 +169,7 @@ or1k_map_page(struct device *dev, struct page *page,
 static void
 or1k_unmap_page(struct device *dev, dma_addr_t dma_handle,
 		size_t size, enum dma_data_direction dir,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	/* Nothing special to do here... */
 }
@@ -178,14 +177,14 @@ or1k_unmap_page(struct device *dev, dma_addr_t dma_handle,
 static int
 or1k_map_sg(struct device *dev, struct scatterlist *sg,
 	    int nents, enum dma_data_direction dir,
-	    struct dma_attrs *attrs)
+	    unsigned long attrs)
 {
 	struct scatterlist *s;
 	int i;
 
 	for_each_sg(sg, s, nents, i) {
 		s->dma_address = or1k_map_page(dev, sg_page(s), s->offset,
-					       s->length, dir, NULL);
+					       s->length, dir, 0);
 	}
 
 	return nents;
@@ -194,13 +193,13 @@ or1k_map_sg(struct device *dev, struct scatterlist *sg,
 static void
 or1k_unmap_sg(struct device *dev, struct scatterlist *sg,
 	      int nents, enum dma_data_direction dir,
-	      struct dma_attrs *attrs)
+	      unsigned long attrs)
 {
 	struct scatterlist *s;
 	int i;
 
 	for_each_sg(sg, s, nents, i) {
-		or1k_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, NULL);
+		or1k_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, 0);
 	}
 }
 
diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
index 5b2a95116e8f..fa60b81aee3e 100644
--- a/arch/openrisc/mm/ioremap.c
+++ b/arch/openrisc/mm/ioremap.c
@@ -38,7 +38,7 @@ static unsigned int fixmaps_used __initdata;
  * have to convert them into an offset in a page-aligned mapping, but the
  * caller shouldn't need to know that small detail.
  */
-void __iomem *__init_refok
+void __iomem *__ref
 __ioremap(phys_addr_t addr, unsigned long size, pgprot_t prot)
 {
 	phys_addr_t p;
@@ -116,7 +116,7 @@ void iounmap(void *addr)
  * the memblock infrastructure.
  */
 
-pte_t __init_refok *pte_alloc_one_kernel(struct mm_struct *mm,
+pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm,
 					 unsigned long address)
 {
 	pte_t *pte;
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index dc117385ce2e..cd8778103165 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -31,6 +31,7 @@ config PARISC
 	select TTY # Needed for pdc_cons.c
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_HASH
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_UNSTABLE_SCHED_CLOCK if (SMP || !64BIT)
diff --git a/arch/parisc/include/asm/hash.h b/arch/parisc/include/asm/hash.h
new file mode 100644
index 000000000000..dbe93311aa26
--- /dev/null
+++ b/arch/parisc/include/asm/hash.h
@@ -0,0 +1,146 @@
+#ifndef _ASM_HASH_H
+#define _ASM_HASH_H
+
+/*
+ * HP-PA only implements integer multiply in the FPU.  However, for
+ * integer multiplies by constant, it has a number of shift-and-add
+ * (but no shift-and-subtract, sigh!) instructions that a compiler
+ * can synthesize a code sequence with.
+ *
+ * Unfortunately, GCC isn't very efficient at using them.  For example
+ * it uses three instructions for "x *= 21" when only two are needed.
+ * But we can find a sequence manually.
+ */
+
+#define HAVE_ARCH__HASH_32 1
+
+/*
+ * This is a multiply by GOLDEN_RATIO_32 = 0x61C88647 optimized for the
+ * PA7100 pairing rules.  This is an in-order 2-way superscalar processor.
+ * Only one instruction in a pair may be a shift (by more than 3 bits),
+ * but other than that, simple ALU ops (including shift-and-add by up
+ * to 3 bits) may be paired arbitrarily.
+ *
+ * PA8xxx processors also dual-issue ALU instructions, although with
+ * fewer constraints, so this schedule is good for them, too.
+ *
+ * This 6-step sequence was found by Yevgen Voronenko's implementation
+ * of the Hcub algorithm at http://spiral.ece.cmu.edu/mcm/gen.html.
+ */
+static inline u32 __attribute_const__ __hash_32(u32 x)
+{
+	u32 a, b, c;
+
+	/*
+	 * Phase 1: Compute  a = (x << 19) + x,
+	 * b = (x << 9) + a, c = (x << 23) + b.
+	 */
+	a = x << 19;		/* Two shifts can't be paired */
+	b = x << 9;	a += x;
+	c = x << 23;	b += a;
+			c += b;
+	/* Phase 2: Return (b<<11) + (c<<6) + (a<<3) - c */
+	b <<= 11;
+	a += c << 3;	b -= c;
+	return (a << 3) + b;
+}
+
+#if BITS_PER_LONG == 64
+
+#define HAVE_ARCH_HASH_64 1
+
+/*
+ * Finding a good shift-and-add chain for GOLDEN_RATIO_64 is tricky,
+ * because available software for the purpose chokes on constants this
+ * large.  (It's mostly designed for compiling FIR filter coefficients
+ * into FPGAs.)
+ *
+ * However, Jason Thong pointed out a work-around.  The Hcub software
+ * (http://spiral.ece.cmu.edu/mcm/gen.html) is designed for *multiple*
+ * constant multiplication, and is good at finding shift-and-add chains
+ * which share common terms.
+ *
+ * Looking at 0x0x61C8864680B583EB in binary:
+ * 0110000111001000100001100100011010000000101101011000001111101011
+ *  \______________/    \__________/       \_______/     \________/
+ *   \____________________________/         \____________________/
+ * you can see the non-zero bits are divided into several well-separated
+ * blocks.  Hcub can find algorithms for those terms separately, which
+ * can then be shifted and added together.
+ *
+ * Dividing the input into 2, 3 or 4 blocks, Hcub can find solutions
+ * with 10, 9 or 8 adds, respectively, making a total of 11 for the
+ * whole number.
+ *
+ * Using just two large blocks, 0xC3910C8D << 31 in the high bits,
+ * and 0xB583EB in the low bits, produces as good an algorithm as any,
+ * and with one more small shift than alternatives.
+ *
+ * The high bits are a larger number and more work to compute, as well
+ * as needing one extra cycle to shift left 31 bits before the final
+ * addition, so they are the critical path for scheduling.  The low bits
+ * can fit into the scheduling slots left over.
+ */
+
+
+/*
+ * This _ASSIGN(dst, src) macro performs "dst = src", but prevents GCC
+ * from inferring anything about the value assigned to "dest".
+ *
+ * This prevents it from mis-optimizing certain sequences.
+ * In particular, gcc is annoyingly eager to combine consecutive shifts.
+ * Given "x <<= 19; y += x; z += x << 1;", GCC will turn this into
+ * "y += x << 19; z += x << 20;" even though the latter sequence needs
+ * an additional instruction and temporary register.
+ *
+ * Because no actual assembly code is generated, this construct is
+ * usefully portable across all GCC platforms, and so can be test-compiled
+ * on non-PA systems.
+ *
+ * In two places, additional unused input dependencies are added.  This
+ * forces GCC's scheduling so it does not rearrange instructions too much.
+ * Because the PA-8xxx is out of order, I'm not sure how much this matters,
+ * but why make it more difficult for the processor than necessary?
+ */
+#define _ASSIGN(dst, src, ...) asm("" : "=r" (dst) : "0" (src), ##__VA_ARGS__)
+
+/*
+ * Multiply by GOLDEN_RATIO_64 = 0x0x61C8864680B583EB using a heavily
+ * optimized shift-and-add sequence.
+ *
+ * Without the final shift, the multiply proper is 19 instructions,
+ * 10 cycles and uses only 4 temporaries.  Whew!
+ *
+ * You are not expected to understand this.
+ */
+static __always_inline u32 __attribute_const__
+hash_64(u64 a, unsigned int bits)
+{
+	u64 b, c, d;
+
+	/*
+	 * Encourage GCC to move a dynamic shift to %sar early,
+	 * thereby freeing up an additional temporary register.
+	 */
+	if (!__builtin_constant_p(bits))
+		asm("" : "=q" (bits) : "0" (64 - bits));
+	else
+		bits = 64 - bits;
+
+	_ASSIGN(b, a*5);	c = a << 13;
+	b = (b << 2) + a;	_ASSIGN(d, a << 17);
+	a = b + (a << 1);	c += d;
+	d = a << 10;		_ASSIGN(a, a << 19);
+	d = a - d;		_ASSIGN(a, a << 4, "X" (d));
+	c += b;			a += b;
+	d -= c;			c += a << 1;
+	a += c << 3;		_ASSIGN(b, b << (7+31), "X" (c), "X" (d));
+	a <<= 31;		b += d;
+	a += b;
+	return a >> bits;
+}
+#undef _ASSIGN	/* We're a widely-used header file, so don't litter! */
+
+#endif /* BITS_PER_LONG == 64 */
+
+#endif /* _ASM_HASH_H */
diff --git a/arch/parisc/include/asm/mc146818rtc.h b/arch/parisc/include/asm/mc146818rtc.h
deleted file mode 100644
index adf41631449f..000000000000
--- a/arch/parisc/include/asm/mc146818rtc.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Machine dependent access functions for RTC registers.
- */
-#ifndef _ASM_MC146818RTC_H
-#define _ASM_MC146818RTC_H
-
-/* empty include file to satisfy the include in genrtc.c */
-
-#endif /* _ASM_MC146818RTC_H */
diff --git a/arch/parisc/include/asm/rtc.h b/arch/parisc/include/asm/rtc.h
deleted file mode 100644
index 099d641a42c2..000000000000
--- a/arch/parisc/include/asm/rtc.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/* 
- * include/asm-parisc/rtc.h
- *
- * Copyright 2002 Randolph CHung <tausq@debian.org>
- *
- * Based on: include/asm-ppc/rtc.h and the genrtc driver in the
- * 2.4 parisc linux tree
- */
-
-#ifndef __ASM_RTC_H__
-#define __ASM_RTC_H__
-
-#ifdef __KERNEL__
-
-#include <linux/rtc.h>
-
-#include <asm/pdc.h>
-
-#define SECS_PER_HOUR   (60 * 60)
-#define SECS_PER_DAY    (SECS_PER_HOUR * 24)
-
-
-#define RTC_PIE 0x40		/* periodic interrupt enable */
-#define RTC_AIE 0x20		/* alarm interrupt enable */
-#define RTC_UIE 0x10		/* update-finished interrupt enable */
-
-#define RTC_BATT_BAD 0x100	/* battery bad */
-
-/* some dummy definitions */
-#define RTC_SQWE 0x08		/* enable square-wave output */
-#define RTC_DM_BINARY 0x04	/* all time/date values are BCD if clear */
-#define RTC_24H 0x02		/* 24 hour mode - else hours bit 7 means pm */
-#define RTC_DST_EN 0x01	        /* auto switch DST - works f. USA only */
-
-# define __isleap(year) \
-  ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
-
-/* How many days come before each month (0-12).  */
-static const unsigned short int __mon_yday[2][13] =
-{
-	/* Normal years.  */
-	{ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
-	/* Leap years.  */
-	{ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
-};
-
-static inline unsigned int get_rtc_time(struct rtc_time *wtime)
-{
-	struct pdc_tod tod_data;
-	long int days, rem, y;
-	const unsigned short int *ip;
-
-	memset(wtime, 0, sizeof(*wtime));
-	if (pdc_tod_read(&tod_data) < 0)
-		return RTC_24H | RTC_BATT_BAD;
-
-	// most of the remainder of this function is:
-//	Copyright (C) 1991, 1993, 1997, 1998 Free Software Foundation, Inc.
-//	This was originally a part of the GNU C Library.
-//      It is distributed under the GPL, and was swiped from offtime.c
-
-
-	days = tod_data.tod_sec / SECS_PER_DAY;
-	rem = tod_data.tod_sec % SECS_PER_DAY;
-
-	wtime->tm_hour = rem / SECS_PER_HOUR;
-	rem %= SECS_PER_HOUR;
-	wtime->tm_min = rem / 60;
-	wtime->tm_sec = rem % 60;
-
-	y = 1970;
-
-#define DIV(a, b) ((a) / (b) - ((a) % (b) < 0))
-#define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400))
-
-	while (days < 0 || days >= (__isleap (y) ? 366 : 365))
-	{
-		/* Guess a corrected year, assuming 365 days per year.  */
-		long int yg = y + days / 365 - (days % 365 < 0);
-
-		/* Adjust DAYS and Y to match the guessed year.  */
-		days -= ((yg - y) * 365
-			 + LEAPS_THRU_END_OF (yg - 1)
-			 - LEAPS_THRU_END_OF (y - 1));
-		y = yg;
-	}
-	wtime->tm_year = y - 1900;
-
-	ip = __mon_yday[__isleap(y)];
-	for (y = 11; days < (long int) ip[y]; --y)
-		continue;
-	days -= ip[y];
-	wtime->tm_mon = y;
-	wtime->tm_mday = days + 1;
-
-	return RTC_24H;
-}
-
-static int set_rtc_time(struct rtc_time *wtime)
-{
-	u_int32_t secs;
-
-	secs = mktime(wtime->tm_year + 1900, wtime->tm_mon + 1, wtime->tm_mday, 
-		      wtime->tm_hour, wtime->tm_min, wtime->tm_sec);
-
-	if(pdc_tod_set(secs, 0) < 0)
-		return -1;
-	else
-		return 0;
-
-}
-
-static inline unsigned int get_rtc_ss(void)
-{
-	struct rtc_time h;
-
-	get_rtc_time(&h);
-	return h.tm_sec;
-}
-
-static inline int get_rtc_pll(struct rtc_pll_info *pll)
-{
-	return -EINVAL;
-}
-static inline int set_rtc_pll(struct rtc_pll_info *pll)
-{
-	return -EINVAL;
-}
-
-#endif /* __KERNEL__ */
-#endif /* __ASM_RTC_H__ */
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index 22395901d47b..e5d71905cad5 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -1354,9 +1354,9 @@ int pdc_pat_io_pci_cfg_read(unsigned long pci_addr, int pci_size, u32 *mem_addr)
 	retval = mem_pdc_call(PDC_PAT_IO, PDC_PAT_IO_PCI_CONFIG_READ,
 					__pa(pdc_result), pci_addr, pci_size);
 	switch(pci_size) {
-		case 1: *(u8 *) mem_addr =  (u8)  pdc_result[0];
-		case 2: *(u16 *)mem_addr =  (u16) pdc_result[0];
-		case 4: *(u32 *)mem_addr =  (u32) pdc_result[0];
+		case 1: *(u8 *) mem_addr =  (u8)  pdc_result[0]; break;
+		case 2: *(u16 *)mem_addr =  (u16) pdc_result[0]; break;
+		case 4: *(u32 *)mem_addr =  (u32) pdc_result[0]; break;
 	}
 	spin_unlock_irqrestore(&pdc_lock, flags);
 
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index a27e4928bf73..02d9ed0f3949 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -414,7 +414,7 @@ pcxl_dma_init(void)
 __initcall(pcxl_dma_init);
 
 static void *pa11_dma_alloc(struct device *dev, size_t size,
-		dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs)
+		dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs)
 {
 	unsigned long vaddr;
 	unsigned long paddr;
@@ -441,7 +441,7 @@ static void *pa11_dma_alloc(struct device *dev, size_t size,
 }
 
 static void pa11_dma_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_handle, struct dma_attrs *attrs)
+		dma_addr_t dma_handle, unsigned long attrs)
 {
 	int order;
 
@@ -454,7 +454,7 @@ static void pa11_dma_free(struct device *dev, size_t size, void *vaddr,
 
 static dma_addr_t pa11_dma_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size,
-		enum dma_data_direction direction, struct dma_attrs *attrs)
+		enum dma_data_direction direction, unsigned long attrs)
 {
 	void *addr = page_address(page) + offset;
 	BUG_ON(direction == DMA_NONE);
@@ -465,7 +465,7 @@ static dma_addr_t pa11_dma_map_page(struct device *dev, struct page *page,
 
 static void pa11_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
 		size_t size, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	BUG_ON(direction == DMA_NONE);
 
@@ -484,7 +484,7 @@ static void pa11_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
 
 static int pa11_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 		int nents, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	int i;
 	struct scatterlist *sg;
@@ -503,7 +503,7 @@ static int pa11_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 
 static void pa11_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		int nents, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	int i;
 	struct scatterlist *sg;
@@ -577,11 +577,11 @@ struct dma_map_ops pcxl_dma_ops = {
 };
 
 static void *pcx_dma_alloc(struct device *dev, size_t size,
-		dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs)
+		dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs)
 {
 	void *addr;
 
-	if (!dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs))
+	if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0)
 		return NULL;
 
 	addr = (void *)__get_free_pages(flag, get_order(size));
@@ -592,7 +592,7 @@ static void *pcx_dma_alloc(struct device *dev, size_t size,
 }
 
 static void pcx_dma_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t iova, struct dma_attrs *attrs)
+		dma_addr_t iova, unsigned long attrs)
 {
 	free_pages((unsigned long)vaddr, get_order(size));
 	return;
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 31ec99a5f119..505cf1ac5af2 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -12,6 +12,7 @@
  */
 #include <linux/errno.h>
 #include <linux/module.h>
+#include <linux/rtc.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/param.h>
@@ -248,14 +249,47 @@ void __init start_cpu_itimer(void)
 	per_cpu(cpu_data, cpu).it_value = next_tick;
 }
 
+#if IS_ENABLED(CONFIG_RTC_DRV_GENERIC)
+static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
+{
+	struct pdc_tod tod_data;
+
+	memset(tm, 0, sizeof(*tm));
+	if (pdc_tod_read(&tod_data) < 0)
+		return -EOPNOTSUPP;
+
+	/* we treat tod_sec as unsigned, so this can work until year 2106 */
+	rtc_time64_to_tm(tod_data.tod_sec, tm);
+	return rtc_valid_tm(tm);
+}
+
+static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
+{
+	time64_t secs = rtc_tm_to_time64(tm);
+
+	if (pdc_tod_set(secs, 0) < 0)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static const struct rtc_class_ops rtc_generic_ops = {
+	.read_time = rtc_generic_get_time,
+	.set_time = rtc_generic_set_time,
+};
+
 static int __init rtc_init(void)
 {
 	struct platform_device *pdev;
 
-	pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
+	pdev = platform_device_register_data(NULL, "rtc-generic", -1,
+					     &rtc_generic_ops,
+					     sizeof(rtc_generic_ops));
+
 	return PTR_ERR_OR_ZERO(pdev);
 }
 device_initcall(rtc_init);
+#endif
 
 void read_persistent_clock(struct timespec *ts)
 {
diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c
index fb8e10a4fb39..eaffbb90aa14 100644
--- a/arch/parisc/lib/iomap.c
+++ b/arch/parisc/lib/iomap.c
@@ -125,22 +125,22 @@ static void ioport_write32r(void __iomem *addr, const void *s, unsigned long n)
 }
 
 static const struct iomap_ops ioport_ops = {
-	ioport_read8,
-	ioport_read16,
-	ioport_read16,
-	ioport_read32,
-	ioport_read32,
-	ioport_write8,
-	ioport_write16,
-	ioport_write16,
-	ioport_write32,
-	ioport_write32,
-	ioport_read8r,
-	ioport_read16r,
-	ioport_read32r,
-	ioport_write8r,
-	ioport_write16r,
-	ioport_write32r,
+	.read8 = ioport_read8,
+	.read16 = ioport_read16,
+	.read16be = ioport_read16,
+	.read32 = ioport_read32,
+	.read32be = ioport_read32,
+	.write8 = ioport_write8,
+	.write16 = ioport_write16,
+	.write16be = ioport_write16,
+	.write32 = ioport_write32,
+	.write32be = ioport_write32,
+	.read8r = ioport_read8r,
+	.read16r = ioport_read16r,
+	.read32r = ioport_read32r,
+	.write8r = ioport_write8r,
+	.write16r = ioport_write16r,
+	.write32r = ioport_write32r,
 };
 
 /* Legacy I/O memory ops */
@@ -244,22 +244,22 @@ static void iomem_write32r(void __iomem *addr, const void *s, unsigned long n)
 }
 
 static const struct iomap_ops iomem_ops = {
-	iomem_read8,
-	iomem_read16,
-	iomem_read16be,
-	iomem_read32,
-	iomem_read32be,
-	iomem_write8,
-	iomem_write16,
-	iomem_write16be,
-	iomem_write32,
-	iomem_write32be,
-	iomem_read8r,
-	iomem_read16r,
-	iomem_read32r,
-	iomem_write8r,
-	iomem_write16r,
-	iomem_write32r,
+	.read8 = iomem_read8,
+	.read16 = iomem_read16,
+	.read16be = iomem_read16be,
+	.read32 = iomem_read32,
+	.read32be = iomem_read32be,
+	.write8 = iomem_write8,
+	.write16 = iomem_write16,
+	.write16be = iomem_write16be,
+	.write32 = iomem_write32,
+	.write32be = iomem_write32be,
+	.read8r = iomem_read8r,
+	.read16r = iomem_read16r,
+	.read32r = iomem_read32r,
+	.write8r = iomem_write8r,
+	.write16r = iomem_write16r,
+	.write32r = iomem_write32r,
 };
 
 static const struct iomap_ops *iomap_ops[8] = {
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index ec4047e170a0..927d2ab2ce08 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -166,6 +166,7 @@ config PPC
 	select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select GENERIC_CPU_AUTOPROBE
 	select HAVE_VIRT_CPU_ACCOUNTING
+	select HAVE_ARCH_HARDENED_USERCOPY
 
 config GENERIC_CSUM
 	def_bool CPU_LITTLE_ENDIAN
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 171047822b56..63292f64b25a 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -60,6 +60,25 @@ config CODE_PATCHING_SELFTEST
 	depends on DEBUG_KERNEL
 	default n
 
+config JUMP_LABEL_FEATURE_CHECKS
+	bool "Enable use of jump label for cpu/mmu_has_feature()"
+	depends on JUMP_LABEL
+	default y
+	help
+	  Selecting this options enables use of jump labels for some internal
+	  feature checks. This should generate more optimal code for those
+	  checks.
+
+config JUMP_LABEL_FEATURE_CHECK_DEBUG
+	bool "Do extra check on feature fixup calls"
+	depends on DEBUG_KERNEL && JUMP_LABEL_FEATURE_CHECKS
+	default n
+	help
+	  This tries to catch incorrect usage of cpu_has_feature() and
+	  mmu_has_feature() in the code.
+
+	  If you don't know what this means, say N.
+
 config FTR_FIXUP_SELFTEST
 	bool "Run self-tests of the feature-fixup code"
 	depends on DEBUG_KERNEL
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 4cd612a6e272..1a2a6e8dc40d 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -43,7 +43,7 @@ ifeq ($(call cc-option-yn, -fstack-protector),y)
 BOOTCFLAGS	+= -fno-stack-protector
 endif
 
-BOOTCFLAGS	+= -I$(obj) -I$(srctree)/$(obj)
+BOOTCFLAGS	+= -I$(objtree)/$(obj) -I$(srctree)/$(obj)
 
 DTC_FLAGS	?= -p 1024
 
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h b/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h
index 60f47649306f..c45189aa7476 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h
@@ -11,4 +11,19 @@ extern unsigned long
 radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 				unsigned long len, unsigned long pgoff,
 				unsigned long flags);
+
+static inline int hstate_get_psize(struct hstate *hstate)
+{
+	unsigned long shift;
+
+	shift = huge_page_shift(hstate);
+	if (shift == mmu_psize_defs[MMU_PAGE_2M].shift)
+		return MMU_PAGE_2M;
+	else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
+		return MMU_PAGE_1G;
+	else {
+		WARN(1, "Wrong huge page shift\n");
+		return mmu_virtual_psize;
+	}
+}
 #endif
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 5eaf86ac143d..287a656ceb57 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -24,6 +24,7 @@
 #include <asm/book3s/64/pgtable.h>
 #include <asm/bug.h>
 #include <asm/processor.h>
+#include <asm/cpu_has_feature.h>
 
 /*
  * SLB
@@ -190,6 +191,15 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
 	BUG();
 }
 
+static inline unsigned long get_sllp_encoding(int psize)
+{
+	unsigned long sllp;
+
+	sllp = ((mmu_psize_defs[psize].sllp & SLB_VSID_L) >> 6) |
+		((mmu_psize_defs[psize].sllp & SLB_VSID_LP) >> 4);
+	return sllp;
+}
+
 #endif /* __ASSEMBLY__ */
 
 /*
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index d4eda6420523..8afb0e00f7d9 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -23,13 +23,6 @@ struct mmu_psize_def {
 };
 extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 
-#ifdef CONFIG_PPC_RADIX_MMU
-#define radix_enabled() mmu_has_feature(MMU_FTR_RADIX)
-#else
-#define radix_enabled() (0)
-#endif
-
-
 #endif /* __ASSEMBLY__ */
 
 /* 64-bit classic hash table MMU */
@@ -107,6 +100,9 @@ extern int mmu_vmemmap_psize;
 extern int mmu_io_psize;
 
 /* MMU initialization */
+void mmu_early_init_devtree(void);
+void hash__early_init_devtree(void);
+void radix__early_init_devtree(void);
 extern void radix_init_native(void);
 extern void hash__early_init_mmu(void);
 extern void radix__early_init_mmu(void);
@@ -132,11 +128,15 @@ extern void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
 static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 					      phys_addr_t first_memblock_size)
 {
-	if (radix_enabled())
+	if (early_radix_enabled())
 		return radix__setup_initial_memory_limit(first_memblock_base,
 						   first_memblock_size);
 	return hash__setup_initial_memory_limit(first_memblock_base,
 					   first_memblock_size);
 }
+
+extern int (*register_process_table)(unsigned long base, unsigned long page_size,
+				     unsigned long tbl_size);
+
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
index f12ddf5e8de5..2f6373144e2c 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -75,11 +75,6 @@ static inline void hash__flush_tlb_page(struct vm_area_struct *vma,
 {
 }
 
-static inline void hash__flush_tlb_page_nohash(struct vm_area_struct *vma,
-					   unsigned long vmaddr)
-{
-}
-
 static inline void hash__flush_tlb_range(struct vm_area_struct *vma,
 				     unsigned long start, unsigned long end)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 00703e7e4c94..65037762b120 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -10,26 +10,32 @@ static inline int mmu_get_ap(int psize)
 	return mmu_psize_defs[psize].ap;
 }
 
+extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma,
+					   unsigned long start, unsigned long end);
+extern void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+					 unsigned long end, int psize);
+extern void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
+				       unsigned long start, unsigned long end);
 extern void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			    unsigned long end);
 extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
 extern void radix__local_flush_tlb_mm(struct mm_struct *mm);
 extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-extern void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
-				    unsigned long ap, int nid);
 extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
+extern void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+					      int psize);
 extern void radix__tlb_flush(struct mmu_gather *tlb);
 #ifdef CONFIG_SMP
 extern void radix__flush_tlb_mm(struct mm_struct *mm);
 extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-extern void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
-			      unsigned long ap, int nid);
 extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
+extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+					int psize);
 #else
 #define radix__flush_tlb_mm(mm)		radix__local_flush_tlb_mm(mm)
 #define radix__flush_tlb_page(vma,addr)	radix__local_flush_tlb_page(vma,addr)
-#define radix___flush_tlb_page(mm,addr,p,i)	radix___local_flush_tlb_page(mm,addr,p,i)
+#define radix__flush_tlb_page_psize(mm,addr,p) radix__local_flush_tlb_page_psize(mm,addr,p)
 #define radix__flush_tlb_pwc(tlb, addr)	radix__local_flush_tlb_pwc(tlb, addr)
 #endif
 extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 96e5769b18b0..72b925f97bab 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -7,6 +7,25 @@
 #include <asm/book3s/64/tlbflush-hash.h>
 #include <asm/book3s/64/tlbflush-radix.h>
 
+#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
+static inline void flush_pmd_tlb_range(struct vm_area_struct *vma,
+				       unsigned long start, unsigned long end)
+{
+	if (radix_enabled())
+		return radix__flush_pmd_tlb_range(vma, start, end);
+	return hash__flush_tlb_range(vma, start, end);
+}
+
+#define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
+static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
+					   unsigned long start,
+					   unsigned long end)
+{
+	if (radix_enabled())
+		return radix__flush_hugetlb_tlb_range(vma, start, end);
+	return hash__flush_tlb_range(vma, start, end);
+}
+
 static inline void flush_tlb_range(struct vm_area_struct *vma,
 				   unsigned long start, unsigned long end)
 {
@@ -38,14 +57,6 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma,
 	return hash__local_flush_tlb_page(vma, vmaddr);
 }
 
-static inline void flush_tlb_page_nohash(struct vm_area_struct *vma,
-					 unsigned long vmaddr)
-{
-	if (radix_enabled())
-		return radix__flush_tlb_page(vma, vmaddr);
-	return hash__flush_tlb_page_nohash(vma, vmaddr);
-}
-
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
 	if (radix_enabled())
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index 69fb16d7a811..b77f0364df94 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -11,6 +11,7 @@
 
 #include <linux/mm.h>
 #include <asm/cputable.h>
+#include <asm/cpu_has_feature.h>
 
 /*
  * No cache flushing is required when address mappings are changed,
diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h
new file mode 100644
index 000000000000..2ef55f8968a2
--- /dev/null
+++ b/arch/powerpc/include/asm/cpu_has_feature.h
@@ -0,0 +1,53 @@
+#ifndef __ASM_POWERPC_CPUFEATURES_H
+#define __ASM_POWERPC_CPUFEATURES_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bug.h>
+#include <asm/cputable.h>
+
+static inline bool early_cpu_has_feature(unsigned long feature)
+{
+	return !!((CPU_FTRS_ALWAYS & feature) ||
+		  (CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature));
+}
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
+#include <linux/jump_label.h>
+
+#define NUM_CPU_FTR_KEYS	64
+
+extern struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS];
+
+static __always_inline bool cpu_has_feature(unsigned long feature)
+{
+	int i;
+
+	BUILD_BUG_ON(!__builtin_constant_p(feature));
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG
+	if (!static_key_initialized) {
+		printk("Warning! cpu_has_feature() used prior to jump label init!\n");
+		dump_stack();
+		return early_cpu_has_feature(feature);
+	}
+#endif
+
+	if (CPU_FTRS_ALWAYS & feature)
+		return true;
+
+	if (!(CPU_FTRS_POSSIBLE & feature))
+		return false;
+
+	i = __builtin_ctzl(feature);
+	return static_branch_likely(&cpu_feature_keys[i]);
+}
+#else
+static inline bool cpu_has_feature(unsigned long feature)
+{
+	return early_cpu_has_feature(feature);
+}
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_POWERPC_CPUFEATURE_H */
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index df4fb5faba43..82026b419341 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -2,6 +2,7 @@
 #define __ASM_POWERPC_CPUTABLE_H
 
 
+#include <linux/types.h>
 #include <asm/asm-compat.h>
 #include <asm/feature-fixups.h>
 #include <uapi/asm/cputable.h>
@@ -122,6 +123,12 @@ extern void do_feature_fixups(unsigned long value, void *fixup_start,
 
 extern const char *powerpc_base_platform;
 
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
+extern void cpu_feature_keys_init(void);
+#else
+static inline void cpu_feature_keys_init(void) { }
+#endif
+
 /* TLB flush actions. Used as argument to cpu_spec.flush_tlb() hook */
 enum {
 	TLB_INVAL_SCOPE_GLOBAL = 0,	/* invalidate all TLBs */
@@ -576,14 +583,6 @@ enum {
 };
 #endif /* __powerpc64__ */
 
-static inline int cpu_has_feature(unsigned long feature)
-{
-	return (CPU_FTRS_ALWAYS & feature) ||
-	       (CPU_FTRS_POSSIBLE
-		& cur_cpu_spec->cpu_features
-		& feature);
-}
-
 #define HBP_NUM 1
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 2dfd4fc41f3e..4f60db074725 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -28,6 +28,7 @@ static inline void setup_cputime_one_jiffy(void) { }
 #include <asm/div64.h>
 #include <asm/time.h>
 #include <asm/param.h>
+#include <asm/cpu_has_feature.h>
 
 typedef u64 __nocast cputime_t;
 typedef u64 __nocast cputime64_t;
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index 5fa6b20eba10..378167377065 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -16,6 +16,7 @@
 #include <linux/threads.h>
 
 #include <asm/ppc-opcode.h>
+#include <asm/cpu_has_feature.h>
 
 #define PPC_DBELL_MSG_BRDCAST	(0x04000000)
 #define PPC_DBELL_TYPE(x)	(((x) & 0xf) << (63-36))
diff --git a/arch/powerpc/include/asm/dcr-native.h b/arch/powerpc/include/asm/dcr-native.h
index 4efc11dacb98..4a2beef74277 100644
--- a/arch/powerpc/include/asm/dcr-native.h
+++ b/arch/powerpc/include/asm/dcr-native.h
@@ -24,6 +24,7 @@
 
 #include <linux/spinlock.h>
 #include <asm/cputable.h>
+#include <asm/cpu_has_feature.h>
 
 typedef struct {
 	unsigned int base;
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index 77816acd4fd9..84e3f8dd5e4f 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -13,7 +13,6 @@
 /* need struct page definitions */
 #include <linux/mm.h>
 #include <linux/scatterlist.h>
-#include <linux/dma-attrs.h>
 #include <linux/dma-debug.h>
 #include <asm/io.h>
 #include <asm/swiotlb.h>
@@ -25,14 +24,14 @@
 /* Some dma direct funcs must be visible for use in other dma_ops */
 extern void *__dma_direct_alloc_coherent(struct device *dev, size_t size,
 					 dma_addr_t *dma_handle, gfp_t flag,
-					 struct dma_attrs *attrs);
+					 unsigned long attrs);
 extern void __dma_direct_free_coherent(struct device *dev, size_t size,
 				       void *vaddr, dma_addr_t dma_handle,
-				       struct dma_attrs *attrs);
+				       unsigned long attrs);
 extern int dma_direct_mmap_coherent(struct device *dev,
 				    struct vm_area_struct *vma,
 				    void *cpu_addr, dma_addr_t handle,
-				    size_t size, struct dma_attrs *attrs);
+				    size_t size, unsigned long attrs);
 
 #ifdef CONFIG_NOT_COHERENT_CACHE
 /*
diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h
new file mode 100644
index 000000000000..88b4901ac4ee
--- /dev/null
+++ b/arch/powerpc/include/asm/hmi.h
@@ -0,0 +1,45 @@
+/*
+ * Hypervisor Maintenance Interrupt header file.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.
+ *
+ * Copyright 2015 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_PPC64_HMI_H__
+#define __ASM_PPC64_HMI_H__
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+#define	CORE_TB_RESYNC_REQ_BIT		63
+#define MAX_SUBCORE_PER_CORE		4
+
+/*
+ * sibling_subcore_state structure is used to co-ordinate all threads
+ * during HMI to avoid TB corruption. This structure is allocated once
+ * per each core and shared by all threads on that core.
+ */
+struct sibling_subcore_state {
+	unsigned long	flags;
+	u8		in_guest[MAX_SUBCORE_PER_CORE];
+};
+
+extern void wait_for_subcore_guest_exit(void);
+extern void wait_for_tb_resync(void);
+#else
+static inline void wait_for_subcore_guest_exit(void) { }
+static inline void wait_for_tb_resync(void) { }
+#endif
+#endif /* __ASM_PPC64_HMI_H__ */
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index e2d9f4996e5c..c5517f463ec7 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -147,7 +147,7 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 {
 	pte_t pte;
 	pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
-	flush_tlb_page(vma, addr);
+	flush_hugetlb_page(vma, addr);
 }
 
 static inline int huge_pte_none(pte_t pte)
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index f49a72a9062d..2c1d50792944 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -53,7 +53,7 @@ struct iommu_table_ops {
 			long index, long npages,
 			unsigned long uaddr,
 			enum dma_data_direction direction,
-			struct dma_attrs *attrs);
+			unsigned long attrs);
 #ifdef CONFIG_IOMMU_API
 	/*
 	 * Exchanges existing TCE with new TCE plus direction bits;
@@ -248,12 +248,12 @@ extern int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
 			    struct scatterlist *sglist, int nelems,
 			    unsigned long mask,
 			    enum dma_data_direction direction,
-			    struct dma_attrs *attrs);
+			    unsigned long attrs);
 extern void ppc_iommu_unmap_sg(struct iommu_table *tbl,
 			       struct scatterlist *sglist,
 			       int nelems,
 			       enum dma_data_direction direction,
-			       struct dma_attrs *attrs);
+			       unsigned long attrs);
 
 extern void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
 				  size_t size, dma_addr_t *dma_handle,
@@ -264,10 +264,10 @@ extern dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
 				 struct page *page, unsigned long offset,
 				 size_t size, unsigned long mask,
 				 enum dma_data_direction direction,
-				 struct dma_attrs *attrs);
+				 unsigned long attrs);
 extern void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
 			     size_t size, enum dma_data_direction direction,
-			     struct dma_attrs *attrs);
+			     unsigned long attrs);
 
 extern void iommu_init_early_pSeries(void);
 extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops);
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index 47e155f15433..9a287e0ac8b1 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 
 #include <asm/feature-fixups.h>
+#include <asm/asm-compat.h>
 
 #define JUMP_ENTRY_TYPE		stringify_in_c(FTR_ENTRY_LONG)
 #define JUMP_LABEL_NOP_SIZE	4
@@ -21,7 +22,7 @@
 static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("1:\n\t"
-		 "nop\n\t"
+		 "nop # arch_static_branch\n\t"
 		 ".pushsection __jump_table,  \"aw\"\n\t"
 		 JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
 		 ".popsection \n\t"
@@ -35,7 +36,7 @@ l_yes:
 static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("1:\n\t"
-		 "b %l[l_yes]\n\t"
+		 "b %l[l_yes] # arch_static_branch_jump\n\t"
 		 ".pushsection __jump_table,  \"aw\"\n\t"
 		 JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
 		 ".popsection \n\t"
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 1f4497fb5b83..88d17b4ea9c8 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -181,8 +181,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 
 	switch (b_psize) {
 	case MMU_PAGE_4K:
-		sllp = ((mmu_psize_defs[a_psize].sllp & SLB_VSID_L) >> 6) |
-			((mmu_psize_defs[a_psize].sllp & SLB_VSID_LP) >> 4);
+		sllp = get_sllp_encoding(a_psize);
 		rb |= sllp << 5;	/*  AP field */
 		rb |= (va_low & 0x7ff) << 12;	/* remaining 11 bits of AVA */
 		break;
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 76f5398e7152..0420b388dd83 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -219,8 +219,6 @@ struct machdep_calls {
 #ifdef CONFIG_ARCH_RANDOM
 	int (*get_random_seed)(unsigned long *v);
 #endif
-	int (*register_process_table)(unsigned long base, unsigned long page_size,
-				      unsigned long tbl_size);
 };
 
 extern void e500_idle(void);
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index 2563c435a4b1..30922f699341 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -13,6 +13,7 @@
 
 #include <asm/cputable.h>
 #include <linux/mm.h>
+#include <asm/cpu_has_feature.h>
 
 /*
  * This file is included by linux/mman.h, so we can't use cacl_vm_prot_bits()
@@ -31,13 +32,13 @@ static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
 }
 #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags)
 
-static inline int arch_validate_prot(unsigned long prot)
+static inline bool arch_validate_prot(unsigned long prot)
 {
 	if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO))
-		return 0;
+		return false;
 	if ((prot & PROT_SAO) && !cpu_has_feature(CPU_FTR_SAO))
-		return 0;
-	return 1;
+		return false;
+	return true;
 }
 #define arch_validate_prot(prot) arch_validate_prot(prot)
 
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 54471228f7b8..e2fb408f8398 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -12,7 +12,7 @@
  */
 
 /*
- * First half is MMU families
+ * MMU families
  */
 #define MMU_FTR_HPTE_TABLE		ASM_CONST(0x00000001)
 #define MMU_FTR_TYPE_8xx		ASM_CONST(0x00000002)
@@ -21,9 +21,13 @@
 #define MMU_FTR_TYPE_FSL_E		ASM_CONST(0x00000010)
 #define MMU_FTR_TYPE_47x		ASM_CONST(0x00000020)
 
+/* Radix page table supported and enabled */
+#define MMU_FTR_TYPE_RADIX		ASM_CONST(0x00000040)
+
 /*
- * This is individual features
+ * Individual features below.
  */
+
 /*
  * We need to clear top 16bits of va (from the remaining 64 bits )in
  * tlbie* instructions
@@ -93,11 +97,6 @@
  */
 #define MMU_FTR_1T_SEGMENT		ASM_CONST(0x40000000)
 
-/*
- * Radix page table available
- */
-#define MMU_FTR_RADIX			ASM_CONST(0x80000000)
-
 /* MMU feature bit sets for various CPUs */
 #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2	\
 	MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
@@ -113,6 +112,7 @@
 #define MMU_FTRS_PA6T		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
 				MMU_FTR_CI_LARGE_PAGE | MMU_FTR_NO_SLBIE_B
 #ifndef __ASSEMBLY__
+#include <linux/bug.h>
 #include <asm/cputable.h>
 
 #ifdef CONFIG_PPC_FSL_BOOK3E
@@ -131,20 +131,71 @@ enum {
 		MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE |
 		MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA |
 #ifdef CONFIG_PPC_RADIX_MMU
-		MMU_FTR_RADIX |
+		MMU_FTR_TYPE_RADIX |
 #endif
 		0,
 };
 
-static inline int mmu_has_feature(unsigned long feature)
+static inline bool early_mmu_has_feature(unsigned long feature)
 {
-	return (MMU_FTRS_POSSIBLE & cur_cpu_spec->mmu_features & feature);
+	return !!(MMU_FTRS_POSSIBLE & cur_cpu_spec->mmu_features & feature);
+}
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
+#include <linux/jump_label.h>
+
+#define NUM_MMU_FTR_KEYS	32
+
+extern struct static_key_true mmu_feature_keys[NUM_MMU_FTR_KEYS];
+
+extern void mmu_feature_keys_init(void);
+
+static __always_inline bool mmu_has_feature(unsigned long feature)
+{
+	int i;
+
+	BUILD_BUG_ON(!__builtin_constant_p(feature));
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG
+	if (!static_key_initialized) {
+		printk("Warning! mmu_has_feature() used prior to jump label init!\n");
+		dump_stack();
+		return early_mmu_has_feature(feature);
+	}
+#endif
+
+	if (!(MMU_FTRS_POSSIBLE & feature))
+		return false;
+
+	i = __builtin_ctzl(feature);
+	return static_branch_likely(&mmu_feature_keys[i]);
 }
 
 static inline void mmu_clear_feature(unsigned long feature)
 {
+	int i;
+
+	i = __builtin_ctzl(feature);
 	cur_cpu_spec->mmu_features &= ~feature;
+	static_branch_disable(&mmu_feature_keys[i]);
 }
+#else
+
+static inline void mmu_feature_keys_init(void)
+{
+
+}
+
+static inline bool mmu_has_feature(unsigned long feature)
+{
+	return early_mmu_has_feature(feature);
+}
+
+static inline void mmu_clear_feature(unsigned long feature)
+{
+	cur_cpu_spec->mmu_features &= ~feature;
+}
+#endif /* CONFIG_JUMP_LABEL */
 
 extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup;
 
@@ -164,6 +215,28 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
 }
 #endif /* !CONFIG_DEBUG_VM */
 
+#ifdef CONFIG_PPC_RADIX_MMU
+static inline bool radix_enabled(void)
+{
+	return mmu_has_feature(MMU_FTR_TYPE_RADIX);
+}
+
+static inline bool early_radix_enabled(void)
+{
+	return early_mmu_has_feature(MMU_FTR_TYPE_RADIX);
+}
+#else
+static inline bool radix_enabled(void)
+{
+	return false;
+}
+
+static inline bool early_radix_enabled(void)
+{
+	return false;
+}
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 /* The kernel use the constants below to index in the page sizes array.
@@ -210,6 +283,7 @@ extern void early_init_mmu(void);
 extern void early_init_mmu_secondary(void);
 extern void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 				       phys_addr_t first_memblock_size);
+static inline void mmu_early_init_devtree(void) { }
 #endif /* __ASSEMBLY__ */
 #endif
 
@@ -230,9 +304,5 @@ extern void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 #  include <asm/mmu-8xx.h>
 #endif
 
-#ifndef radix_enabled
-#define radix_enabled() (0)
-#endif
-
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_MMU_H_ */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index ad171e979ab0..148303e7771f 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -26,6 +26,7 @@
 #include <asm/kvm_book3s_asm.h>
 #endif
 #include <asm/accounting.h>
+#include <asm/hmi.h>
 
 register struct paca_struct *local_paca asm("r13");
 
@@ -182,6 +183,11 @@ struct paca_struct {
 	 */
 	u16 in_mce;
 	u8 hmi_event_available;		 /* HMI event is available */
+	/*
+	 * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
+	 * more details
+	 */
+	struct sibling_subcore_state *sibling_subcore_state;
 #endif
 
 	/* Stuff for accurate time accounting */
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index a6f3ac0d4602..e9bd6cf0212f 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -136,9 +136,6 @@ extern pgprot_t	pci_phys_mem_access_prot(struct file *file,
 					 pgprot_t prot);
 
 #define HAVE_ARCH_PCI_RESOURCE_TO_USER
-extern void pci_resource_to_user(const struct pci_dev *dev, int bar,
-				 const struct resource *rsrc,
-				 resource_size_t *start, resource_size_t *end);
 
 extern resource_size_t pcibios_io_space_offset(struct pci_controller *hose);
 extern void pcibios_setup_bus_devices(struct pci_bus *bus);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 40f3615bf940..f69f40f1519a 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1256,15 +1256,6 @@ static inline void msr_check_and_clear(unsigned long bits)
 		__msr_check_and_clear(bits);
 }
 
-static inline unsigned long mfvtb (void)
-{
-#ifdef CONFIG_PPC_BOOK3S_64
-	if (cpu_has_feature(CPU_FTR_ARCH_207S))
-		return mfspr(SPRN_VTB);
-#endif
-	return 0;
-}
-
 #ifdef __powerpc64__
 #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E)
 #define mftb()		({unsigned long rval;				\
diff --git a/arch/powerpc/include/asm/rtc.h b/arch/powerpc/include/asm/rtc.h
deleted file mode 100644
index f5802926b6c0..000000000000
--- a/arch/powerpc/include/asm/rtc.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Real-time clock definitions and interfaces
- *
- * Author: Tom Rini <trini@mvista.com>
- *
- * 2002 (c) MontaVista, Software, Inc.  This file is licensed under
- * the terms of the GNU General Public License version 2.  This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- *
- * Based on:
- * include/asm-m68k/rtc.h
- *
- * Copyright Richard Zidlicky
- * implementation details for genrtc/q40rtc driver
- *
- * And the old drivers/macintosh/rtc.c which was heavily based on:
- * Linux/SPARC Real Time Clock Driver
- * Copyright (C) 1996 Thomas K. Dyas (tdyas@eden.rutgers.edu)
- *
- * With additional work by Paul Mackerras and Franz Sirl.
- */
-
-#ifndef __ASM_POWERPC_RTC_H__
-#define __ASM_POWERPC_RTC_H__
-
-#ifdef __KERNEL__
-
-#include <linux/rtc.h>
-
-#include <asm/machdep.h>
-#include <asm/time.h>
-
-#define RTC_PIE 0x40		/* periodic interrupt enable */
-#define RTC_AIE 0x20		/* alarm interrupt enable */
-#define RTC_UIE 0x10		/* update-finished interrupt enable */
-
-/* some dummy definitions */
-#define RTC_BATT_BAD 0x100	/* battery bad */
-#define RTC_SQWE 0x08		/* enable square-wave output */
-#define RTC_DM_BINARY 0x04	/* all time/date values are BCD if clear */
-#define RTC_24H 0x02		/* 24 hour mode - else hours bit 7 means pm */
-#define RTC_DST_EN 0x01	        /* auto switch DST - works f. USA only */
-
-static inline unsigned int get_rtc_time(struct rtc_time *time)
-{
-	if (ppc_md.get_rtc_time)
-		ppc_md.get_rtc_time(time);
-	return RTC_24H;
-}
-
-/* Set the current date and time in the real time clock. */
-static inline int set_rtc_time(struct rtc_time *time)
-{
-	if (ppc_md.set_rtc_time)
-		return ppc_md.set_rtc_time(time);
-	return -EINVAL;
-}
-
-static inline unsigned int get_rtc_ss(void)
-{
-	struct rtc_time h;
-
-	get_rtc_time(&h);
-	return h.tm_sec;
-}
-
-static inline int get_rtc_pll(struct rtc_pll_info *pll)
-{
-	return -EINVAL;
-}
-static inline int set_rtc_pll(struct rtc_pll_info *pll)
-{
-	return -EINVAL;
-}
-
-#endif /* __KERNEL__ */
-#endif /* __ASM_POWERPC_RTC_H__ */
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 17c8380673a6..0a74ebe934e1 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -75,6 +75,14 @@ static inline void disable_kernel_spe(void)
 static inline void __giveup_spe(struct task_struct *t) { }
 #endif
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+extern void flush_tmregs_to_thread(struct task_struct *);
+#else
+static inline void flush_tmregs_to_thread(struct task_struct *t)
+{
+}
+#endif
+
 static inline void clear_task_ebb(struct task_struct *t)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index b21bb1f72314..87e4b2d8dcd4 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -138,40 +138,15 @@ static inline struct thread_info *current_thread_info(void)
 /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
 #define TLF_NAPPING		0	/* idle thread enabled NAP mode */
 #define TLF_SLEEPING		1	/* suspend code enabled SLEEP mode */
-#define TLF_RESTORE_SIGMASK	2	/* Restore signal mask in do_signal */
 #define TLF_LAZY_MMU		3	/* tlb_batch is active */
 #define TLF_RUNLATCH		4	/* Is the runlatch enabled? */
 
 #define _TLF_NAPPING		(1 << TLF_NAPPING)
 #define _TLF_SLEEPING		(1 << TLF_SLEEPING)
-#define _TLF_RESTORE_SIGMASK	(1 << TLF_RESTORE_SIGMASK)
 #define _TLF_LAZY_MMU		(1 << TLF_LAZY_MMU)
 #define _TLF_RUNLATCH		(1 << TLF_RUNLATCH)
 
 #ifndef __ASSEMBLY__
-#define HAVE_SET_RESTORE_SIGMASK	1
-static inline void set_restore_sigmask(void)
-{
-	struct thread_info *ti = current_thread_info();
-	ti->local_flags |= _TLF_RESTORE_SIGMASK;
-	WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags));
-}
-static inline void clear_restore_sigmask(void)
-{
-	current_thread_info()->local_flags &= ~_TLF_RESTORE_SIGMASK;
-}
-static inline bool test_restore_sigmask(void)
-{
-	return current_thread_info()->local_flags & _TLF_RESTORE_SIGMASK;
-}
-static inline bool test_and_clear_restore_sigmask(void)
-{
-	struct thread_info *ti = current_thread_info();
-	if (!(ti->local_flags & _TLF_RESTORE_SIGMASK))
-		return false;
-	ti->local_flags &= ~_TLF_RESTORE_SIGMASK;
-	return true;
-}
 
 static inline bool test_thread_local_flags(unsigned int flags)
 {
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 09211640a0e0..b240666b7bc1 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -18,6 +18,7 @@
 #include <linux/percpu.h>
 
 #include <asm/processor.h>
+#include <asm/cpu_has_feature.h>
 
 /* time.c */
 extern unsigned long tb_ticks_per_jiffy;
@@ -103,7 +104,7 @@ static inline u64 get_vtb(void)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
 	if (cpu_has_feature(CPU_FTR_ARCH_207S))
-		return mfvtb();
+		return mfspr(SPRN_VTB);
 #endif
 	return 0;
 }
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index 20733fa518ae..f6f68f73e858 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -46,5 +46,18 @@ static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
 #endif
 }
 
+#ifdef CONFIG_SMP
+static inline int mm_is_core_local(struct mm_struct *mm)
+{
+	return cpumask_subset(mm_cpumask(mm),
+			      topology_sibling_cpumask(smp_processor_id()));
+}
+#else
+static inline int mm_is_core_local(struct mm_struct *mm)
+{
+	return 1;
+}
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_POWERPC_TLB_H */
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index 1b38eea28e5a..13dbcd41885e 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -54,7 +54,6 @@ extern void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 #define flush_tlb_page(vma,addr)	local_flush_tlb_page(vma,addr)
 #define __flush_tlb_page(mm,addr,p,i)	__local_flush_tlb_page(mm,addr,p,i)
 #endif
-#define flush_tlb_page_nohash(vma,addr)	flush_tlb_page(vma,addr)
 
 #elif defined(CONFIG_PPC_STD_MMU_32)
 
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index b7c20f0b8fbe..c1dc6c14deb8 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -310,10 +310,15 @@ static inline unsigned long copy_from_user(void *to,
 {
 	unsigned long over;
 
-	if (access_ok(VERIFY_READ, from, n))
+	if (access_ok(VERIFY_READ, from, n)) {
+		if (!__builtin_constant_p(n))
+			check_object_size(to, n, false);
 		return __copy_tofrom_user((__force void __user *)to, from, n);
+	}
 	if ((unsigned long)from < TASK_SIZE) {
 		over = (unsigned long)from + n - TASK_SIZE;
+		if (!__builtin_constant_p(n - over))
+			check_object_size(to, n - over, false);
 		return __copy_tofrom_user((__force void __user *)to, from,
 				n - over) + over;
 	}
@@ -325,10 +330,15 @@ static inline unsigned long copy_to_user(void __user *to,
 {
 	unsigned long over;
 
-	if (access_ok(VERIFY_WRITE, to, n))
+	if (access_ok(VERIFY_WRITE, to, n)) {
+		if (!__builtin_constant_p(n))
+			check_object_size(from, n, true);
 		return __copy_tofrom_user(to, (__force void __user *)from, n);
+	}
 	if ((unsigned long)to < TASK_SIZE) {
 		over = (unsigned long)to + n - TASK_SIZE;
+		if (!__builtin_constant_p(n))
+			check_object_size(from, n - over, true);
 		return __copy_tofrom_user(to, (__force void __user *)from,
 				n - over) + over;
 	}
@@ -372,6 +382,10 @@ static inline unsigned long __copy_from_user_inatomic(void *to,
 		if (ret == 0)
 			return 0;
 	}
+
+	if (!__builtin_constant_p(n))
+		check_object_size(to, n, false);
+
 	return __copy_tofrom_user((__force void __user *)to, from, n);
 }
 
@@ -398,6 +412,9 @@ static inline unsigned long __copy_to_user_inatomic(void __user *to,
 		if (ret == 0)
 			return 0;
 	}
+	if (!__builtin_constant_p(n))
+		check_object_size(from, n, true);
+
 	return __copy_tofrom_user(to, (__force const void __user *)from, n);
 }
 
diff --git a/arch/powerpc/include/asm/xor.h b/arch/powerpc/include/asm/xor.h
index 0abb97f3be10..a36c2069d8ed 100644
--- a/arch/powerpc/include/asm/xor.h
+++ b/arch/powerpc/include/asm/xor.h
@@ -23,6 +23,7 @@
 #ifdef CONFIG_ALTIVEC
 
 #include <asm/cputable.h>
+#include <asm/cpu_has_feature.h>
 
 void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
 		   unsigned long *v2_in);
diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h
index c2d21d11c2d2..3a9e44c45c78 100644
--- a/arch/powerpc/include/uapi/asm/elf.h
+++ b/arch/powerpc/include/uapi/asm/elf.h
@@ -91,6 +91,11 @@
 
 #define ELF_NGREG	48	/* includes nip, msr, lr, etc. */
 #define ELF_NFPREG	33	/* includes fpscr */
+#define ELF_NVMX	34	/* includes all vector registers */
+#define ELF_NVSX	32	/* includes all VSX registers */
+#define ELF_NTMSPRREG	3	/* include tfhar, tfiar, texasr */
+#define ELF_NEBB	3	/* includes ebbrr, ebbhr, bescr */
+#define ELF_NPMU	5	/* includes siar, sdar, sier, mmcr2, mmcr0 */
 
 typedef unsigned long elf_greg_t64;
 typedef elf_greg_t64 elf_gregset_t64[ELF_NGREG];
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index fe4c075bcf50..b2027a5cf508 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -41,7 +41,7 @@ obj-$(CONFIG_VDSO32)		+= vdso32/
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o
-obj-$(CONFIG_PPC_BOOK3S_64)	+= mce.o mce_power.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= mce.o mce_power.o hmi.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o
 obj-$(CONFIG_PPC64)		+= vdso64/
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index c7097f933114..033f3385fa49 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -26,6 +26,7 @@
 #include <asm/emulated_ops.h>
 #include <asm/switch_to.h>
 #include <asm/disassemble.h>
+#include <asm/cpu_has_feature.h>
 
 struct aligninfo {
 	unsigned char len;
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index d81f826d1029..74248ab18e98 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -15,6 +15,7 @@
 #include <linux/threads.h>
 #include <linux/init.h>
 #include <linux/export.h>
+#include <linux/jump_label.h>
 
 #include <asm/oprofile_impl.h>
 #include <asm/cputable.h>
@@ -2224,3 +2225,39 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
 
 	return NULL;
 }
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
+struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS] = {
+			[0 ... NUM_CPU_FTR_KEYS - 1] = STATIC_KEY_TRUE_INIT
+};
+EXPORT_SYMBOL_GPL(cpu_feature_keys);
+
+void __init cpu_feature_keys_init(void)
+{
+	int i;
+
+	for (i = 0; i < NUM_CPU_FTR_KEYS; i++) {
+		unsigned long f = 1ul << i;
+
+		if (!(cur_cpu_spec->cpu_features & f))
+			static_branch_disable(&cpu_feature_keys[i]);
+	}
+}
+
+struct static_key_true mmu_feature_keys[NUM_MMU_FTR_KEYS] = {
+			[0 ... NUM_MMU_FTR_KEYS - 1] = STATIC_KEY_TRUE_INIT
+};
+EXPORT_SYMBOL_GPL(mmu_feature_keys);
+
+void __init mmu_feature_keys_init(void)
+{
+	int i;
+
+	for (i = 0; i < NUM_MMU_FTR_KEYS; i++) {
+		unsigned long f = 1ul << i;
+
+		if (!(cur_cpu_spec->mmu_features & f))
+			static_branch_disable(&mmu_feature_keys[i]);
+	}
+}
+#endif
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index 41a7d9d49a5a..fb7cbaa37658 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -18,7 +18,7 @@
  */
 static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
 				      dma_addr_t *dma_handle, gfp_t flag,
-				      struct dma_attrs *attrs)
+				      unsigned long attrs)
 {
 	return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
 				    dma_handle, dev->coherent_dma_mask, flag,
@@ -27,7 +27,7 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
 
 static void dma_iommu_free_coherent(struct device *dev, size_t size,
 				    void *vaddr, dma_addr_t dma_handle,
-				    struct dma_attrs *attrs)
+				    unsigned long attrs)
 {
 	iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
 }
@@ -40,7 +40,7 @@ static void dma_iommu_free_coherent(struct device *dev, size_t size,
 static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page,
 				     unsigned long offset, size_t size,
 				     enum dma_data_direction direction,
-				     struct dma_attrs *attrs)
+				     unsigned long attrs)
 {
 	return iommu_map_page(dev, get_iommu_table_base(dev), page, offset,
 			      size, device_to_mask(dev), direction, attrs);
@@ -49,7 +49,7 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page,
 
 static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
 				 size_t size, enum dma_data_direction direction,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
 	iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction,
 			 attrs);
@@ -58,7 +58,7 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
 
 static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
 			    int nelems, enum dma_data_direction direction,
-			    struct dma_attrs *attrs)
+			    unsigned long attrs)
 {
 	return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
 				device_to_mask(dev), direction, attrs);
@@ -66,7 +66,7 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
 
 static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		int nelems, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
 			   direction, attrs);
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 3f1472a78f39..e64a6016fba7 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -64,7 +64,7 @@ static int dma_direct_dma_supported(struct device *dev, u64 mask)
 
 void *__dma_direct_alloc_coherent(struct device *dev, size_t size,
 				  dma_addr_t *dma_handle, gfp_t flag,
-				  struct dma_attrs *attrs)
+				  unsigned long attrs)
 {
 	void *ret;
 #ifdef CONFIG_NOT_COHERENT_CACHE
@@ -121,7 +121,7 @@ void *__dma_direct_alloc_coherent(struct device *dev, size_t size,
 
 void __dma_direct_free_coherent(struct device *dev, size_t size,
 				void *vaddr, dma_addr_t dma_handle,
-				struct dma_attrs *attrs)
+				unsigned long attrs)
 {
 #ifdef CONFIG_NOT_COHERENT_CACHE
 	__dma_free_coherent(size, vaddr);
@@ -132,7 +132,7 @@ void __dma_direct_free_coherent(struct device *dev, size_t size,
 
 static void *dma_direct_alloc_coherent(struct device *dev, size_t size,
 				       dma_addr_t *dma_handle, gfp_t flag,
-				       struct dma_attrs *attrs)
+				       unsigned long attrs)
 {
 	struct iommu_table *iommu;
 
@@ -156,7 +156,7 @@ static void *dma_direct_alloc_coherent(struct device *dev, size_t size,
 
 static void dma_direct_free_coherent(struct device *dev, size_t size,
 				     void *vaddr, dma_addr_t dma_handle,
-				     struct dma_attrs *attrs)
+				     unsigned long attrs)
 {
 	struct iommu_table *iommu;
 
@@ -177,7 +177,7 @@ static void dma_direct_free_coherent(struct device *dev, size_t size,
 
 int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
 			     void *cpu_addr, dma_addr_t handle, size_t size,
-			     struct dma_attrs *attrs)
+			     unsigned long attrs)
 {
 	unsigned long pfn;
 
@@ -195,7 +195,7 @@ int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
 
 static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
 			     int nents, enum dma_data_direction direction,
-			     struct dma_attrs *attrs)
+			     unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -211,7 +211,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
 
 static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg,
 				int nents, enum dma_data_direction direction,
-				struct dma_attrs *attrs)
+				unsigned long attrs)
 {
 }
 
@@ -232,7 +232,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
 					     unsigned long offset,
 					     size_t size,
 					     enum dma_data_direction dir,
-					     struct dma_attrs *attrs)
+					     unsigned long attrs)
 {
 	BUG_ON(dir == DMA_NONE);
 	__dma_sync_page(page, offset, size, dir);
@@ -243,7 +243,7 @@ static inline void dma_direct_unmap_page(struct device *dev,
 					 dma_addr_t dma_address,
 					 size_t size,
 					 enum dma_data_direction direction,
-					 struct dma_attrs *attrs)
+					 unsigned long attrs)
 {
 }
 
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index fcb2887f5a33..6b8bc0dd09d4 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -532,7 +532,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 #ifdef CONFIG_PPC_STD_MMU_64
 BEGIN_MMU_FTR_SECTION
 	b	2f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 BEGIN_FTR_SECTION
 	clrrdi	r6,r8,28	/* get its ESID */
 	clrrdi	r9,r1,28	/* get current sp ESID */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6200e4925d26..41091fdf9bd8 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -671,6 +671,8 @@ BEGIN_FTR_SECTION
 	beq	h_doorbell_common
 	cmpwi	r3,0xea0
 	beq	h_virt_irq_common
+	cmpwi	r3,0xe60
+	beq	hmi_exception_common
 FTR_SECTION_ELSE
 	cmpwi	r3,0xa00
 	beq	doorbell_super_common
@@ -938,7 +940,7 @@ BEGIN_MMU_FTR_SECTION
 	b	do_hash_page		/* Try to handle as hpte fault */
 MMU_FTR_SECTION_ELSE
 	b	handle_page_fault
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 
 	.align  7
 	.globl  h_data_storage_common
@@ -969,7 +971,7 @@ BEGIN_MMU_FTR_SECTION
 	b	do_hash_page		/* Try to handle as hpte fault */
 MMU_FTR_SECTION_ELSE
 	b	handle_page_fault
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 
 	STD_EXCEPTION_COMMON(0xe20, h_instr_storage, unknown_exception)
 
@@ -1172,7 +1174,7 @@ fwnmi_data_area:
 
 	.globl hmi_exception_early
 hmi_exception_early:
-	EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60)
+	EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, 0xe62)
 	mr	r10,r1			/* Save r1			*/
 	ld	r1,PACAEMERGSP(r13)	/* Use emergency stack		*/
 	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame		*/
@@ -1390,7 +1392,7 @@ slb_miss_realmode:
 #ifdef CONFIG_PPC_STD_MMU_64
 BEGIN_MMU_FTR_SECTION
 	bl	slb_allocate_realmode
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_RADIX)
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 #endif
 	/* All done -- return from exception. */
 
@@ -1404,7 +1406,7 @@ BEGIN_MMU_FTR_SECTION
 	beq-	2f
 FTR_SECTION_ELSE
 	b	2f
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 
 .machine	push
 .machine	"power4"
diff --git a/arch/powerpc/kernel/hmi.c b/arch/powerpc/kernel/hmi.c
new file mode 100644
index 000000000000..e3f738eb1cac
--- /dev/null
+++ b/arch/powerpc/kernel/hmi.c
@@ -0,0 +1,56 @@
+/*
+ * Hypervisor Maintenance Interrupt (HMI) handling.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.
+ *
+ * Copyright 2015 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <asm/paca.h>
+#include <asm/hmi.h>
+
+void wait_for_subcore_guest_exit(void)
+{
+	int i;
+
+	/*
+	 * NULL bitmap pointer indicates that KVM module hasn't
+	 * been loaded yet and hence no guests are running.
+	 * If no KVM is in use, no need to co-ordinate among threads
+	 * as all of them will always be in host and no one is going
+	 * to modify TB other than the opal hmi handler.
+	 * Hence, just return from here.
+	 */
+	if (!local_paca->sibling_subcore_state)
+		return;
+
+	for (i = 0; i < MAX_SUBCORE_PER_CORE; i++)
+		while (local_paca->sibling_subcore_state->in_guest[i])
+			cpu_relax();
+}
+
+void wait_for_tb_resync(void)
+{
+	if (!local_paca->sibling_subcore_state)
+		return;
+
+	while (test_bit(CORE_TB_RESYNC_REQ_BIT,
+				&local_paca->sibling_subcore_state->flags))
+		cpu_relax();
+}
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
index a89f4f7a66bd..c1ca9282f4a0 100644
--- a/arch/powerpc/kernel/ibmebus.c
+++ b/arch/powerpc/kernel/ibmebus.c
@@ -65,7 +65,7 @@ static void *ibmebus_alloc_coherent(struct device *dev,
 				    size_t size,
 				    dma_addr_t *dma_handle,
 				    gfp_t flag,
-				    struct dma_attrs *attrs)
+				    unsigned long attrs)
 {
 	void *mem;
 
@@ -78,7 +78,7 @@ static void *ibmebus_alloc_coherent(struct device *dev,
 static void ibmebus_free_coherent(struct device *dev,
 				  size_t size, void *vaddr,
 				  dma_addr_t dma_handle,
-				  struct dma_attrs *attrs)
+				  unsigned long attrs)
 {
 	kfree(vaddr);
 }
@@ -88,7 +88,7 @@ static dma_addr_t ibmebus_map_page(struct device *dev,
 				   unsigned long offset,
 				   size_t size,
 				   enum dma_data_direction direction,
-				   struct dma_attrs *attrs)
+				   unsigned long attrs)
 {
 	return (dma_addr_t)(page_address(page) + offset);
 }
@@ -97,7 +97,7 @@ static void ibmebus_unmap_page(struct device *dev,
 			       dma_addr_t dma_addr,
 			       size_t size,
 			       enum dma_data_direction direction,
-			       struct dma_attrs *attrs)
+			       unsigned long attrs)
 {
 	return;
 }
@@ -105,7 +105,7 @@ static void ibmebus_unmap_page(struct device *dev,
 static int ibmebus_map_sg(struct device *dev,
 			  struct scatterlist *sgl,
 			  int nents, enum dma_data_direction direction,
-			  struct dma_attrs *attrs)
+			  unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -121,7 +121,7 @@ static int ibmebus_map_sg(struct device *dev,
 static void ibmebus_unmap_sg(struct device *dev,
 			     struct scatterlist *sg,
 			     int nents, enum dma_data_direction direction,
-			     struct dma_attrs *attrs)
+			     unsigned long attrs)
 {
 	return;
 }
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 335eb6cedae5..ba79d15f4ddd 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -336,7 +336,9 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);		\
 	ld	r2,PACATOC(r13);					\
 	ld	r1,PACAR1(r13);						\
 	std	r3,ORIG_GPR3(r1);	/* Save original r3 */		\
-	bl	opal_rm_handle_hmi;					\
+	li	r3,0;			/* NULL argument */		\
+	bl	hmi_exception_realmode;					\
+	nop;								\
 	ld	r3,ORIG_GPR3(r1);	/* Restore original r3 */	\
 20:	nop;
 
@@ -570,7 +572,7 @@ common_exit:
 
 BEGIN_MMU_FTR_SECTION
 	b	no_segments
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 	/* Restore SLB  from PACA */
 	ld	r8,PACA_SLBSHADOWPTR(r13)
 
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index a8e3490b54e3..37d6e741be82 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -307,7 +307,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
 			      void *page, unsigned int npages,
 			      enum dma_data_direction direction,
 			      unsigned long mask, unsigned int align_order,
-			      struct dma_attrs *attrs)
+			      unsigned long attrs)
 {
 	unsigned long entry;
 	dma_addr_t ret = DMA_ERROR_CODE;
@@ -431,7 +431,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
 int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
 		     struct scatterlist *sglist, int nelems,
 		     unsigned long mask, enum dma_data_direction direction,
-		     struct dma_attrs *attrs)
+		     unsigned long attrs)
 {
 	dma_addr_t dma_next = 0, dma_addr;
 	struct scatterlist *s, *outs, *segstart;
@@ -574,7 +574,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
 
 void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 			int nelems, enum dma_data_direction direction,
-			struct dma_attrs *attrs)
+			unsigned long attrs)
 {
 	struct scatterlist *sg;
 
@@ -753,7 +753,7 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
 dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
 			  struct page *page, unsigned long offset, size_t size,
 			  unsigned long mask, enum dma_data_direction direction,
-			  struct dma_attrs *attrs)
+			  unsigned long attrs)
 {
 	dma_addr_t dma_handle = DMA_ERROR_CODE;
 	void *vaddr;
@@ -790,7 +790,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
 
 void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
 		      size_t size, enum dma_data_direction direction,
-		      struct dma_attrs *attrs)
+		      unsigned long attrs)
 {
 	unsigned int npages;
 
@@ -845,7 +845,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
 	nio_pages = size >> tbl->it_page_shift;
 	io_order = get_iommu_order(size, tbl);
 	mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
-			      mask >> tbl->it_page_shift, io_order, NULL);
+			      mask >> tbl->it_page_shift, io_order, 0);
 	if (mapping == DMA_ERROR_CODE) {
 		free_pages((unsigned long)ret, order);
 		return NULL;
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index ac910d9982df..08887cf2b20e 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -75,6 +75,7 @@
 #endif
 #define CREATE_TRACE_POINTS
 #include <asm/trace.h>
+#include <asm/cpu_has_feature.h>
 
 DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
 EXPORT_PER_CPU_SYMBOL(irq_stat);
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 93dae296b6be..fa20060ff7a5 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -184,7 +184,7 @@ void setup_paca(struct paca_struct *new_paca)
 	 * if we do a GET_PACA() before the feature fixups have been
 	 * applied
 	 */
-	if (cpu_has_feature(CPU_FTR_HVMODE))
+	if (early_cpu_has_feature(CPU_FTR_HVMODE))
 		mtspr(SPRN_SPRG_HPACA, local_paca);
 #endif
 	mtspr(SPRN_SPRG_PACA, local_paca);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index f93942b4b6a6..a5c0153ede37 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -412,36 +412,6 @@ static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
 }
 
 /*
- * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
- * device mapping.
- */
-static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
-				      pgprot_t protection,
-				      enum pci_mmap_state mmap_state,
-				      int write_combine)
-{
-
-	/* Write combine is always 0 on non-memory space mappings. On
-	 * memory space, if the user didn't pass 1, we check for a
-	 * "prefetchable" resource. This is a bit hackish, but we use
-	 * this to workaround the inability of /sysfs to provide a write
-	 * combine bit
-	 */
-	if (mmap_state != pci_mmap_mem)
-		write_combine = 0;
-	else if (write_combine == 0) {
-		if (rp->flags & IORESOURCE_PREFETCH)
-			write_combine = 1;
-	}
-
-	/* XXX would be nice to have a way to ask for write-through */
-	if (write_combine)
-		return pgprot_noncached_wc(protection);
-	else
-		return pgprot_noncached(protection);
-}
-
-/*
  * This one is used by /dev/mem and fbdev who have no clue about the
  * PCI device, it tries to find the PCI device first and calls the
  * above routine
@@ -514,9 +484,10 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 		return -EINVAL;
 
 	vma->vm_pgoff = offset >> PAGE_SHIFT;
-	vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp,
-						  vma->vm_page_prot,
-						  mmap_state, write_combine);
+	if (write_combine)
+		vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
+	else
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
 	ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
 			       vma->vm_end - vma->vm_start, vma->vm_page_prot);
@@ -666,39 +637,25 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar,
 			  const struct resource *rsrc,
 			  resource_size_t *start, resource_size_t *end)
 {
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-	resource_size_t offset = 0;
+	struct pci_bus_region region;
 
-	if (hose == NULL)
+	if (rsrc->flags & IORESOURCE_IO) {
+		pcibios_resource_to_bus(dev->bus, &region,
+					(struct resource *) rsrc);
+		*start = region.start;
+		*end = region.end;
 		return;
+	}
 
-	if (rsrc->flags & IORESOURCE_IO)
-		offset = (unsigned long)hose->io_base_virt - _IO_BASE;
-
-	/* We pass a fully fixed up address to userland for MMIO instead of
-	 * a BAR value because X is lame and expects to be able to use that
-	 * to pass to /dev/mem !
-	 *
-	 * That means that we'll have potentially 64 bits values where some
-	 * userland apps only expect 32 (like X itself since it thinks only
-	 * Sparc has 64 bits MMIO) but if we don't do that, we break it on
-	 * 32 bits CHRPs :-(
-	 *
-	 * Hopefully, the sysfs insterface is immune to that gunk. Once X
-	 * has been fixed (and the fix spread enough), we can re-enable the
-	 * 2 lines below and pass down a BAR value to userland. In that case
-	 * we'll also have to re-enable the matching code in
-	 * __pci_mmap_make_offset().
+	/* We pass a CPU physical address to userland for MMIO instead of a
+	 * BAR value because X is lame and expects to be able to use that
+	 * to pass to /dev/mem!
 	 *
-	 * BenH.
+	 * That means we may have 64-bit values where some apps only expect
+	 * 32 (like X itself since it thinks only Sparc has 64-bit MMIO).
 	 */
-#if 0
-	else if (rsrc->flags & IORESOURCE_MEM)
-		offset = hose->pci_mem_offset;
-#endif
-
-	*start = rsrc->start - offset;
-	*end = rsrc->end - offset;
+	*start = rsrc->start;
+	*end = rsrc->end;
 }
 
 /**
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index a8cca88e972f..58ccf86415b4 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -58,6 +58,7 @@
 #include <asm/code-patching.h>
 #include <asm/exec.h>
 #include <asm/livepatch.h>
+#include <asm/cpu_has_feature.h>
 
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
@@ -1073,6 +1074,26 @@ static inline void restore_sprs(struct thread_struct *old_thread,
 #endif
 }
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void flush_tmregs_to_thread(struct task_struct *tsk)
+{
+	/*
+	 * Process self tracing is not yet supported through
+	 * ptrace interface. Ptrace generic code should have
+	 * prevented this from happening in the first place.
+	 * Warn once here with the message, if some how it
+	 * is attempted.
+	 */
+	WARN_ONCE(tsk == current,
+		"Not expecting ptrace on self: TM regs may be incorrect\n");
+
+	/*
+	 * If task is not current, it should have been flushed
+	 * already to it's thread_struct during __switch_to().
+	 */
+}
+#endif
+
 struct task_struct *__switch_to(struct task_struct *prev,
 	struct task_struct *new)
 {
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index bae3db791150..b0245bed6f54 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -170,7 +170,7 @@ static struct ibm_pa_feature {
 	 */
 	{CPU_FTR_TM_COMP, 0, 0,
 	 PPC_FEATURE2_HTM_COMP|PPC_FEATURE2_HTM_NOSC_COMP, 22, 0, 0},
-	{0, MMU_FTR_RADIX, 0, 0,		40, 0, 0},
+	{0, MMU_FTR_TYPE_RADIX, 0, 0,		40, 0, 0},
 };
 
 static void __init scan_features(unsigned long node, const unsigned char *ftrs,
@@ -647,14 +647,6 @@ static void __init early_reserve_mem(void)
 #endif
 }
 
-static bool disable_radix;
-static int __init parse_disable_radix(char *p)
-{
-	disable_radix = true;
-	return 0;
-}
-early_param("disable_radix", parse_disable_radix);
-
 void __init early_init_devtree(void *params)
 {
 	phys_addr_t limit;
@@ -744,11 +736,8 @@ void __init early_init_devtree(void *params)
 	 */
 	spinning_secondaries = boot_cpu_count - 1;
 #endif
-	/*
-	 * now fixup radix MMU mode based on kernel command line
-	 */
-	if (disable_radix)
-		cur_cpu_spec->mmu_features &= ~MMU_FTR_RADIX;
+
+	mmu_early_init_devtree();
 
 #ifdef CONFIG_PPC_POWERNV
 	/* Scan and build the list of machine check recoverable ranges */
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 134bee9ac664..4f3c5756cc09 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -64,6 +64,10 @@ struct pt_regs_offset {
 	{.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])}
 #define REG_OFFSET_END {.name = NULL, .offset = 0}
 
+#define TVSO(f)	(offsetof(struct thread_vr_state, f))
+#define TFSO(f)	(offsetof(struct thread_fp_state, f))
+#define TSO(f)	(offsetof(struct thread_struct, f))
+
 static const struct pt_regs_offset regoffset_table[] = {
 	GPR_OFFSET_NAME(0),
 	GPR_OFFSET_NAME(1),
@@ -181,6 +185,26 @@ static int set_user_msr(struct task_struct *task, unsigned long msr)
 	return 0;
 }
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static unsigned long get_user_ckpt_msr(struct task_struct *task)
+{
+	return task->thread.ckpt_regs.msr | task->thread.fpexc_mode;
+}
+
+static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr)
+{
+	task->thread.ckpt_regs.msr &= ~MSR_DEBUGCHANGE;
+	task->thread.ckpt_regs.msr |= msr & MSR_DEBUGCHANGE;
+	return 0;
+}
+
+static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap)
+{
+	task->thread.ckpt_regs.trap = trap & 0xfff0;
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_PPC64
 static int get_user_dscr(struct task_struct *task, unsigned long *data)
 {
@@ -358,6 +382,29 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
 	return ret;
 }
 
+/*
+ * When the transaction is active, 'transact_fp' holds the current running
+ * value of all FPR registers and 'fp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction. When transaction
+ * is not active 'fp_state' holds the current running state of all the FPR
+ * registers. So this function which returns the current running values of
+ * all the FPR registers, needs to know whether any transaction is active
+ * or not.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ *	u64	fpr[32];
+ *	u64	fpscr;
+ * };
+ *
+ * There are two config options CONFIG_VSX and CONFIG_PPC_TRANSACTIONAL_MEM
+ * which determines the final code in this function. All the combinations of
+ * these two config options are possible except the one below as transactional
+ * memory config pulls in CONFIG_VSX automatically.
+ *
+ *	!defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM)
+ */
 static int fpr_get(struct task_struct *target, const struct user_regset *regset,
 		   unsigned int pos, unsigned int count,
 		   void *kbuf, void __user *ubuf)
@@ -368,14 +415,31 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset,
 #endif
 	flush_fp_to_thread(target);
 
-#ifdef CONFIG_VSX
+#if defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM)
+	/* copy to local buffer then write that out */
+	if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
+		flush_altivec_to_thread(target);
+		flush_tmregs_to_thread(target);
+		for (i = 0; i < 32 ; i++)
+			buf[i] = target->thread.TS_TRANS_FPR(i);
+		buf[32] = target->thread.transact_fp.fpscr;
+	} else {
+		for (i = 0; i < 32 ; i++)
+			buf[i] = target->thread.TS_FPR(i);
+		buf[32] = target->thread.fp_state.fpscr;
+	}
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+#endif
+
+#if defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM)
 	/* copy to local buffer then write that out */
 	for (i = 0; i < 32 ; i++)
 		buf[i] = target->thread.TS_FPR(i);
 	buf[32] = target->thread.fp_state.fpscr;
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+#endif
 
-#else
+#if !defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM)
 	BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
 		     offsetof(struct thread_fp_state, fpr[32]));
 
@@ -384,6 +448,29 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset,
 #endif
 }
 
+/*
+ * When the transaction is active, 'transact_fp' holds the current running
+ * value of all FPR registers and 'fp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction. When transaction
+ * is not active 'fp_state' holds the current running state of all the FPR
+ * registers. So this function which setss the current running values of
+ * all the FPR registers, needs to know whether any transaction is active
+ * or not.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ *	u64	fpr[32];
+ *	u64	fpscr;
+ * };
+ *
+ * There are two config options CONFIG_VSX and CONFIG_PPC_TRANSACTIONAL_MEM
+ * which determines the final code in this function. All the combinations of
+ * these two config options are possible except the one below as transactional
+ * memory config pulls in CONFIG_VSX automatically.
+ *
+ *	!defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM)
+ */
 static int fpr_set(struct task_struct *target, const struct user_regset *regset,
 		   unsigned int pos, unsigned int count,
 		   const void *kbuf, const void __user *ubuf)
@@ -394,7 +481,27 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
 #endif
 	flush_fp_to_thread(target);
 
-#ifdef CONFIG_VSX
+#if defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM)
+	/* copy to local buffer then write that out */
+	i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+	if (i)
+		return i;
+
+	if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
+		flush_altivec_to_thread(target);
+		flush_tmregs_to_thread(target);
+		for (i = 0; i < 32 ; i++)
+			target->thread.TS_TRANS_FPR(i) = buf[i];
+		target->thread.transact_fp.fpscr = buf[32];
+	} else {
+		for (i = 0; i < 32 ; i++)
+			target->thread.TS_FPR(i) = buf[i];
+		target->thread.fp_state.fpscr = buf[32];
+	}
+	return 0;
+#endif
+
+#if defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM)
 	/* copy to local buffer then write that out */
 	i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
 	if (i)
@@ -403,7 +510,9 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
 		target->thread.TS_FPR(i) = buf[i];
 	target->thread.fp_state.fpscr = buf[32];
 	return 0;
-#else
+#endif
+
+#if !defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM)
 	BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
 		     offsetof(struct thread_fp_state, fpr[32]));
 
@@ -433,10 +542,28 @@ static int vr_active(struct task_struct *target,
 	return target->thread.used_vr ? regset->n : 0;
 }
 
+/*
+ * When the transaction is active, 'transact_vr' holds the current running
+ * value of all the VMX registers and 'vr_state' holds the last checkpointed
+ * value of all the VMX registers for the current transaction to fall back
+ * on in case it aborts. When transaction is not active 'vr_state' holds
+ * the current running state of all the VMX registers. So this function which
+ * gets the current running values of all the VMX registers, needs to know
+ * whether any transaction is active or not.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ *	vector128	vr[32];
+ *	vector128	vscr;
+ *	vector128	vrsave;
+ * };
+ */
 static int vr_get(struct task_struct *target, const struct user_regset *regset,
 		  unsigned int pos, unsigned int count,
 		  void *kbuf, void __user *ubuf)
 {
+	struct thread_vr_state *addr;
 	int ret;
 
 	flush_altivec_to_thread(target);
@@ -444,8 +571,19 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset,
 	BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
 		     offsetof(struct thread_vr_state, vr[32]));
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
+		flush_fp_to_thread(target);
+		flush_tmregs_to_thread(target);
+		addr = &target->thread.transact_vr;
+	} else {
+		addr = &target->thread.vr_state;
+	}
+#else
+	addr = &target->thread.vr_state;
+#endif
 	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				  &target->thread.vr_state, 0,
+				  addr, 0,
 				  33 * sizeof(vector128));
 	if (!ret) {
 		/*
@@ -456,7 +594,16 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset,
 			u32 word;
 		} vrsave;
 		memset(&vrsave, 0, sizeof(vrsave));
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+		if (MSR_TM_ACTIVE(target->thread.regs->msr))
+			vrsave.word = target->thread.transact_vrsave;
+		else
+			vrsave.word = target->thread.vrsave;
+#else
 		vrsave.word = target->thread.vrsave;
+#endif
+
 		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
 					  33 * sizeof(vector128), -1);
 	}
@@ -464,10 +611,28 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset,
 	return ret;
 }
 
+/*
+ * When the transaction is active, 'transact_vr' holds the current running
+ * value of all the VMX registers and 'vr_state' holds the last checkpointed
+ * value of all the VMX registers for the current transaction to fall back
+ * on in case it aborts. When transaction is not active 'vr_state' holds
+ * the current running state of all the VMX registers. So this function which
+ * sets the current running values of all the VMX registers, needs to know
+ * whether any transaction is active or not.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ *	vector128	vr[32];
+ *	vector128	vscr;
+ *	vector128	vrsave;
+ * };
+ */
 static int vr_set(struct task_struct *target, const struct user_regset *regset,
 		  unsigned int pos, unsigned int count,
 		  const void *kbuf, const void __user *ubuf)
 {
+	struct thread_vr_state *addr;
 	int ret;
 
 	flush_altivec_to_thread(target);
@@ -475,8 +640,19 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset,
 	BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
 		     offsetof(struct thread_vr_state, vr[32]));
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
+		flush_fp_to_thread(target);
+		flush_tmregs_to_thread(target);
+		addr = &target->thread.transact_vr;
+	} else {
+		addr = &target->thread.vr_state;
+	}
+#else
+	addr = &target->thread.vr_state;
+#endif
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				 &target->thread.vr_state, 0,
+				 addr, 0,
 				 33 * sizeof(vector128));
 	if (!ret && count > 0) {
 		/*
@@ -487,11 +663,28 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset,
 			u32 word;
 		} vrsave;
 		memset(&vrsave, 0, sizeof(vrsave));
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+		if (MSR_TM_ACTIVE(target->thread.regs->msr))
+			vrsave.word = target->thread.transact_vrsave;
+		else
+			vrsave.word = target->thread.vrsave;
+#else
 		vrsave.word = target->thread.vrsave;
+#endif
 		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
 					 33 * sizeof(vector128), -1);
-		if (!ret)
+		if (!ret) {
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+			if (MSR_TM_ACTIVE(target->thread.regs->msr))
+				target->thread.transact_vrsave = vrsave.word;
+			else
+				target->thread.vrsave = vrsave.word;
+#else
 			target->thread.vrsave = vrsave.word;
+#endif
+		}
 	}
 
 	return ret;
@@ -512,6 +705,21 @@ static int vsr_active(struct task_struct *target,
 	return target->thread.used_vsr ? regset->n : 0;
 }
 
+/*
+ * When the transaction is active, 'transact_fp' holds the current running
+ * value of all FPR registers and 'fp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction. When transaction
+ * is not active 'fp_state' holds the current running state of all the FPR
+ * registers. So this function which returns the current running values of
+ * all the FPR registers, needs to know whether any transaction is active
+ * or not.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ *	u64	vsx[32];
+ * };
+ */
 static int vsr_get(struct task_struct *target, const struct user_regset *regset,
 		   unsigned int pos, unsigned int count,
 		   void *kbuf, void __user *ubuf)
@@ -519,16 +727,47 @@ static int vsr_get(struct task_struct *target, const struct user_regset *regset,
 	u64 buf[32];
 	int ret, i;
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+	flush_tmregs_to_thread(target);
+#endif
 	flush_vsx_to_thread(target);
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
+		for (i = 0; i < 32 ; i++)
+			buf[i] = target->thread.
+				transact_fp.fpr[i][TS_VSRLOWOFFSET];
+	} else {
+		for (i = 0; i < 32 ; i++)
+			buf[i] = target->thread.
+				fp_state.fpr[i][TS_VSRLOWOFFSET];
+	}
+#else
 	for (i = 0; i < 32 ; i++)
 		buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
+#endif
 	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
 				  buf, 0, 32 * sizeof(double));
 
 	return ret;
 }
 
+/*
+ * When the transaction is active, 'transact_fp' holds the current running
+ * value of all FPR registers and 'fp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction. When transaction
+ * is not active 'fp_state' holds the current running state of all the FPR
+ * registers. So this function which sets the current running values of all
+ * the FPR registers, needs to know whether any transaction is active or not.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ *	u64	vsx[32];
+ * };
+ */
 static int vsr_set(struct task_struct *target, const struct user_regset *regset,
 		   unsigned int pos, unsigned int count,
 		   const void *kbuf, const void __user *ubuf)
@@ -536,12 +775,30 @@ static int vsr_set(struct task_struct *target, const struct user_regset *regset,
 	u64 buf[32];
 	int ret,i;
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+	flush_tmregs_to_thread(target);
+#endif
 	flush_vsx_to_thread(target);
 
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 				 buf, 0, 32 * sizeof(double));
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
+		for (i = 0; i < 32 ; i++)
+			target->thread.transact_fp.
+				fpr[i][TS_VSRLOWOFFSET] = buf[i];
+	} else {
+		for (i = 0; i < 32 ; i++)
+			target->thread.fp_state.
+				fpr[i][TS_VSRLOWOFFSET] = buf[i];
+	}
+#else
 	for (i = 0; i < 32 ; i++)
 		target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+#endif
 
 
 	return ret;
@@ -614,8 +871,1030 @@ static int evr_set(struct task_struct *target, const struct user_regset *regset,
 }
 #endif /* CONFIG_SPE */
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/**
+ * tm_cgpr_active - get active number of registers in CGPR
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in transaction checkpointed GPR category.
+ */
+static int tm_cgpr_active(struct task_struct *target,
+			  const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return 0;
+
+	return regset->n;
+}
+
+/**
+ * tm_cgpr_get - get CGPR registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @pos:	The buffer position.
+ * @count:	Number of bytes to copy.
+ * @kbuf:	Kernel buffer to copy from.
+ * @ubuf:	User buffer to copy into.
+ *
+ * This function gets transaction checkpointed GPR registers.
+ *
+ * When the transaction is active, 'ckpt_regs' holds all the checkpointed
+ * GPR register values for the current transaction to fall back on if it
+ * aborts in between. This function gets those checkpointed GPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ *	struct pt_regs ckpt_regs;
+ * };
+ */
+static int tm_cgpr_get(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			void *kbuf, void __user *ubuf)
+{
+	int ret;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+	flush_tmregs_to_thread(target);
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.ckpt_regs,
+				  0, offsetof(struct pt_regs, msr));
+	if (!ret) {
+		unsigned long msr = get_user_ckpt_msr(target);
+
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr,
+					  offsetof(struct pt_regs, msr),
+					  offsetof(struct pt_regs, msr) +
+					  sizeof(msr));
+	}
+
+	BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+		     offsetof(struct pt_regs, msr) + sizeof(long));
+
+	if (!ret)
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					  &target->thread.ckpt_regs.orig_gpr3,
+					  offsetof(struct pt_regs, orig_gpr3),
+					  sizeof(struct pt_regs));
+	if (!ret)
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					       sizeof(struct pt_regs), -1);
+
+	return ret;
+}
 
 /*
+ * tm_cgpr_set - set the CGPR registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @pos:	The buffer position.
+ * @count:	Number of bytes to copy.
+ * @kbuf:	Kernel buffer to copy into.
+ * @ubuf:	User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed GPR registers.
+ *
+ * When the transaction is active, 'ckpt_regs' holds the checkpointed
+ * GPR register values for the current transaction to fall back on if it
+ * aborts in between. This function sets those checkpointed GPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ *	struct pt_regs ckpt_regs;
+ * };
+ */
+static int tm_cgpr_set(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			const void *kbuf, const void __user *ubuf)
+{
+	unsigned long reg;
+	int ret;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+	flush_tmregs_to_thread(target);
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &target->thread.ckpt_regs,
+				 0, PT_MSR * sizeof(reg));
+
+	if (!ret && count > 0) {
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+					 PT_MSR * sizeof(reg),
+					 (PT_MSR + 1) * sizeof(reg));
+		if (!ret)
+			ret = set_user_ckpt_msr(target, reg);
+	}
+
+	BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+		     offsetof(struct pt_regs, msr) + sizeof(long));
+
+	if (!ret)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					 &target->thread.ckpt_regs.orig_gpr3,
+					 PT_ORIG_R3 * sizeof(reg),
+					 (PT_MAX_PUT_REG + 1) * sizeof(reg));
+
+	if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
+		ret = user_regset_copyin_ignore(
+			&pos, &count, &kbuf, &ubuf,
+			(PT_MAX_PUT_REG + 1) * sizeof(reg),
+			PT_TRAP * sizeof(reg));
+
+	if (!ret && count > 0) {
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+					 PT_TRAP * sizeof(reg),
+					 (PT_TRAP + 1) * sizeof(reg));
+		if (!ret)
+			ret = set_user_ckpt_trap(target, reg);
+	}
+
+	if (!ret)
+		ret = user_regset_copyin_ignore(
+			&pos, &count, &kbuf, &ubuf,
+			(PT_TRAP + 1) * sizeof(reg), -1);
+
+	return ret;
+}
+
+/**
+ * tm_cfpr_active - get active number of registers in CFPR
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in transaction checkpointed FPR category.
+ */
+static int tm_cfpr_active(struct task_struct *target,
+				const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return 0;
+
+	return regset->n;
+}
+
+/**
+ * tm_cfpr_get - get CFPR registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @pos:	The buffer position.
+ * @count:	Number of bytes to copy.
+ * @kbuf:	Kernel buffer to copy from.
+ * @ubuf:	User buffer to copy into.
+ *
+ * This function gets in transaction checkpointed FPR registers.
+ *
+ * When the transaction is active 'fp_state' holds the checkpointed
+ * values for the current transaction to fall back on if it aborts
+ * in between. This function gets those checkpointed FPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ *	u64	fpr[32];
+ *	u64	fpscr;
+ *};
+ */
+static int tm_cfpr_get(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			void *kbuf, void __user *ubuf)
+{
+	u64 buf[33];
+	int i;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+	flush_tmregs_to_thread(target);
+
+	/* copy to local buffer then write that out */
+	for (i = 0; i < 32 ; i++)
+		buf[i] = target->thread.TS_FPR(i);
+	buf[32] = target->thread.fp_state.fpscr;
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+}
+
+/**
+ * tm_cfpr_set - set CFPR registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @pos:	The buffer position.
+ * @count:	Number of bytes to copy.
+ * @kbuf:	Kernel buffer to copy into.
+ * @ubuf:	User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed FPR registers.
+ *
+ * When the transaction is active 'fp_state' holds the checkpointed
+ * FPR register values for the current transaction to fall back on
+ * if it aborts in between. This function sets these checkpointed
+ * FPR registers. The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ *	u64	fpr[32];
+ *	u64	fpscr;
+ *};
+ */
+static int tm_cfpr_set(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			const void *kbuf, const void __user *ubuf)
+{
+	u64 buf[33];
+	int i;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+	flush_tmregs_to_thread(target);
+
+	/* copy to local buffer then write that out */
+	i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+	if (i)
+		return i;
+	for (i = 0; i < 32 ; i++)
+		target->thread.TS_FPR(i) = buf[i];
+	target->thread.fp_state.fpscr = buf[32];
+	return 0;
+}
+
+/**
+ * tm_cvmx_active - get active number of registers in CVMX
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in checkpointed VMX category.
+ */
+static int tm_cvmx_active(struct task_struct *target,
+				const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return 0;
+
+	return regset->n;
+}
+
+/**
+ * tm_cvmx_get - get CMVX registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @pos:	The buffer position.
+ * @count:	Number of bytes to copy.
+ * @kbuf:	Kernel buffer to copy from.
+ * @ubuf:	User buffer to copy into.
+ *
+ * This function gets in transaction checkpointed VMX registers.
+ *
+ * When the transaction is active 'vr_state' and 'vr_save' hold
+ * the checkpointed values for the current transaction to fall
+ * back on if it aborts in between. The userspace interface buffer
+ * layout is as follows.
+ *
+ * struct data {
+ *	vector128	vr[32];
+ *	vector128	vscr;
+ *	vector128	vrsave;
+ *};
+ */
+static int tm_cvmx_get(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			void *kbuf, void __user *ubuf)
+{
+	int ret;
+
+	BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	/* Flush the state */
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+	flush_tmregs_to_thread(target);
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					&target->thread.vr_state, 0,
+					33 * sizeof(vector128));
+	if (!ret) {
+		/*
+		 * Copy out only the low-order word of vrsave.
+		 */
+		union {
+			elf_vrreg_t reg;
+			u32 word;
+		} vrsave;
+		memset(&vrsave, 0, sizeof(vrsave));
+		vrsave.word = target->thread.vrsave;
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
+						33 * sizeof(vector128), -1);
+	}
+
+	return ret;
+}
+
+/**
+ * tm_cvmx_set - set CMVX registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @pos:	The buffer position.
+ * @count:	Number of bytes to copy.
+ * @kbuf:	Kernel buffer to copy into.
+ * @ubuf:	User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed VMX registers.
+ *
+ * When the transaction is active 'vr_state' and 'vr_save' hold
+ * the checkpointed values for the current transaction to fall
+ * back on if it aborts in between. The userspace interface buffer
+ * layout is as follows.
+ *
+ * struct data {
+ *	vector128	vr[32];
+ *	vector128	vscr;
+ *	vector128	vrsave;
+ *};
+ */
+static int tm_cvmx_set(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+	flush_tmregs_to_thread(target);
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					&target->thread.vr_state, 0,
+					33 * sizeof(vector128));
+	if (!ret && count > 0) {
+		/*
+		 * We use only the low-order word of vrsave.
+		 */
+		union {
+			elf_vrreg_t reg;
+			u32 word;
+		} vrsave;
+		memset(&vrsave, 0, sizeof(vrsave));
+		vrsave.word = target->thread.vrsave;
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
+						33 * sizeof(vector128), -1);
+		if (!ret)
+			target->thread.vrsave = vrsave.word;
+	}
+
+	return ret;
+}
+
+/**
+ * tm_cvsx_active - get active number of registers in CVSX
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in transaction checkpointed VSX category.
+ */
+static int tm_cvsx_active(struct task_struct *target,
+				const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return 0;
+
+	flush_vsx_to_thread(target);
+	return target->thread.used_vsr ? regset->n : 0;
+}
+
+/**
+ * tm_cvsx_get - get CVSX registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @pos:	The buffer position.
+ * @count:	Number of bytes to copy.
+ * @kbuf:	Kernel buffer to copy from.
+ * @ubuf:	User buffer to copy into.
+ *
+ * This function gets in transaction checkpointed VSX registers.
+ *
+ * When the transaction is active 'fp_state' holds the checkpointed
+ * values for the current transaction to fall back on if it aborts
+ * in between. This function gets those checkpointed VSX registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ *	u64	vsx[32];
+ *};
+ */
+static int tm_cvsx_get(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			void *kbuf, void __user *ubuf)
+{
+	u64 buf[32];
+	int ret, i;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	/* Flush the state */
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+	flush_tmregs_to_thread(target);
+	flush_vsx_to_thread(target);
+
+	for (i = 0; i < 32 ; i++)
+		buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  buf, 0, 32 * sizeof(double));
+
+	return ret;
+}
+
+/**
+ * tm_cvsx_set - set CFPR registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @pos:	The buffer position.
+ * @count:	Number of bytes to copy.
+ * @kbuf:	Kernel buffer to copy into.
+ * @ubuf:	User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed VSX registers.
+ *
+ * When the transaction is active 'fp_state' holds the checkpointed
+ * VSX register values for the current transaction to fall back on
+ * if it aborts in between. This function sets these checkpointed
+ * FPR registers. The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ *	u64	vsx[32];
+ *};
+ */
+static int tm_cvsx_set(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			const void *kbuf, const void __user *ubuf)
+{
+	u64 buf[32];
+	int ret, i;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	/* Flush the state */
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+	flush_tmregs_to_thread(target);
+	flush_vsx_to_thread(target);
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 buf, 0, 32 * sizeof(double));
+	for (i = 0; i < 32 ; i++)
+		target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+
+	return ret;
+}
+
+/**
+ * tm_spr_active - get active number of registers in TM SPR
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ *
+ * This function checks the active number of available
+ * regisers in the transactional memory SPR category.
+ */
+static int tm_spr_active(struct task_struct *target,
+			 const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	return regset->n;
+}
+
+/**
+ * tm_spr_get - get the TM related SPR registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @pos:	The buffer position.
+ * @count:	Number of bytes to copy.
+ * @kbuf:	Kernel buffer to copy from.
+ * @ubuf:	User buffer to copy into.
+ *
+ * This function gets transactional memory related SPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct {
+ *	u64		tm_tfhar;
+ *	u64		tm_texasr;
+ *	u64		tm_tfiar;
+ * };
+ */
+static int tm_spr_get(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      void *kbuf, void __user *ubuf)
+{
+	int ret;
+
+	/* Build tests */
+	BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
+	BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
+	BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	/* Flush the states */
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+	flush_tmregs_to_thread(target);
+
+	/* TFHAR register */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				&target->thread.tm_tfhar, 0, sizeof(u64));
+
+	/* TEXASR register */
+	if (!ret)
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				&target->thread.tm_texasr, sizeof(u64),
+				2 * sizeof(u64));
+
+	/* TFIAR register */
+	if (!ret)
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				&target->thread.tm_tfiar,
+				2 * sizeof(u64), 3 * sizeof(u64));
+	return ret;
+}
+
+/**
+ * tm_spr_set - set the TM related SPR registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @pos:	The buffer position.
+ * @count:	Number of bytes to copy.
+ * @kbuf:	Kernel buffer to copy into.
+ * @ubuf:	User buffer to copy from.
+ *
+ * This function sets transactional memory related SPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct {
+ *	u64		tm_tfhar;
+ *	u64		tm_texasr;
+ *	u64		tm_tfiar;
+ * };
+ */
+static int tm_spr_set(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	/* Build tests */
+	BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
+	BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
+	BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	/* Flush the states */
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+	flush_tmregs_to_thread(target);
+
+	/* TFHAR register */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				&target->thread.tm_tfhar, 0, sizeof(u64));
+
+	/* TEXASR register */
+	if (!ret)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				&target->thread.tm_texasr, sizeof(u64),
+				2 * sizeof(u64));
+
+	/* TFIAR register */
+	if (!ret)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				&target->thread.tm_tfiar,
+				 2 * sizeof(u64), 3 * sizeof(u64));
+	return ret;
+}
+
+static int tm_tar_active(struct task_struct *target,
+			 const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (MSR_TM_ACTIVE(target->thread.regs->msr))
+		return regset->n;
+
+	return 0;
+}
+
+static int tm_tar_get(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      void *kbuf, void __user *ubuf)
+{
+	int ret;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				&target->thread.tm_tar, 0, sizeof(u64));
+	return ret;
+}
+
+static int tm_tar_set(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				&target->thread.tm_tar, 0, sizeof(u64));
+	return ret;
+}
+
+static int tm_ppr_active(struct task_struct *target,
+			 const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (MSR_TM_ACTIVE(target->thread.regs->msr))
+		return regset->n;
+
+	return 0;
+}
+
+
+static int tm_ppr_get(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      void *kbuf, void __user *ubuf)
+{
+	int ret;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				&target->thread.tm_ppr, 0, sizeof(u64));
+	return ret;
+}
+
+static int tm_ppr_set(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				&target->thread.tm_ppr, 0, sizeof(u64));
+	return ret;
+}
+
+static int tm_dscr_active(struct task_struct *target,
+			 const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (MSR_TM_ACTIVE(target->thread.regs->msr))
+		return regset->n;
+
+	return 0;
+}
+
+static int tm_dscr_get(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      void *kbuf, void __user *ubuf)
+{
+	int ret;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				&target->thread.tm_dscr, 0, sizeof(u64));
+	return ret;
+}
+
+static int tm_dscr_set(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				&target->thread.tm_dscr, 0, sizeof(u64));
+	return ret;
+}
+#endif	/* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+#ifdef CONFIG_PPC64
+static int ppr_get(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      void *kbuf, void __user *ubuf)
+{
+	int ret;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				&target->thread.ppr, 0, sizeof(u64));
+	return ret;
+}
+
+static int ppr_set(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				&target->thread.ppr, 0, sizeof(u64));
+	return ret;
+}
+
+static int dscr_get(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      void *kbuf, void __user *ubuf)
+{
+	int ret;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				&target->thread.dscr, 0, sizeof(u64));
+	return ret;
+}
+static int dscr_set(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				&target->thread.dscr, 0, sizeof(u64));
+	return ret;
+}
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+static int tar_get(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      void *kbuf, void __user *ubuf)
+{
+	int ret;
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				&target->thread.tar, 0, sizeof(u64));
+	return ret;
+}
+static int tar_set(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				&target->thread.tar, 0, sizeof(u64));
+	return ret;
+}
+
+static int ebb_active(struct task_struct *target,
+			 const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return -ENODEV;
+
+	if (target->thread.used_ebb)
+		return regset->n;
+
+	return 0;
+}
+
+static int ebb_get(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      void *kbuf, void __user *ubuf)
+{
+	/* Build tests */
+	BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
+	BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return -ENODEV;
+
+	if (!target->thread.used_ebb)
+		return -ENODATA;
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+			&target->thread.ebbrr, 0, 3 * sizeof(unsigned long));
+}
+
+static int ebb_set(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      const void *kbuf, const void __user *ubuf)
+{
+	int ret = 0;
+
+	/* Build tests */
+	BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
+	BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return -ENODEV;
+
+	if (target->thread.used_ebb)
+		return -ENODATA;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+			&target->thread.ebbrr, 0, sizeof(unsigned long));
+
+	if (!ret)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+			&target->thread.ebbhr, sizeof(unsigned long),
+			2 * sizeof(unsigned long));
+
+	if (!ret)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+			&target->thread.bescr,
+			2 * sizeof(unsigned long), 3 * sizeof(unsigned long));
+
+	return ret;
+}
+static int pmu_active(struct task_struct *target,
+			 const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return -ENODEV;
+
+	return regset->n;
+}
+
+static int pmu_get(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      void *kbuf, void __user *ubuf)
+{
+	/* Build tests */
+	BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar));
+	BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier));
+	BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2));
+	BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0));
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return -ENODEV;
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+			&target->thread.siar, 0,
+			5 * sizeof(unsigned long));
+}
+
+static int pmu_set(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      const void *kbuf, const void __user *ubuf)
+{
+	int ret = 0;
+
+	/* Build tests */
+	BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar));
+	BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier));
+	BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2));
+	BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0));
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return -ENODEV;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+			&target->thread.siar, 0,
+			sizeof(unsigned long));
+
+	if (!ret)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+			&target->thread.sdar, sizeof(unsigned long),
+			2 * sizeof(unsigned long));
+
+	if (!ret)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+			&target->thread.sier, 2 * sizeof(unsigned long),
+			3 * sizeof(unsigned long));
+
+	if (!ret)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+			&target->thread.mmcr2, 3 * sizeof(unsigned long),
+			4 * sizeof(unsigned long));
+
+	if (!ret)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+			&target->thread.mmcr0, 4 * sizeof(unsigned long),
+			5 * sizeof(unsigned long));
+	return ret;
+}
+#endif
+/*
  * These are our native regset flavors.
  */
 enum powerpc_regset {
@@ -630,6 +1909,25 @@ enum powerpc_regset {
 #ifdef CONFIG_SPE
 	REGSET_SPE,
 #endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	REGSET_TM_CGPR,		/* TM checkpointed GPR registers */
+	REGSET_TM_CFPR,		/* TM checkpointed FPR registers */
+	REGSET_TM_CVMX,		/* TM checkpointed VMX registers */
+	REGSET_TM_CVSX,		/* TM checkpointed VSX registers */
+	REGSET_TM_SPR,		/* TM specific SPR registers */
+	REGSET_TM_CTAR,		/* TM checkpointed TAR register */
+	REGSET_TM_CPPR,		/* TM checkpointed PPR register */
+	REGSET_TM_CDSCR,	/* TM checkpointed DSCR register */
+#endif
+#ifdef CONFIG_PPC64
+	REGSET_PPR,		/* PPR register */
+	REGSET_DSCR,		/* DSCR register */
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	REGSET_TAR,		/* TAR register */
+	REGSET_EBB,		/* EBB registers */
+	REGSET_PMR,		/* Performance Monitor Registers */
+#endif
 };
 
 static const struct user_regset native_regsets[] = {
@@ -664,6 +1962,77 @@ static const struct user_regset native_regsets[] = {
 		.active = evr_active, .get = evr_get, .set = evr_set
 	},
 #endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	[REGSET_TM_CGPR] = {
+		.core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG,
+		.size = sizeof(long), .align = sizeof(long),
+		.active = tm_cgpr_active, .get = tm_cgpr_get, .set = tm_cgpr_set
+	},
+	[REGSET_TM_CFPR] = {
+		.core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG,
+		.size = sizeof(double), .align = sizeof(double),
+		.active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set
+	},
+	[REGSET_TM_CVMX] = {
+		.core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX,
+		.size = sizeof(vector128), .align = sizeof(vector128),
+		.active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set
+	},
+	[REGSET_TM_CVSX] = {
+		.core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX,
+		.size = sizeof(double), .align = sizeof(double),
+		.active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set
+	},
+	[REGSET_TM_SPR] = {
+		.core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set
+	},
+	[REGSET_TM_CTAR] = {
+		.core_note_type = NT_PPC_TM_CTAR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set
+	},
+	[REGSET_TM_CPPR] = {
+		.core_note_type = NT_PPC_TM_CPPR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set
+	},
+	[REGSET_TM_CDSCR] = {
+		.core_note_type = NT_PPC_TM_CDSCR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set
+	},
+#endif
+#ifdef CONFIG_PPC64
+	[REGSET_PPR] = {
+		.core_note_type = NT_PPC_PPR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.get = ppr_get, .set = ppr_set
+	},
+	[REGSET_DSCR] = {
+		.core_note_type = NT_PPC_DSCR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.get = dscr_get, .set = dscr_set
+	},
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	[REGSET_TAR] = {
+		.core_note_type = NT_PPC_TAR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.get = tar_get, .set = tar_set
+	},
+	[REGSET_EBB] = {
+		.core_note_type = NT_PPC_EBB, .n = ELF_NEBB,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = ebb_active, .get = ebb_get, .set = ebb_set
+	},
+	[REGSET_PMR] = {
+		.core_note_type = NT_PPC_PMU, .n = ELF_NPMU,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = pmu_active, .get = pmu_get, .set = pmu_set
+	},
+#endif
 };
 
 static const struct user_regset_view user_ppc_native_view = {
@@ -674,24 +2043,35 @@ static const struct user_regset_view user_ppc_native_view = {
 #ifdef CONFIG_PPC64
 #include <linux/compat.h>
 
-static int gpr32_get(struct task_struct *target,
+static int gpr32_get_common(struct task_struct *target,
 		     const struct user_regset *regset,
 		     unsigned int pos, unsigned int count,
-		     void *kbuf, void __user *ubuf)
+			    void *kbuf, void __user *ubuf, bool tm_active)
 {
 	const unsigned long *regs = &target->thread.regs->gpr[0];
+	const unsigned long *ckpt_regs;
 	compat_ulong_t *k = kbuf;
 	compat_ulong_t __user *u = ubuf;
 	compat_ulong_t reg;
 	int i;
 
-	if (target->thread.regs == NULL)
-		return -EIO;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	ckpt_regs = &target->thread.ckpt_regs.gpr[0];
+#endif
+	if (tm_active) {
+		regs = ckpt_regs;
+	} else {
+		if (target->thread.regs == NULL)
+			return -EIO;
 
-	if (!FULL_REGS(target->thread.regs)) {
-		/* We have a partial register set.  Fill 14-31 with bogus values */
-		for (i = 14; i < 32; i++)
-			target->thread.regs->gpr[i] = NV_REG_POISON; 
+		if (!FULL_REGS(target->thread.regs)) {
+			/*
+			 * We have a partial register set.
+			 * Fill 14-31 with bogus values.
+			 */
+			for (i = 14; i < 32; i++)
+				target->thread.regs->gpr[i] = NV_REG_POISON;
+		}
 	}
 
 	pos /= sizeof(reg);
@@ -731,20 +2111,31 @@ static int gpr32_get(struct task_struct *target,
 					PT_REGS_COUNT * sizeof(reg), -1);
 }
 
-static int gpr32_set(struct task_struct *target,
+static int gpr32_set_common(struct task_struct *target,
 		     const struct user_regset *regset,
 		     unsigned int pos, unsigned int count,
-		     const void *kbuf, const void __user *ubuf)
+		     const void *kbuf, const void __user *ubuf, bool tm_active)
 {
 	unsigned long *regs = &target->thread.regs->gpr[0];
+	unsigned long *ckpt_regs;
 	const compat_ulong_t *k = kbuf;
 	const compat_ulong_t __user *u = ubuf;
 	compat_ulong_t reg;
 
-	if (target->thread.regs == NULL)
-		return -EIO;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	ckpt_regs = &target->thread.ckpt_regs.gpr[0];
+#endif
 
-	CHECK_FULL_REGS(target->thread.regs);
+	if (tm_active) {
+		regs = ckpt_regs;
+	} else {
+		regs = &target->thread.regs->gpr[0];
+
+		if (target->thread.regs == NULL)
+			return -EIO;
+
+		CHECK_FULL_REGS(target->thread.regs);
+	}
 
 	pos /= sizeof(reg);
 	count /= sizeof(reg);
@@ -804,6 +2195,40 @@ static int gpr32_set(struct task_struct *target,
 					 (PT_TRAP + 1) * sizeof(reg), -1);
 }
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static int tm_cgpr32_get(struct task_struct *target,
+		     const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     void *kbuf, void __user *ubuf)
+{
+	return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, 1);
+}
+
+static int tm_cgpr32_set(struct task_struct *target,
+		     const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     const void *kbuf, const void __user *ubuf)
+{
+	return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, 1);
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+static int gpr32_get(struct task_struct *target,
+		     const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     void *kbuf, void __user *ubuf)
+{
+	return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, 0);
+}
+
+static int gpr32_set(struct task_struct *target,
+		     const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     const void *kbuf, const void __user *ubuf)
+{
+	return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, 0);
+}
+
 /*
  * These are the regset flavors matching the CONFIG_PPC32 native set.
  */
@@ -832,6 +2257,73 @@ static const struct user_regset compat_regsets[] = {
 		.active = evr_active, .get = evr_get, .set = evr_set
 	},
 #endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	[REGSET_TM_CGPR] = {
+		.core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG,
+		.size = sizeof(long), .align = sizeof(long),
+		.active = tm_cgpr_active,
+		.get = tm_cgpr32_get, .set = tm_cgpr32_set
+	},
+	[REGSET_TM_CFPR] = {
+		.core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG,
+		.size = sizeof(double), .align = sizeof(double),
+		.active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set
+	},
+	[REGSET_TM_CVMX] = {
+		.core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX,
+		.size = sizeof(vector128), .align = sizeof(vector128),
+		.active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set
+	},
+	[REGSET_TM_CVSX] = {
+		.core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX,
+		.size = sizeof(double), .align = sizeof(double),
+		.active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set
+	},
+	[REGSET_TM_SPR] = {
+		.core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set
+	},
+	[REGSET_TM_CTAR] = {
+		.core_note_type = NT_PPC_TM_CTAR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set
+	},
+	[REGSET_TM_CPPR] = {
+		.core_note_type = NT_PPC_TM_CPPR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set
+	},
+	[REGSET_TM_CDSCR] = {
+		.core_note_type = NT_PPC_TM_CDSCR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set
+	},
+#endif
+#ifdef CONFIG_PPC64
+	[REGSET_PPR] = {
+		.core_note_type = NT_PPC_PPR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.get = ppr_get, .set = ppr_set
+	},
+	[REGSET_DSCR] = {
+		.core_note_type = NT_PPC_DSCR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.get = dscr_get, .set = dscr_set
+	},
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	[REGSET_TAR] = {
+		.core_note_type = NT_PPC_TAR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.get = tar_get, .set = tar_set
+	},
+	[REGSET_EBB] = {
+		.core_note_type = NT_PPC_EBB, .n = ELF_NEBB,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = ebb_active, .get = ebb_get, .set = ebb_set
+	},
+#endif
 };
 
 static const struct user_regset_view user_ppc_compat_view = {
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 714b4ba7ab86..dba265c586df 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -66,6 +66,7 @@
 #include <asm/hugetlb.h>
 #include <asm/livepatch.h>
 #include <asm/mmu_context.h>
+#include <asm/cpu_has_feature.h>
 
 #include "setup.h"
 
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 00f57754407e..c3e861df4b20 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -37,6 +37,7 @@
 #include <asm/serial.h>
 #include <asm/udbg.h>
 #include <asm/code-patching.h>
+#include <asm/cpu_has_feature.h>
 
 #define DBG(fmt...)
 
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index d8216aed22b7..eafb9a79e011 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -227,8 +227,8 @@ static void __init configure_exceptions(void)
 			opal_configure_cores();
 
 		/* Enable AIL if supported, and we are in hypervisor mode */
-		if (cpu_has_feature(CPU_FTR_HVMODE) &&
-		    cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		if (early_cpu_has_feature(CPU_FTR_HVMODE) &&
+		    early_cpu_has_feature(CPU_FTR_ARCH_207S)) {
 			unsigned long lpcr = mfspr(SPRN_LPCR);
 			mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
 		}
@@ -298,12 +298,12 @@ void __init early_setup(unsigned long dt_ptr)
 	 */
 	configure_exceptions();
 
-	/* Initialize the hash table or TLB handling */
-	early_init_mmu();
-
 	/* Apply all the dynamic patching */
 	apply_feature_fixups();
 
+	/* Initialize the hash table or TLB handling */
+	early_init_mmu();
+
 	/*
 	 * At this point, we can let interrupts switch to virtual mode
 	 * (the MMU has been setup), so adjust the MSR in the PACA to
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 5a1f015ea9f3..25a39052bf6b 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -55,6 +55,7 @@
 #include <asm/debug.h>
 #include <asm/kexec.h>
 #include <asm/asm-prototypes.h>
+#include <asm/cpu_has_feature.h>
 
 #ifdef DEBUG
 #include <asm/udbg.h>
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 4e7759c8ca30..3efbedefba6a 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -56,6 +56,7 @@
 #include <linux/irq_work.h>
 #include <linux/clk-provider.h>
 #include <linux/suspend.h>
+#include <linux/rtc.h>
 #include <asm/trace.h>
 
 #include <asm/io.h>
@@ -1159,6 +1160,29 @@ void calibrate_delay(void)
 	loops_per_jiffy = tb_ticks_per_jiffy;
 }
 
+#if IS_ENABLED(CONFIG_RTC_DRV_GENERIC)
+static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
+{
+	ppc_md.get_rtc_time(tm);
+	return rtc_valid_tm(tm);
+}
+
+static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
+{
+	if (!ppc_md.set_rtc_time)
+		return -EOPNOTSUPP;
+
+	if (ppc_md.set_rtc_time(tm) < 0)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static const struct rtc_class_ops rtc_generic_ops = {
+	.read_time = rtc_generic_get_time,
+	.set_time = rtc_generic_set_time,
+};
+
 static int __init rtc_init(void)
 {
 	struct platform_device *pdev;
@@ -1166,9 +1190,12 @@ static int __init rtc_init(void)
 	if (!ppc_md.get_rtc_time)
 		return -ENODEV;
 
-	pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
+	pdev = platform_device_register_data(NULL, "rtc-generic", -1,
+					     &rtc_generic_ops,
+					     sizeof(rtc_generic_ops));
 
 	return PTR_ERR_OR_ZERO(pdev);
 }
 
 device_initcall(rtc_init);
+#endif
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index f7e2f2e318bd..2cb589264cb7 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -61,6 +61,7 @@
 #include <asm/tm.h>
 #include <asm/debug.h>
 #include <asm/asm-prototypes.h>
+#include <asm/hmi.h>
 #include <sysdev/fsl_pci.h>
 
 #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
@@ -308,9 +309,13 @@ long hmi_exception_realmode(struct pt_regs *regs)
 {
 	__this_cpu_inc(irq_stat.hmi_exceptions);
 
+	wait_for_subcore_guest_exit();
+
 	if (ppc_md.hmi_exception_early)
 		ppc_md.hmi_exception_early(regs);
 
+	wait_for_tb_resync();
+
 	return 0;
 }
 
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 8d7358f3a273..b3813ddb2fb4 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -482,7 +482,7 @@ static void vio_cmo_balance(struct work_struct *work)
 
 static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
 					  dma_addr_t *dma_handle, gfp_t flag,
-					  struct dma_attrs *attrs)
+					  unsigned long attrs)
 {
 	struct vio_dev *viodev = to_vio_dev(dev);
 	void *ret;
@@ -503,7 +503,7 @@ static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
 
 static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
 					void *vaddr, dma_addr_t dma_handle,
-					struct dma_attrs *attrs)
+					unsigned long attrs)
 {
 	struct vio_dev *viodev = to_vio_dev(dev);
 
@@ -515,7 +515,7 @@ static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
 static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
                                          unsigned long offset, size_t size,
                                          enum dma_data_direction direction,
-                                         struct dma_attrs *attrs)
+                                         unsigned long attrs)
 {
 	struct vio_dev *viodev = to_vio_dev(dev);
 	struct iommu_table *tbl;
@@ -539,7 +539,7 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
 static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
 				     size_t size,
 				     enum dma_data_direction direction,
-				     struct dma_attrs *attrs)
+				     unsigned long attrs)
 {
 	struct vio_dev *viodev = to_vio_dev(dev);
 	struct iommu_table *tbl;
@@ -552,7 +552,7 @@ static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
 
 static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
                                 int nelems, enum dma_data_direction direction,
-                                struct dma_attrs *attrs)
+                                unsigned long attrs)
 {
 	struct vio_dev *viodev = to_vio_dev(dev);
 	struct iommu_table *tbl;
@@ -588,7 +588,7 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
 static void vio_dma_iommu_unmap_sg(struct device *dev,
 		struct scatterlist *sglist, int nelems,
 		enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	struct vio_dev *viodev = to_vio_dev(dev);
 	struct iommu_table *tbl;
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index eba0bea6e032..1f9e5529e692 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -20,7 +20,7 @@ common-objs-y += powerpc.o emulate.o emulate_loadstore.o
 obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
 obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o
 
-AFLAGS_booke_interrupts.o := -I$(obj)
+AFLAGS_booke_interrupts.o := -I$(objtree)/$(obj)
 
 kvm-e500-objs := \
 	$(common-objs-y) \
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e20beae5ca7a..2fd5580c8f6e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -52,6 +52,7 @@
 #include <asm/switch_to.h>
 #include <asm/smp.h>
 #include <asm/dbell.h>
+#include <asm/hmi.h>
 #include <linux/gfp.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
@@ -2522,7 +2523,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
 			spin_unlock(&pvc->lock);
 
-	kvm_guest_enter();
+	guest_enter();
 
 	srcu_idx = srcu_read_lock(&vc->kvm->srcu);
 
@@ -2570,7 +2571,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 	/* make sure updates to secondary vcpu structs are visible now */
 	smp_mb();
-	kvm_guest_exit();
+	guest_exit();
 
 	for (sub = 0; sub < core_info.n_subcores; ++sub)
 		list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
@@ -3401,6 +3402,38 @@ static struct kvmppc_ops kvm_ops_hv = {
 	.hcall_implemented = kvmppc_hcall_impl_hv,
 };
 
+static int kvm_init_subcore_bitmap(void)
+{
+	int i, j;
+	int nr_cores = cpu_nr_cores();
+	struct sibling_subcore_state *sibling_subcore_state;
+
+	for (i = 0; i < nr_cores; i++) {
+		int first_cpu = i * threads_per_core;
+		int node = cpu_to_node(first_cpu);
+
+		/* Ignore if it is already allocated. */
+		if (paca[first_cpu].sibling_subcore_state)
+			continue;
+
+		sibling_subcore_state =
+			kmalloc_node(sizeof(struct sibling_subcore_state),
+							GFP_KERNEL, node);
+		if (!sibling_subcore_state)
+			return -ENOMEM;
+
+		memset(sibling_subcore_state, 0,
+				sizeof(struct sibling_subcore_state));
+
+		for (j = 0; j < threads_per_core; j++) {
+			int cpu = first_cpu + j;
+
+			paca[cpu].sibling_subcore_state = sibling_subcore_state;
+		}
+	}
+	return 0;
+}
+
 static int kvmppc_book3s_init_hv(void)
 {
 	int r;
@@ -3411,6 +3444,10 @@ static int kvmppc_book3s_init_hv(void)
 	if (r < 0)
 		return -ENODEV;
 
+	r = kvm_init_subcore_bitmap();
+	if (r)
+		return r;
+
 	kvm_ops_hv.owner = THIS_MODULE;
 	kvmppc_hv_ops = &kvm_ops_hv;
 
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 93b5f5c9b445..0fa70a9618d7 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -13,6 +13,9 @@
 #include <linux/kernel.h>
 #include <asm/opal.h>
 #include <asm/mce.h>
+#include <asm/machdep.h>
+#include <asm/cputhreads.h>
+#include <asm/hmi.h>
 
 /* SRR1 bits for machine check on POWER7 */
 #define SRR1_MC_LDSTERR		(1ul << (63-42))
@@ -140,3 +143,176 @@ long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
 {
 	return kvmppc_realmode_mc_power7(vcpu);
 }
+
+/* Check if dynamic split is in force and return subcore size accordingly. */
+static inline int kvmppc_cur_subcore_size(void)
+{
+	if (local_paca->kvm_hstate.kvm_split_mode)
+		return local_paca->kvm_hstate.kvm_split_mode->subcore_size;
+
+	return threads_per_subcore;
+}
+
+void kvmppc_subcore_enter_guest(void)
+{
+	int thread_id, subcore_id;
+
+	thread_id = cpu_thread_in_core(local_paca->paca_index);
+	subcore_id = thread_id / kvmppc_cur_subcore_size();
+
+	local_paca->sibling_subcore_state->in_guest[subcore_id] = 1;
+}
+
+void kvmppc_subcore_exit_guest(void)
+{
+	int thread_id, subcore_id;
+
+	thread_id = cpu_thread_in_core(local_paca->paca_index);
+	subcore_id = thread_id / kvmppc_cur_subcore_size();
+
+	local_paca->sibling_subcore_state->in_guest[subcore_id] = 0;
+}
+
+static bool kvmppc_tb_resync_required(void)
+{
+	if (test_and_set_bit(CORE_TB_RESYNC_REQ_BIT,
+				&local_paca->sibling_subcore_state->flags))
+		return false;
+
+	return true;
+}
+
+static void kvmppc_tb_resync_done(void)
+{
+	clear_bit(CORE_TB_RESYNC_REQ_BIT,
+			&local_paca->sibling_subcore_state->flags);
+}
+
+/*
+ * kvmppc_realmode_hmi_handler() is called only by primary thread during
+ * guest exit path.
+ *
+ * There are multiple reasons why HMI could occur, one of them is
+ * Timebase (TB) error. If this HMI is due to TB error, then TB would
+ * have been in stopped state. The opal hmi handler Will fix it and
+ * restore the TB value with host timebase value. For HMI caused due
+ * to non-TB errors, opal hmi handler will not touch/restore TB register
+ * and hence there won't be any change in TB value.
+ *
+ * Since we are not sure about the cause of this HMI, we can't be sure
+ * about the content of TB register whether it holds guest or host timebase
+ * value. Hence the idea is to resync the TB on every HMI, so that we
+ * know about the exact state of the TB value. Resync TB call will
+ * restore TB to host timebase.
+ *
+ * Things to consider:
+ * - On TB error, HMI interrupt is reported on all the threads of the core
+ *   that has encountered TB error irrespective of split-core mode.
+ * - The very first thread on the core that get chance to fix TB error
+ *   would rsync the TB with local chipTOD value.
+ * - The resync TB is a core level action i.e. it will sync all the TBs
+ *   in that core independent of split-core mode. This means if we trigger
+ *   TB sync from a thread from one subcore, it would affect TB values of
+ *   sibling subcores of the same core.
+ *
+ * All threads need to co-ordinate before making opal hmi handler.
+ * All threads will use sibling_subcore_state->in_guest[] (shared by all
+ * threads in the core) in paca which holds information about whether
+ * sibling subcores are in Guest mode or host mode. The in_guest[] array
+ * is of size MAX_SUBCORE_PER_CORE=4, indexed using subcore id to set/unset
+ * subcore status. Only primary threads from each subcore is responsible
+ * to set/unset its designated array element while entering/exiting the
+ * guset.
+ *
+ * After invoking opal hmi handler call, one of the thread (of entire core)
+ * will need to resync the TB. Bit 63 from subcore state bitmap flags
+ * (sibling_subcore_state->flags) will be used to co-ordinate between
+ * primary threads to decide who takes up the responsibility.
+ *
+ * This is what we do:
+ * - Primary thread from each subcore tries to set resync required bit[63]
+ *   of paca->sibling_subcore_state->flags.
+ * - The first primary thread that is able to set the flag takes the
+ *   responsibility of TB resync. (Let us call it as thread leader)
+ * - All other threads which are in host will call
+ *   wait_for_subcore_guest_exit() and wait for in_guest[0-3] from
+ *   paca->sibling_subcore_state to get cleared.
+ * - All the primary thread will clear its subcore status from subcore
+ *   state in_guest[] array respectively.
+ * - Once all primary threads clear in_guest[0-3], all of them will invoke
+ *   opal hmi handler.
+ * - Now all threads will wait for TB resync to complete by invoking
+ *   wait_for_tb_resync() except the thread leader.
+ * - Thread leader will do a TB resync by invoking opal_resync_timebase()
+ *   call and the it will clear the resync required bit.
+ * - All other threads will now come out of resync wait loop and proceed
+ *   with individual execution.
+ * - On return of this function, primary thread will signal all
+ *   secondary threads to proceed.
+ * - All secondary threads will eventually call opal hmi handler on
+ *   their exit path.
+ */
+
+long kvmppc_realmode_hmi_handler(void)
+{
+	int ptid = local_paca->kvm_hstate.ptid;
+	bool resync_req;
+
+	/* This is only called on primary thread. */
+	BUG_ON(ptid != 0);
+	__this_cpu_inc(irq_stat.hmi_exceptions);
+
+	/*
+	 * By now primary thread has already completed guest->host
+	 * partition switch but haven't signaled secondaries yet.
+	 * All the secondary threads on this subcore is waiting
+	 * for primary thread to signal them to go ahead.
+	 *
+	 * For threads from subcore which isn't in guest, they all will
+	 * wait until all other subcores on this core exit the guest.
+	 *
+	 * Now set the resync required bit. If you are the first to
+	 * set this bit then kvmppc_tb_resync_required() function will
+	 * return true. For rest all other subcores
+	 * kvmppc_tb_resync_required() will return false.
+	 *
+	 * If resync_req == true, then this thread is responsible to
+	 * initiate TB resync after hmi handler has completed.
+	 * All other threads on this core will wait until this thread
+	 * clears the resync required bit flag.
+	 */
+	resync_req = kvmppc_tb_resync_required();
+
+	/* Reset the subcore status to indicate it has exited guest */
+	kvmppc_subcore_exit_guest();
+
+	/*
+	 * Wait for other subcores on this core to exit the guest.
+	 * All the primary threads and threads from subcore that are
+	 * not in guest will wait here until all subcores are out
+	 * of guest context.
+	 */
+	wait_for_subcore_guest_exit();
+
+	/*
+	 * At this point we are sure that primary threads from each
+	 * subcore on this core have completed guest->host partition
+	 * switch. Now it is safe to call HMI handler.
+	 */
+	if (ppc_md.hmi_exception_early)
+		ppc_md.hmi_exception_early(NULL);
+
+	/*
+	 * Check if this thread is responsible to resync TB.
+	 * All other threads will wait until this thread completes the
+	 * TB resync.
+	 */
+	if (resync_req) {
+		opal_resync_timebase();
+		/* Reset TB resync req bit */
+		kvmppc_tb_resync_done();
+	} else {
+		wait_for_tb_resync();
+	}
+	return 0;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 86f0cae37a85..975655573844 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -29,6 +29,7 @@
 #include <asm/kvm_book3s_asm.h>
 #include <asm/book3s/64/mmu-hash.h>
 #include <asm/tm.h>
+#include <asm/opal.h>
 
 #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
 
@@ -373,6 +374,18 @@ kvm_secondary_got_guest:
 	lwsync
 	std	r0, HSTATE_KVM_VCORE(r13)
 
+	/*
+	 * All secondaries exiting guest will fall through this path.
+	 * Before proceeding, just check for HMI interrupt and
+	 * invoke opal hmi handler. By now we are sure that the
+	 * primary thread on this core/subcore has already made partition
+	 * switch/TB resync and we are good to call opal hmi handler.
+	 */
+	cmpwi	r12, BOOK3S_INTERRUPT_HMI
+	bne	kvm_no_guest
+
+	li	r3,0			/* NULL argument */
+	bl	hmi_exception_realmode
 /*
  * At this point we have finished executing in the guest.
  * We need to wait for hwthread_req to become zero, since
@@ -428,6 +441,22 @@ kvm_no_guest:
  */
 kvm_unsplit_nap:
 	/*
+	 * When secondaries are napping in kvm_unsplit_nap() with
+	 * hwthread_req = 1, HMI goes ignored even though subcores are
+	 * already exited the guest. Hence HMI keeps waking up secondaries
+	 * from nap in a loop and secondaries always go back to nap since
+	 * no vcore is assigned to them. This makes impossible for primary
+	 * thread to get hold of secondary threads resulting into a soft
+	 * lockup in KVM path.
+	 *
+	 * Let us check if HMI is pending and handle it before we go to nap.
+	 */
+	cmpwi	r12, BOOK3S_INTERRUPT_HMI
+	bne	55f
+	li	r3, 0			/* NULL argument */
+	bl	hmi_exception_realmode
+55:
+	/*
 	 * Ensure that secondary doesn't nap when it has
 	 * its vcore pointer set.
 	 */
@@ -601,6 +630,11 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_DPDES, r8
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
+	/* Mark the subcore state as inside guest */
+	bl	kvmppc_subcore_enter_guest
+	nop
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	ld	r4, HSTATE_KVM_VCPU(r13)
 	li	r0,1
 	stb	r0,VCORE_IN_GUEST(r5)	/* signal secondaries to continue */
 
@@ -655,112 +689,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 BEGIN_FTR_SECTION
-	b	skip_tm
-END_FTR_SECTION_IFCLR(CPU_FTR_TM)
-
-	/* Turn on TM/FP/VSX/VMX so we can restore them. */
-	mfmsr	r5
-	li	r6, MSR_TM >> 32
-	sldi	r6, r6, 32
-	or	r5, r5, r6
-	ori	r5, r5, MSR_FP
-	oris	r5, r5, (MSR_VEC | MSR_VSX)@h
-	mtmsrd	r5
-
-	/*
-	 * The user may change these outside of a transaction, so they must
-	 * always be context switched.
-	 */
-	ld	r5, VCPU_TFHAR(r4)
-	ld	r6, VCPU_TFIAR(r4)
-	ld	r7, VCPU_TEXASR(r4)
-	mtspr	SPRN_TFHAR, r5
-	mtspr	SPRN_TFIAR, r6
-	mtspr	SPRN_TEXASR, r7
-
-	ld	r5, VCPU_MSR(r4)
-	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
-	beq	skip_tm	/* TM not active in guest */
-
-	/* Make sure the failure summary is set, otherwise we'll program check
-	 * when we trechkpt.  It's possible that this might have been not set
-	 * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
-	 * host.
-	 */
-	oris	r7, r7, (TEXASR_FS)@h
-	mtspr	SPRN_TEXASR, r7
-
-	/*
-	 * We need to load up the checkpointed state for the guest.
-	 * We need to do this early as it will blow away any GPRs, VSRs and
-	 * some SPRs.
-	 */
-
-	mr	r31, r4
-	addi	r3, r31, VCPU_FPRS_TM
-	bl	load_fp_state
-	addi	r3, r31, VCPU_VRS_TM
-	bl	load_vr_state
-	mr	r4, r31
-	lwz	r7, VCPU_VRSAVE_TM(r4)
-	mtspr	SPRN_VRSAVE, r7
-
-	ld	r5, VCPU_LR_TM(r4)
-	lwz	r6, VCPU_CR_TM(r4)
-	ld	r7, VCPU_CTR_TM(r4)
-	ld	r8, VCPU_AMR_TM(r4)
-	ld	r9, VCPU_TAR_TM(r4)
-	mtlr	r5
-	mtcr	r6
-	mtctr	r7
-	mtspr	SPRN_AMR, r8
-	mtspr	SPRN_TAR, r9
-
-	/*
-	 * Load up PPR and DSCR values but don't put them in the actual SPRs
-	 * till the last moment to avoid running with userspace PPR and DSCR for
-	 * too long.
-	 */
-	ld	r29, VCPU_DSCR_TM(r4)
-	ld	r30, VCPU_PPR_TM(r4)
-
-	std	r2, PACATMSCRATCH(r13) /* Save TOC */
-
-	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
-	li	r5, 0
-	mtmsrd	r5, 1
-
-	/* Load GPRs r0-r28 */
-	reg = 0
-	.rept	29
-	ld	reg, VCPU_GPRS_TM(reg)(r31)
-	reg = reg + 1
-	.endr
-
-	mtspr	SPRN_DSCR, r29
-	mtspr	SPRN_PPR, r30
-
-	/* Load final GPRs */
-	ld	29, VCPU_GPRS_TM(29)(r31)
-	ld	30, VCPU_GPRS_TM(30)(r31)
-	ld	31, VCPU_GPRS_TM(31)(r31)
-
-	/* TM checkpointed state is now setup.  All GPRs are now volatile. */
-	TRECHKPT
-
-	/* Now let's get back the state we need. */
-	HMT_MEDIUM
-	GET_PACA(r13)
-	ld	r29, HSTATE_DSCR(r13)
-	mtspr	SPRN_DSCR, r29
-	ld	r4, HSTATE_KVM_VCPU(r13)
-	ld	r1, HSTATE_HOST_R1(r13)
-	ld	r2, PACATMSCRATCH(r13)
-
-	/* Set the MSR RI since we have our registers back. */
-	li	r5, MSR_RI
-	mtmsrd	r5, 1
-skip_tm:
+	bl	kvmppc_restore_tm
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
 
 	/* Load guest PMU registers */
@@ -841,12 +771,6 @@ BEGIN_FTR_SECTION
 	/* Skip next section on POWER7 */
 	b	8f
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
-	/* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
-	mfmsr	r8
-	li	r0, 1
-	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
-	mtmsrd	r8
-
 	/* Load up POWER8-specific registers */
 	ld	r5, VCPU_IAMR(r4)
 	lwz	r6, VCPU_PSPB(r4)
@@ -1436,106 +1360,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 BEGIN_FTR_SECTION
-	b	2f
-END_FTR_SECTION_IFCLR(CPU_FTR_TM)
-	/* Turn on TM. */
-	mfmsr	r8
-	li	r0, 1
-	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
-	mtmsrd	r8
-
-	ld	r5, VCPU_MSR(r9)
-	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
-	beq	1f	/* TM not active in guest. */
-
-	li	r3, TM_CAUSE_KVM_RESCHED
-
-	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
-	li	r5, 0
-	mtmsrd	r5, 1
-
-	/* All GPRs are volatile at this point. */
-	TRECLAIM(R3)
-
-	/* Temporarily store r13 and r9 so we have some regs to play with */
-	SET_SCRATCH0(r13)
-	GET_PACA(r13)
-	std	r9, PACATMSCRATCH(r13)
-	ld	r9, HSTATE_KVM_VCPU(r13)
-
-	/* Get a few more GPRs free. */
-	std	r29, VCPU_GPRS_TM(29)(r9)
-	std	r30, VCPU_GPRS_TM(30)(r9)
-	std	r31, VCPU_GPRS_TM(31)(r9)
-
-	/* Save away PPR and DSCR soon so don't run with user values. */
-	mfspr	r31, SPRN_PPR
-	HMT_MEDIUM
-	mfspr	r30, SPRN_DSCR
-	ld	r29, HSTATE_DSCR(r13)
-	mtspr	SPRN_DSCR, r29
-
-	/* Save all but r9, r13 & r29-r31 */
-	reg = 0
-	.rept	29
-	.if (reg != 9) && (reg != 13)
-	std	reg, VCPU_GPRS_TM(reg)(r9)
-	.endif
-	reg = reg + 1
-	.endr
-	/* ... now save r13 */
-	GET_SCRATCH0(r4)
-	std	r4, VCPU_GPRS_TM(13)(r9)
-	/* ... and save r9 */
-	ld	r4, PACATMSCRATCH(r13)
-	std	r4, VCPU_GPRS_TM(9)(r9)
-
-	/* Reload stack pointer and TOC. */
-	ld	r1, HSTATE_HOST_R1(r13)
-	ld	r2, PACATOC(r13)
-
-	/* Set MSR RI now we have r1 and r13 back. */
-	li	r5, MSR_RI
-	mtmsrd	r5, 1
-
-	/* Save away checkpinted SPRs. */
-	std	r31, VCPU_PPR_TM(r9)
-	std	r30, VCPU_DSCR_TM(r9)
-	mflr	r5
-	mfcr	r6
-	mfctr	r7
-	mfspr	r8, SPRN_AMR
-	mfspr	r10, SPRN_TAR
-	std	r5, VCPU_LR_TM(r9)
-	stw	r6, VCPU_CR_TM(r9)
-	std	r7, VCPU_CTR_TM(r9)
-	std	r8, VCPU_AMR_TM(r9)
-	std	r10, VCPU_TAR_TM(r9)
-
-	/* Restore r12 as trap number. */
-	lwz	r12, VCPU_TRAP(r9)
-
-	/* Save FP/VSX. */
-	addi	r3, r9, VCPU_FPRS_TM
-	bl	store_fp_state
-	addi	r3, r9, VCPU_VRS_TM
-	bl	store_vr_state
-	mfspr	r6, SPRN_VRSAVE
-	stw	r6, VCPU_VRSAVE_TM(r9)
-1:
-	/*
-	 * We need to save these SPRs after the treclaim so that the software
-	 * error code is recorded correctly in the TEXASR.  Also the user may
-	 * change these outside of a transaction, so they must always be
-	 * context switched.
-	 */
-	mfspr	r5, SPRN_TFHAR
-	mfspr	r6, SPRN_TFIAR
-	mfspr	r7, SPRN_TEXASR
-	std	r5, VCPU_TFHAR(r9)
-	std	r6, VCPU_TFIAR(r9)
-	std	r7, VCPU_TEXASR(r9)
-2:
+	bl	kvmppc_save_tm
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
 
 	/* Increment yield count if they have a VPA */
@@ -1683,6 +1509,23 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_DPDES, r8
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
+	/* If HMI, call kvmppc_realmode_hmi_handler() */
+	cmpwi	r12, BOOK3S_INTERRUPT_HMI
+	bne	27f
+	bl	kvmppc_realmode_hmi_handler
+	nop
+	li	r12, BOOK3S_INTERRUPT_HMI
+	/*
+	 * At this point kvmppc_realmode_hmi_handler would have resync-ed
+	 * the TB. Hence it is not required to subtract guest timebase
+	 * offset from timebase. So, skip it.
+	 *
+	 * Also, do not call kvmppc_subcore_exit_guest() because it has
+	 * been invoked as part of kvmppc_realmode_hmi_handler().
+	 */
+	b	30f
+
+27:
 	/* Subtract timebase offset from timebase */
 	ld	r8,VCORE_TB_OFFSET(r5)
 	cmpdi	r8,0
@@ -1698,8 +1541,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	addis	r8,r8,0x100		/* if so, increment upper 40 bits */
 	mtspr	SPRN_TBU40,r8
 
+17:	bl	kvmppc_subcore_exit_guest
+	nop
+30:	ld	r5,HSTATE_KVM_VCORE(r13)
+	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
+
 	/* Reset PCR */
-17:	ld	r0, VCORE_PCR(r5)
+	ld	r0, VCORE_PCR(r5)
 	cmpdi	r0, 0
 	beq	18f
 	li	r0, 0
@@ -2245,6 +2093,13 @@ _GLOBAL(kvmppc_h_cede)		/* r3 = vcpu pointer, r11 = msr, r13 = paca */
 	/* save FP state */
 	bl	kvmppc_save_fp
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+	ld	r9, HSTATE_KVM_VCPU(r13)
+	bl	kvmppc_save_tm
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif
+
 	/*
 	 * Set DEC to the smaller of DEC and HDEC, so that we wake
 	 * no later than the end of our timeslice (HDEC interrupts
@@ -2321,6 +2176,12 @@ kvm_end_cede:
 	bl	kvmhv_accumulate_time
 #endif
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+	bl	kvmppc_restore_tm
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif
+
 	/* load up FP state */
 	bl	kvmppc_load_fp
 
@@ -2461,6 +2322,8 @@ BEGIN_FTR_SECTION
 	cmpwi	r6, 3			/* hypervisor doorbell? */
 	beq	3f
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	cmpwi	r6, 0xa			/* Hypervisor maintenance ? */
+	beq	4f
 	li	r3, 1			/* anything else, return 1 */
 0:	blr
 
@@ -2482,6 +2345,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	li	r3, -1
 	blr
 
+	/* Woken up due to Hypervisor maintenance interrupt */
+4:	li	r12, BOOK3S_INTERRUPT_HMI
+	li	r3, 1
+	blr
+
 /*
  * Determine what sort of external interrupt is pending (if any).
  * Returns:
@@ -2631,6 +2499,239 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	mr	r4,r31
 	blr
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Save transactional state and TM-related registers.
+ * Called with r9 pointing to the vcpu struct.
+ * This can modify all checkpointed registers, but
+ * restores r1, r2 and r9 (vcpu pointer) before exit.
+ */
+kvmppc_save_tm:
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+
+	/* Turn on TM. */
+	mfmsr	r8
+	li	r0, 1
+	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+	mtmsrd	r8
+
+	ld	r5, VCPU_MSR(r9)
+	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+	beq	1f	/* TM not active in guest. */
+
+	std	r1, HSTATE_HOST_R1(r13)
+	li	r3, TM_CAUSE_KVM_RESCHED
+
+	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
+	li	r5, 0
+	mtmsrd	r5, 1
+
+	/* All GPRs are volatile at this point. */
+	TRECLAIM(R3)
+
+	/* Temporarily store r13 and r9 so we have some regs to play with */
+	SET_SCRATCH0(r13)
+	GET_PACA(r13)
+	std	r9, PACATMSCRATCH(r13)
+	ld	r9, HSTATE_KVM_VCPU(r13)
+
+	/* Get a few more GPRs free. */
+	std	r29, VCPU_GPRS_TM(29)(r9)
+	std	r30, VCPU_GPRS_TM(30)(r9)
+	std	r31, VCPU_GPRS_TM(31)(r9)
+
+	/* Save away PPR and DSCR soon so don't run with user values. */
+	mfspr	r31, SPRN_PPR
+	HMT_MEDIUM
+	mfspr	r30, SPRN_DSCR
+	ld	r29, HSTATE_DSCR(r13)
+	mtspr	SPRN_DSCR, r29
+
+	/* Save all but r9, r13 & r29-r31 */
+	reg = 0
+	.rept	29
+	.if (reg != 9) && (reg != 13)
+	std	reg, VCPU_GPRS_TM(reg)(r9)
+	.endif
+	reg = reg + 1
+	.endr
+	/* ... now save r13 */
+	GET_SCRATCH0(r4)
+	std	r4, VCPU_GPRS_TM(13)(r9)
+	/* ... and save r9 */
+	ld	r4, PACATMSCRATCH(r13)
+	std	r4, VCPU_GPRS_TM(9)(r9)
+
+	/* Reload stack pointer and TOC. */
+	ld	r1, HSTATE_HOST_R1(r13)
+	ld	r2, PACATOC(r13)
+
+	/* Set MSR RI now we have r1 and r13 back. */
+	li	r5, MSR_RI
+	mtmsrd	r5, 1
+
+	/* Save away checkpinted SPRs. */
+	std	r31, VCPU_PPR_TM(r9)
+	std	r30, VCPU_DSCR_TM(r9)
+	mflr	r5
+	mfcr	r6
+	mfctr	r7
+	mfspr	r8, SPRN_AMR
+	mfspr	r10, SPRN_TAR
+	std	r5, VCPU_LR_TM(r9)
+	stw	r6, VCPU_CR_TM(r9)
+	std	r7, VCPU_CTR_TM(r9)
+	std	r8, VCPU_AMR_TM(r9)
+	std	r10, VCPU_TAR_TM(r9)
+
+	/* Restore r12 as trap number. */
+	lwz	r12, VCPU_TRAP(r9)
+
+	/* Save FP/VSX. */
+	addi	r3, r9, VCPU_FPRS_TM
+	bl	store_fp_state
+	addi	r3, r9, VCPU_VRS_TM
+	bl	store_vr_state
+	mfspr	r6, SPRN_VRSAVE
+	stw	r6, VCPU_VRSAVE_TM(r9)
+1:
+	/*
+	 * We need to save these SPRs after the treclaim so that the software
+	 * error code is recorded correctly in the TEXASR.  Also the user may
+	 * change these outside of a transaction, so they must always be
+	 * context switched.
+	 */
+	mfspr	r5, SPRN_TFHAR
+	mfspr	r6, SPRN_TFIAR
+	mfspr	r7, SPRN_TEXASR
+	std	r5, VCPU_TFHAR(r9)
+	std	r6, VCPU_TFIAR(r9)
+	std	r7, VCPU_TEXASR(r9)
+
+	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
+	blr
+
+/*
+ * Restore transactional state and TM-related registers.
+ * Called with r4 pointing to the vcpu struct.
+ * This potentially modifies all checkpointed registers.
+ * It restores r1, r2, r4 from the PACA.
+ */
+kvmppc_restore_tm:
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+
+	/* Turn on TM/FP/VSX/VMX so we can restore them. */
+	mfmsr	r5
+	li	r6, MSR_TM >> 32
+	sldi	r6, r6, 32
+	or	r5, r5, r6
+	ori	r5, r5, MSR_FP
+	oris	r5, r5, (MSR_VEC | MSR_VSX)@h
+	mtmsrd	r5
+
+	/*
+	 * The user may change these outside of a transaction, so they must
+	 * always be context switched.
+	 */
+	ld	r5, VCPU_TFHAR(r4)
+	ld	r6, VCPU_TFIAR(r4)
+	ld	r7, VCPU_TEXASR(r4)
+	mtspr	SPRN_TFHAR, r5
+	mtspr	SPRN_TFIAR, r6
+	mtspr	SPRN_TEXASR, r7
+
+	ld	r5, VCPU_MSR(r4)
+	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+	beqlr		/* TM not active in guest */
+	std	r1, HSTATE_HOST_R1(r13)
+
+	/* Make sure the failure summary is set, otherwise we'll program check
+	 * when we trechkpt.  It's possible that this might have been not set
+	 * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
+	 * host.
+	 */
+	oris	r7, r7, (TEXASR_FS)@h
+	mtspr	SPRN_TEXASR, r7
+
+	/*
+	 * We need to load up the checkpointed state for the guest.
+	 * We need to do this early as it will blow away any GPRs, VSRs and
+	 * some SPRs.
+	 */
+
+	mr	r31, r4
+	addi	r3, r31, VCPU_FPRS_TM
+	bl	load_fp_state
+	addi	r3, r31, VCPU_VRS_TM
+	bl	load_vr_state
+	mr	r4, r31
+	lwz	r7, VCPU_VRSAVE_TM(r4)
+	mtspr	SPRN_VRSAVE, r7
+
+	ld	r5, VCPU_LR_TM(r4)
+	lwz	r6, VCPU_CR_TM(r4)
+	ld	r7, VCPU_CTR_TM(r4)
+	ld	r8, VCPU_AMR_TM(r4)
+	ld	r9, VCPU_TAR_TM(r4)
+	mtlr	r5
+	mtcr	r6
+	mtctr	r7
+	mtspr	SPRN_AMR, r8
+	mtspr	SPRN_TAR, r9
+
+	/*
+	 * Load up PPR and DSCR values but don't put them in the actual SPRs
+	 * till the last moment to avoid running with userspace PPR and DSCR for
+	 * too long.
+	 */
+	ld	r29, VCPU_DSCR_TM(r4)
+	ld	r30, VCPU_PPR_TM(r4)
+
+	std	r2, PACATMSCRATCH(r13) /* Save TOC */
+
+	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
+	li	r5, 0
+	mtmsrd	r5, 1
+
+	/* Load GPRs r0-r28 */
+	reg = 0
+	.rept	29
+	ld	reg, VCPU_GPRS_TM(reg)(r31)
+	reg = reg + 1
+	.endr
+
+	mtspr	SPRN_DSCR, r29
+	mtspr	SPRN_PPR, r30
+
+	/* Load final GPRs */
+	ld	29, VCPU_GPRS_TM(29)(r31)
+	ld	30, VCPU_GPRS_TM(30)(r31)
+	ld	31, VCPU_GPRS_TM(31)(r31)
+
+	/* TM checkpointed state is now setup.  All GPRs are now volatile. */
+	TRECHKPT
+
+	/* Now let's get back the state we need. */
+	HMT_MEDIUM
+	GET_PACA(r13)
+	ld	r29, HSTATE_DSCR(r13)
+	mtspr	SPRN_DSCR, r29
+	ld	r4, HSTATE_KVM_VCPU(r13)
+	ld	r1, HSTATE_HOST_R1(r13)
+	ld	r2, PACATMSCRATCH(r13)
+
+	/* Set the MSR RI since we have our registers back. */
+	li	r5, MSR_RI
+	mtmsrd	r5, 1
+
+	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
+	blr
+#endif
+
 /*
  * We come here if we get any exception or interrupt while we are
  * executing host real mode code while in guest MMU context.
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index c4f7d6b86b9e..e76f79a45988 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -914,7 +914,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	/* We get here with MSR.EE=1 */
 
 	trace_kvm_exit(exit_nr, vcpu);
-	kvm_guest_exit();
+	guest_exit();
 
 	switch (exit_nr) {
 	case BOOK3S_INTERRUPT_INST_STORAGE:
@@ -1049,7 +1049,17 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		int emul;
 
 program_interrupt:
-		flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
+		/*
+		 * shadow_srr1 only contains valid flags if we came here via
+		 * a program exception. The other exceptions (emulation assist,
+		 * FP unavailable, etc.) do not provide flags in SRR1, so use
+		 * an illegal-instruction exception when injecting a program
+		 * interrupt into the guest.
+		 */
+		if (exit_nr == BOOK3S_INTERRUPT_PROGRAM)
+			flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
+		else
+			flags = SRR1_PROGILL;
 
 		emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
 		if (emul != EMULATE_DONE) {
@@ -1531,7 +1541,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	kvmppc_clear_debug(vcpu);
 
-	/* No need for kvm_guest_exit. It's done in handle_exit.
+	/* No need for guest_exit. It's done in handle_exit.
 	   We also get here with interrupts enabled. */
 
 	/* Make sure we save the guest FPU/Altivec/VSX state */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 4afae695899a..02b4672f7347 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -776,7 +776,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
 
-	/* No need for kvm_guest_exit. It's done in handle_exit.
+	/* No need for guest_exit. It's done in handle_exit.
 	   We also get here with interrupts enabled. */
 
 	/* Switch back to user space debug context */
@@ -1012,7 +1012,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	}
 
 	trace_kvm_exit(exit_nr, vcpu);
-	__kvm_guest_exit();
+	guest_exit_irqoff();
 
 	local_irq_enable();
 
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 5cc2e7af3a7b..b379146de55b 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -302,7 +302,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			advance = 0;
 			printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
 			       "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
-			kvmppc_core_queue_program(vcpu, 0);
 		}
 	}
 
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index 6249cdc834d1..ed38f8114118 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -1823,7 +1823,8 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	return 0;
 }
 
-int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm *kvm,
+			  struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue)
 {
 	int r = -EINVAL;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 02416fea7653..6ce40dd6fe51 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -119,7 +119,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 			continue;
 		}
 
-		__kvm_guest_enter();
+		guest_enter_irqoff();
 		return 1;
 	}
 
@@ -588,6 +588,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = 1;
 		break;
 #endif
+	case KVM_CAP_PPC_HTM:
+		r = cpu_has_feature(CPU_FTR_TM_COMP) &&
+		    is_kvmppc_hv_enabled(kvm);
+		break;
 	default:
 		r = 0;
 		break;
diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c
index 60b0b3fc8fc1..a58abe4afbd1 100644
--- a/arch/powerpc/lib/alloc.c
+++ b/arch/powerpc/lib/alloc.c
@@ -6,7 +6,7 @@
 #include <asm/setup.h>
 
 
-void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask)
+void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)
 {
 	void *p;
 
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index defb2998b818..74145f02ad41 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/jump_label.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/init.h>
@@ -152,9 +153,18 @@ static void do_final_fixups(void)
 #endif
 }
 
-void apply_feature_fixups(void)
+static unsigned long __initdata saved_cpu_features;
+static unsigned int __initdata saved_mmu_features;
+#ifdef CONFIG_PPC64
+static unsigned long __initdata saved_firmware_features;
+#endif
+
+void __init apply_feature_fixups(void)
 {
-	struct cpu_spec *spec = *PTRRELOC(&cur_cpu_spec);
+	struct cpu_spec *spec = PTRRELOC(*PTRRELOC(&cur_cpu_spec));
+
+	*PTRRELOC(&saved_cpu_features) = spec->cpu_features;
+	*PTRRELOC(&saved_mmu_features) = spec->mmu_features;
 
 	/*
 	 * Apply the CPU-specific and firmware specific fixups to kernel text
@@ -173,11 +183,36 @@ void apply_feature_fixups(void)
 			 PTRRELOC(&__stop___lwsync_fixup));
 
 #ifdef CONFIG_PPC64
+	saved_firmware_features = powerpc_firmware_features;
 	do_feature_fixups(powerpc_firmware_features,
 			  &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
 #endif
 	do_final_fixups();
+
+	/*
+	 * Initialise jump label. This causes all the cpu/mmu_has_feature()
+	 * checks to take on their correct polarity based on the current set of
+	 * CPU/MMU features.
+	 */
+	jump_label_init();
+	cpu_feature_keys_init();
+	mmu_feature_keys_init();
+}
+
+static int __init check_features(void)
+{
+	WARN(saved_cpu_features != cur_cpu_spec->cpu_features,
+	     "CPU features changed after feature patching!\n");
+	WARN(saved_mmu_features != cur_cpu_spec->mmu_features,
+	     "MMU features changed after feature patching!\n");
+#ifdef CONFIG_PPC64
+	WARN(saved_firmware_features != powerpc_firmware_features,
+	     "Firmware features changed after feature patching!\n");
+#endif
+
+	return 0;
 }
+late_initcall(check_features);
 
 #ifdef CONFIG_FTR_FIXUP_SELFTEST
 
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 88ce7d212320..0e4e9654bd2c 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -72,8 +72,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
 		/* clear out bits after (52) [0....52.....63] */
 		va &= ~((1ul << (64 - 52)) - 1);
 		va |= ssize << 8;
-		sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
-			((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
+		sllp = get_sllp_encoding(apsize);
 		va |= sllp << 5;
 		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
 			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
@@ -122,8 +121,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
 		/* clear out bits after(52) [0....52.....63] */
 		va &= ~((1ul << (64 - 52)) - 1);
 		va |= ssize << 8;
-		sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
-			((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
+		sllp = get_sllp_encoding(apsize);
 		va |= sllp << 5;
 		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
 			     : : "r"(va) : "memory");
@@ -749,5 +747,5 @@ void __init hpte_init_native(void)
 	mmu_hash_ops.hugepage_invalidate   = native_hugepage_invalidate;
 
 	if (cpu_has_feature(CPU_FTR_ARCH_300))
-		ppc_md.register_process_table = native_register_proc_table;
+		register_process_table = native_register_proc_table;
 }
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index b78b5d211278..0821556e16f4 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -363,11 +363,6 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node,
 	return 0;
 }
 
-static void __init htab_init_seg_sizes(void)
-{
-	of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL);
-}
-
 static int __init get_idx_from_shift(unsigned int shift)
 {
 	int idx = -1;
@@ -539,7 +534,7 @@ static bool might_have_hea(void)
 
 #endif /* #ifdef CONFIG_PPC_64K_PAGES */
 
-static void __init htab_init_page_sizes(void)
+static void __init htab_scan_page_sizes(void)
 {
 	int rc;
 
@@ -554,17 +549,23 @@ static void __init htab_init_page_sizes(void)
 	 * Try to find the available page sizes in the device-tree
 	 */
 	rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
-	if (rc != 0)  /* Found */
-		goto found;
-
-	/*
-	 * Not in the device-tree, let's fallback on known size
-	 * list for 16M capable GP & GR
-	 */
-	if (mmu_has_feature(MMU_FTR_16M_PAGE))
+	if (rc == 0 && early_mmu_has_feature(MMU_FTR_16M_PAGE)) {
+		/*
+		 * Nothing in the device-tree, but the CPU supports 16M pages,
+		 * so let's fallback on a known size list for 16M capable CPUs.
+		 */
 		memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
 		       sizeof(mmu_psize_defaults_gp));
-found:
+	}
+
+#ifdef CONFIG_HUGETLB_PAGE
+	/* Reserve 16G huge page memory sections for huge pages */
+	of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
+#endif /* CONFIG_HUGETLB_PAGE */
+}
+
+static void __init htab_init_page_sizes(void)
+{
 	if (!debug_pagealloc_enabled()) {
 		/*
 		 * Pick a size for the linear mapping. Currently, we only
@@ -630,11 +631,6 @@ found:
 	       ,mmu_psize_defs[mmu_vmemmap_psize].shift
 #endif
 	       );
-
-#ifdef CONFIG_HUGETLB_PAGE
-	/* Reserve 16G huge page memory sections for huge pages */
-	of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
-#endif /* CONFIG_HUGETLB_PAGE */
 }
 
 static int __init htab_dt_scan_pftsize(unsigned long node,
@@ -759,12 +755,6 @@ static void __init htab_initialize(void)
 
 	DBG(" -> htab_initialize()\n");
 
-	/* Initialize segment sizes */
-	htab_init_seg_sizes();
-
-	/* Initialize page sizes */
-	htab_init_page_sizes();
-
 	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
 		mmu_kernel_ssize = MMU_SEGSIZE_1T;
 		mmu_highuser_ssize = MMU_SEGSIZE_1T;
@@ -885,8 +875,19 @@ static void __init htab_initialize(void)
 #undef KB
 #undef MB
 
+void __init hash__early_init_devtree(void)
+{
+	/* Initialize segment sizes */
+	of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL);
+
+	/* Initialize page sizes */
+	htab_scan_page_sizes();
+}
+
 void __init hash__early_init_mmu(void)
 {
+	htab_init_page_sizes();
+
 	/*
 	 * initialize page table size
 	 */
diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c
index 1e11559e1aac..35254a678456 100644
--- a/arch/powerpc/mm/hugetlbpage-radix.c
+++ b/arch/powerpc/mm/hugetlbpage-radix.c
@@ -5,39 +5,34 @@
 #include <asm/cacheflush.h>
 #include <asm/machdep.h>
 #include <asm/mman.h>
+#include <asm/tlb.h>
 
 void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 {
-	unsigned long ap, shift;
+	int psize;
 	struct hstate *hstate = hstate_file(vma->vm_file);
 
-	shift = huge_page_shift(hstate);
-	if (shift == mmu_psize_defs[MMU_PAGE_2M].shift)
-		ap = mmu_get_ap(MMU_PAGE_2M);
-	else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
-		ap = mmu_get_ap(MMU_PAGE_1G);
-	else {
-		WARN(1, "Wrong huge page shift\n");
-		return ;
-	}
-	radix___flush_tlb_page(vma->vm_mm, vmaddr, ap, 0);
+	psize = hstate_get_psize(hstate);
+	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, psize);
 }
 
 void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 {
-	unsigned long ap, shift;
+	int psize;
 	struct hstate *hstate = hstate_file(vma->vm_file);
 
-	shift = huge_page_shift(hstate);
-	if (shift == mmu_psize_defs[MMU_PAGE_2M].shift)
-		ap = mmu_get_ap(MMU_PAGE_2M);
-	else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
-		ap = mmu_get_ap(MMU_PAGE_1G);
-	else {
-		WARN(1, "Wrong huge page shift\n");
-		return ;
-	}
-	radix___local_flush_tlb_page(vma->vm_mm, vmaddr, ap, 0);
+	psize = hstate_get_psize(hstate);
+	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, psize);
+}
+
+void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long start,
+				   unsigned long end)
+{
+	int psize;
+	struct hstate *hstate = hstate_file(vma->vm_file);
+
+	psize = hstate_get_psize(hstate);
+	radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize);
 }
 
 /*
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 33709bdb0419..16ada1eb7e26 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -411,3 +411,25 @@ struct page *realmode_pfn_to_page(unsigned long pfn)
 EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
 
 #endif /* CONFIG_SPARSEMEM_VMEMMAP/CONFIG_FLATMEM */
+
+#ifdef CONFIG_PPC_STD_MMU_64
+static bool disable_radix;
+static int __init parse_disable_radix(char *p)
+{
+	disable_radix = true;
+	return 0;
+}
+early_param("disable_radix", parse_disable_radix);
+
+void __init mmu_early_init_devtree(void)
+{
+	/* Disable radix mode based on kernel command line. */
+	if (disable_radix)
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+
+	if (early_radix_enabled())
+		radix__early_init_devtree();
+	else
+		hash__early_init_devtree();
+}
+#endif /* CONFIG_PPC_STD_MMU_64 */
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 670318766545..34079302cc17 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -14,6 +14,9 @@
 #include "mmu_decl.h"
 #include <trace/events/thp.h>
 
+int (*register_process_table)(unsigned long base, unsigned long page_size,
+			      unsigned long tbl_size);
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /*
  * This is called when relaxing access to a hugepage. It's also called in the page
@@ -33,7 +36,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 	changed = !pmd_same(*(pmdp), entry);
 	if (changed) {
 		__ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
-		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+		flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
 	}
 	return changed;
 }
@@ -66,7 +69,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 		     pmd_t *pmdp)
 {
 	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
-	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+	flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
 	/*
 	 * This ensures that generic code that rely on IRQ disabling
 	 * to prevent a parallel THP split work as expected.
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 003ff48a11b6..af897d91d09f 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -171,7 +171,7 @@ redo:
 	 * of process table here. But our linear mapping also enable us to use
 	 * physical address here.
 	 */
-	ppc_md.register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
+	register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
 	pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd);
 }
 
@@ -198,7 +198,7 @@ static void __init radix_init_partition_table(void)
 
 void __init radix_init_native(void)
 {
-	ppc_md.register_process_table = native_register_process_table;
+	register_process_table = native_register_process_table;
 }
 
 static int __init get_idx_from_shift(unsigned int shift)
@@ -264,7 +264,7 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
 	return 1;
 }
 
-static void __init radix_init_page_sizes(void)
+void __init radix__early_init_devtree(void)
 {
 	int rc;
 
@@ -343,7 +343,6 @@ void __init radix__early_init_mmu(void)
 	__pte_frag_nr = H_PTE_FRAG_NR;
 	__pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;
 
-	radix_init_page_sizes();
 	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
 		radix_init_native();
 		lpcr = mfspr(SPRN_LPCR);
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 88a307504b5a..0b6fb244d0a1 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -225,7 +225,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 		if (!is_vm_hugetlb_page(vma))
 			assert_pte_locked(vma->vm_mm, address);
 		__ptep_set_access_flags(ptep, entry);
-		flush_tlb_page_nohash(vma, address);
+		flush_tlb_page(vma, address);
 	}
 	return changed;
 }
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 7f922f557936..0ae0572bc239 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -79,7 +79,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 #endif
 }
 
-__init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
 	pte_t *pte;
 
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index e1f22700fb16..48df05ef5231 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -140,10 +140,11 @@ void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
 }
 EXPORT_SYMBOL(radix__local_flush_tlb_pwc);
 
-void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
-			    unsigned long ap, int nid)
+void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+				       int psize)
 {
 	unsigned long pid;
+	unsigned long ap = mmu_get_ap(psize);
 
 	preempt_disable();
 	pid = mm ? mm->context.id : 0;
@@ -159,18 +160,12 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd
 	if (vma && is_vm_hugetlb_page(vma))
 		return __local_flush_hugetlb_page(vma, vmaddr);
 #endif
-	radix___local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
-			       mmu_get_ap(mmu_virtual_psize), 0);
+	radix__local_flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr,
+					  mmu_virtual_psize);
 }
 EXPORT_SYMBOL(radix__local_flush_tlb_page);
 
 #ifdef CONFIG_SMP
-static int mm_is_core_local(struct mm_struct *mm)
-{
-	return cpumask_subset(mm_cpumask(mm),
-			      topology_sibling_cpumask(smp_processor_id()));
-}
-
 void radix__flush_tlb_mm(struct mm_struct *mm)
 {
 	unsigned long pid;
@@ -221,10 +216,11 @@ no_context:
 }
 EXPORT_SYMBOL(radix__flush_tlb_pwc);
 
-void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
-		       unsigned long ap, int nid)
+void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+				 int psize)
 {
 	unsigned long pid;
+	unsigned long ap = mmu_get_ap(psize);
 
 	preempt_disable();
 	pid = mm ? mm->context.id : 0;
@@ -250,8 +246,8 @@ void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 	if (vma && is_vm_hugetlb_page(vma))
 		return flush_hugetlb_page(vma, vmaddr);
 #endif
-	radix___flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
-			 mmu_get_ap(mmu_virtual_psize), 0);
+	radix__flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr,
+				    mmu_virtual_psize);
 }
 EXPORT_SYMBOL(radix__flush_tlb_page);
 
@@ -299,8 +295,65 @@ static int radix_get_mmu_psize(int page_size)
 
 void radix__tlb_flush(struct mmu_gather *tlb)
 {
+	int psize = 0;
 	struct mm_struct *mm = tlb->mm;
-	radix__flush_tlb_mm(mm);
+	int page_size = tlb->page_size;
+
+	psize = radix_get_mmu_psize(page_size);
+	/*
+	 * if page size is not something we understand, do a full mm flush
+	 */
+	if (psize != -1 && !tlb->fullmm && !tlb->need_flush_all)
+		radix__flush_tlb_range_psize(mm, tlb->start, tlb->end, psize);
+	else
+		radix__flush_tlb_mm(mm);
+}
+
+#define TLB_FLUSH_ALL -1UL
+/*
+ * Number of pages above which we will do a bcast tlbie. Just a
+ * number at this point copied from x86
+ */
+static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+
+void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+				  unsigned long end, int psize)
+{
+	unsigned long pid;
+	unsigned long addr;
+	int local = mm_is_core_local(mm);
+	unsigned long ap = mmu_get_ap(psize);
+	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+	unsigned long page_size = 1UL << mmu_psize_defs[psize].shift;
+
+
+	preempt_disable();
+	pid = mm ? mm->context.id : 0;
+	if (unlikely(pid == MMU_NO_CONTEXT))
+		goto err_out;
+
+	if (end == TLB_FLUSH_ALL ||
+	    (end - start) > tlb_single_page_flush_ceiling * page_size) {
+		if (local)
+			_tlbiel_pid(pid, RIC_FLUSH_TLB);
+		else
+			_tlbie_pid(pid, RIC_FLUSH_TLB);
+		goto err_out;
+	}
+	for (addr = start; addr < end; addr += page_size) {
+
+		if (local)
+			_tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+		else {
+			if (lock_tlbie)
+				raw_spin_lock(&native_tlbie_lock);
+			_tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+			if (lock_tlbie)
+				raw_spin_unlock(&native_tlbie_lock);
+		}
+	}
+err_out:
+	preempt_enable();
 }
 
 void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
@@ -340,3 +393,10 @@ void radix__flush_tlb_lpid(unsigned long lpid)
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 EXPORT_SYMBOL(radix__flush_tlb_lpid);
+
+void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
+				unsigned long start, unsigned long end)
+{
+	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
+}
+EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c
index 558e30cce33e..702d7689d714 100644
--- a/arch/powerpc/mm/tlb_hash32.c
+++ b/arch/powerpc/mm/tlb_hash32.c
@@ -49,17 +49,6 @@ void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr)
 EXPORT_SYMBOL(flush_hash_entry);
 
 /*
- * Called by ptep_set_access_flags, must flush on CPUs for which the
- * DSI handler can't just "fixup" the TLB on a write fault
- */
-void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr)
-{
-	if (Hash != 0)
-		return;
-	_tlbie(addr);
-}
-
-/*
  * Called at the end of a mmu_gather operation to make sure the
  * TLB flush is completely done.
  */
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index f4668488512c..050badc0ebd3 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -215,12 +215,6 @@ EXPORT_SYMBOL(local_flush_tlb_page);
 
 static DEFINE_RAW_SPINLOCK(tlbivax_lock);
 
-static int mm_is_core_local(struct mm_struct *mm)
-{
-	return cpumask_subset(mm_cpumask(mm),
-			      topology_sibling_cpumask(smp_processor_id()));
-}
-
 struct tlb_flush_param {
 	unsigned long addr;
 	unsigned int pid;
diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
index cda6fcb809ca..6447dc1c3d89 100644
--- a/arch/powerpc/perf/power9-events-list.h
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -34,15 +34,15 @@ EVENT(PM_L1_ICACHE_MISS,			0x200fd)
 /* Instruction Demand sectors wriittent into IL1 */
 EVENT(PM_L1_DEMAND_WRITE,			0x0408c)
 /* Instruction prefetch written into IL1 */
-EVENT(PM_IC_PREF_WRITE,				0x0408e)
+EVENT(PM_IC_PREF_WRITE,				0x0488c)
 /* The data cache was reloaded from local core's L3 due to a demand load */
 EVENT(PM_DATA_FROM_L3,				0x4c042)
 /* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
 EVENT(PM_DATA_FROM_L3MISS,			0x300fe)
 /* All successful D-side store dispatches for this thread */
-EVENT(PM_L2_ST,					0x16081)
+EVENT(PM_L2_ST,					0x16880)
 /* All successful D-side store dispatches for this thread that were L2 Miss */
-EVENT(PM_L2_ST_MISS,				0x26081)
+EVENT(PM_L2_ST_MISS,				0x26880)
 /* Total HW L3 prefetches(Load+store) */
 EVENT(PM_L3_PREF_ALL,				0x4e052)
 /* Data PTEG reload */
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 3663f71fd913..fbdae8377b71 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -321,6 +321,17 @@ config OF_RTC
 	  Uses information from the OF or flattened device tree to instantiate
 	  platform devices for direct mapped RTC chips like the DS1742 or DS1743.
 
+config GEN_RTC
+	bool "Use the platform RTC operations from user space"
+	select RTC_CLASS
+	select RTC_DRV_GENERIC
+	help
+	  This option provides backwards compatibility with the old gen_rtc.ko
+	  module that was traditionally used for old PowerPC machines.
+	  Platforms should migrate to enabling the RTC_DRV_GENERIC by hand
+	  replacing their get_rtc_time/set_rtc_time callbacks with
+	  a proper RTC device driver.
+
 config SIMPLE_GPIO
 	bool "Support for simple, memory-mapped GPIO controllers"
 	depends on PPC
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 9027d7c48507..f7d1a4953ea0 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -166,7 +166,7 @@ static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
 
 static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
 		unsigned long uaddr, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	int i;
 	unsigned long *io_pte, base_pte;
@@ -193,7 +193,7 @@ static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
 	base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M |
 		CBE_IOPTE_SO_RW | (window->ioid & CBE_IOPTE_IOID_Mask);
 #endif
-	if (unlikely(dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)))
+	if (unlikely(attrs & DMA_ATTR_WEAK_ORDERING))
 		base_pte &= ~CBE_IOPTE_SO_RW;
 
 	io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset);
@@ -526,7 +526,7 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
 
 	__set_bit(0, window->table.it_map);
 	tce_build_cell(&window->table, window->table.it_offset, 1,
-		       (unsigned long)iommu->pad_page, DMA_TO_DEVICE, NULL);
+		       (unsigned long)iommu->pad_page, DMA_TO_DEVICE, 0);
 
 	return window;
 }
@@ -572,7 +572,7 @@ static struct iommu_table *cell_get_iommu_table(struct device *dev)
 
 static void *dma_fixed_alloc_coherent(struct device *dev, size_t size,
 				      dma_addr_t *dma_handle, gfp_t flag,
-				      struct dma_attrs *attrs)
+				      unsigned long attrs)
 {
 	if (iommu_fixed_is_weak)
 		return iommu_alloc_coherent(dev, cell_get_iommu_table(dev),
@@ -586,7 +586,7 @@ static void *dma_fixed_alloc_coherent(struct device *dev, size_t size,
 
 static void dma_fixed_free_coherent(struct device *dev, size_t size,
 				    void *vaddr, dma_addr_t dma_handle,
-				    struct dma_attrs *attrs)
+				    unsigned long attrs)
 {
 	if (iommu_fixed_is_weak)
 		iommu_free_coherent(cell_get_iommu_table(dev), size, vaddr,
@@ -598,9 +598,9 @@ static void dma_fixed_free_coherent(struct device *dev, size_t size,
 static dma_addr_t dma_fixed_map_page(struct device *dev, struct page *page,
 				     unsigned long offset, size_t size,
 				     enum dma_data_direction direction,
-				     struct dma_attrs *attrs)
+				     unsigned long attrs)
 {
-	if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))
+	if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING))
 		return dma_direct_ops.map_page(dev, page, offset, size,
 					       direction, attrs);
 	else
@@ -611,9 +611,9 @@ static dma_addr_t dma_fixed_map_page(struct device *dev, struct page *page,
 
 static void dma_fixed_unmap_page(struct device *dev, dma_addr_t dma_addr,
 				 size_t size, enum dma_data_direction direction,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
-	if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))
+	if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING))
 		dma_direct_ops.unmap_page(dev, dma_addr, size, direction,
 					  attrs);
 	else
@@ -623,9 +623,9 @@ static void dma_fixed_unmap_page(struct device *dev, dma_addr_t dma_addr,
 
 static int dma_fixed_map_sg(struct device *dev, struct scatterlist *sg,
 			   int nents, enum dma_data_direction direction,
-			   struct dma_attrs *attrs)
+			   unsigned long attrs)
 {
-	if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))
+	if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING))
 		return dma_direct_ops.map_sg(dev, sg, nents, direction, attrs);
 	else
 		return ppc_iommu_map_sg(dev, cell_get_iommu_table(dev), sg,
@@ -635,9 +635,9 @@ static int dma_fixed_map_sg(struct device *dev, struct scatterlist *sg,
 
 static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg,
 			       int nents, enum dma_data_direction direction,
-			       struct dma_attrs *attrs)
+			       unsigned long attrs)
 {
-	if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))
+	if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING))
 		dma_direct_ops.unmap_sg(dev, sg, nents, direction, attrs);
 	else
 		ppc_iommu_unmap_sg(cell_get_iommu_table(dev), sg, nents,
@@ -1162,7 +1162,7 @@ static int __init setup_iommu_fixed(char *str)
 	pciep = of_find_node_by_type(NULL, "pcie-endpoint");
 
 	if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0))
-		iommu_fixed_is_weak = 1;
+		iommu_fixed_is_weak = DMA_ATTR_WEAK_ORDERING;
 
 	of_node_put(pciep);
 
diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c
index d17e98bc0c10..e7d075077cb0 100644
--- a/arch/powerpc/platforms/cell/pervasive.c
+++ b/arch/powerpc/platforms/cell/pervasive.c
@@ -35,6 +35,7 @@
 #include <asm/pgtable.h>
 #include <asm/reg.h>
 #include <asm/cell-regs.h>
+#include <asm/cpu_has_feature.h>
 
 #include "pervasive.h"
 
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index 43dd3fb514e0..309d9ccccd50 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -88,7 +88,7 @@ static int iommu_table_iobmap_inited;
 static int iobmap_build(struct iommu_table *tbl, long index,
 			 long npages, unsigned long uaddr,
 			 enum dma_data_direction direction,
-			 struct dma_attrs *attrs)
+			 unsigned long attrs)
 {
 	u32 *ip;
 	u32 rpn;
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 3de4a7c85140..6b4e9d181126 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -353,12 +353,12 @@ static int pmac_late_init(void)
 machine_late_initcall(powermac, pmac_late_init);
 
 /*
- * This is __init_refok because we check for "initializing" before
+ * This is __ref because we check for "initializing" before
  * touching any of the __init sensitive things and "initializing"
  * will be false after __init time. This can't be __init because it
  * can be called whenever a disk is first accessed.
  */
-void __init_refok note_bootable_part(dev_t dev, int part, int goodness)
+void __ref note_bootable_part(dev_t dev, int part, int goodness)
 {
 	char *p;
 
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 4383a5ff82ba..00e1a0195c78 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -73,7 +73,7 @@ EXPORT_SYMBOL(pnv_pci_get_npu_dev);
 
 static void *dma_npu_alloc(struct device *dev, size_t size,
 			   dma_addr_t *dma_handle, gfp_t flag,
-			   struct dma_attrs *attrs)
+			   unsigned long attrs)
 {
 	NPU_DMA_OP_UNSUPPORTED();
 	return NULL;
@@ -81,7 +81,7 @@ static void *dma_npu_alloc(struct device *dev, size_t size,
 
 static void dma_npu_free(struct device *dev, size_t size,
 			 void *vaddr, dma_addr_t dma_handle,
-			 struct dma_attrs *attrs)
+			 unsigned long attrs)
 {
 	NPU_DMA_OP_UNSUPPORTED();
 }
@@ -89,7 +89,7 @@ static void dma_npu_free(struct device *dev, size_t size,
 static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page,
 				   unsigned long offset, size_t size,
 				   enum dma_data_direction direction,
-				   struct dma_attrs *attrs)
+				   unsigned long attrs)
 {
 	NPU_DMA_OP_UNSUPPORTED();
 	return 0;
@@ -97,7 +97,7 @@ static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page,
 
 static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist,
 			  int nelems, enum dma_data_direction direction,
-			  struct dma_attrs *attrs)
+			  unsigned long attrs)
 {
 	NPU_DMA_OP_UNSUPPORTED();
 	return 0;
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index cf928bba4d9a..3d29d40eb0e9 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -64,7 +64,6 @@ END_FTR_SECTION(0, 1);						\
 	OPAL_BRANCH(opal_tracepoint_entry) \
 	mfcr	r12;			\
 	stw	r12,8(r1);		\
-	std	r1,PACAR1(r13);		\
 	li	r11,0;			\
 	mfmsr	r12;			\
 	ori	r11,r11,MSR_EE;		\
@@ -127,7 +126,6 @@ opal_tracepoint_entry:
 	mfcr	r12
 	std	r11,16(r1)
 	stw	r12,8(r1)
-	std	r1,PACAR1(r13)
 	li	r11,0
 	mfmsr	r12
 	ori	r11,r11,MSR_EE
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 891fc4a453df..6b9528307f62 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1806,7 +1806,7 @@ static void pnv_pci_p7ioc_tce_invalidate(struct iommu_table *tbl,
 static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index,
 		long npages, unsigned long uaddr,
 		enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	int ret = pnv_tce_build(tbl, index, npages, uaddr, direction,
 			attrs);
@@ -1950,7 +1950,7 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
 static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
 		long npages, unsigned long uaddr,
 		enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	int ret = pnv_tce_build(tbl, index, npages, uaddr, direction,
 			attrs);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 6701dd5ded20..a21d831c1114 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -704,7 +704,7 @@ static __be64 *pnv_tce(struct iommu_table *tbl, long idx)
 
 int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
 		unsigned long uaddr, enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	u64 proto_tce = iommu_direction_to_tce_perm(direction);
 	u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index d088d4f06116..e64df7894d6e 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -181,7 +181,7 @@ struct pnv_phb {
 extern struct pci_ops pnv_pci_ops;
 extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
 		unsigned long uaddr, enum dma_data_direction direction,
-		struct dma_attrs *attrs);
+		unsigned long attrs);
 extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
 extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
 		unsigned long *hpa, enum dma_data_direction *direction);
diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
index 3f175e8aedb4..57caaf11a83f 100644
--- a/arch/powerpc/platforms/ps3/device-init.c
+++ b/arch/powerpc/platforms/ps3/device-init.c
@@ -189,7 +189,7 @@ fail_malloc:
 	return result;
 }
 
-static int __init_refok ps3_setup_uhc_device(
+static int __ref ps3_setup_uhc_device(
 	const struct ps3_repository_device *repo, enum ps3_match_id match_id,
 	enum ps3_interrupt_type interrupt_type, enum ps3_reg_type reg_type)
 {
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index 5606fe36faf2..8af1c15aef85 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -516,7 +516,7 @@ core_initcall(ps3_system_bus_init);
  */
 static void * ps3_alloc_coherent(struct device *_dev, size_t size,
 				 dma_addr_t *dma_handle, gfp_t flag,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
 	int result;
 	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
@@ -553,7 +553,7 @@ clean_none:
 }
 
 static void ps3_free_coherent(struct device *_dev, size_t size, void *vaddr,
-			      dma_addr_t dma_handle, struct dma_attrs *attrs)
+			      dma_addr_t dma_handle, unsigned long attrs)
 {
 	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
 
@@ -569,7 +569,7 @@ static void ps3_free_coherent(struct device *_dev, size_t size, void *vaddr,
 
 static dma_addr_t ps3_sb_map_page(struct device *_dev, struct page *page,
 	unsigned long offset, size_t size, enum dma_data_direction direction,
-	struct dma_attrs *attrs)
+	unsigned long attrs)
 {
 	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
 	int result;
@@ -592,7 +592,7 @@ static dma_addr_t ps3_sb_map_page(struct device *_dev, struct page *page,
 static dma_addr_t ps3_ioc0_map_page(struct device *_dev, struct page *page,
 				    unsigned long offset, size_t size,
 				    enum dma_data_direction direction,
-				    struct dma_attrs *attrs)
+				    unsigned long attrs)
 {
 	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
 	int result;
@@ -626,7 +626,7 @@ static dma_addr_t ps3_ioc0_map_page(struct device *_dev, struct page *page,
 }
 
 static void ps3_unmap_page(struct device *_dev, dma_addr_t dma_addr,
-	size_t size, enum dma_data_direction direction, struct dma_attrs *attrs)
+	size_t size, enum dma_data_direction direction, unsigned long attrs)
 {
 	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
 	int result;
@@ -640,7 +640,7 @@ static void ps3_unmap_page(struct device *_dev, dma_addr_t dma_addr,
 }
 
 static int ps3_sb_map_sg(struct device *_dev, struct scatterlist *sgl,
-	int nents, enum dma_data_direction direction, struct dma_attrs *attrs)
+	int nents, enum dma_data_direction direction, unsigned long attrs)
 {
 #if defined(CONFIG_PS3_DYNAMIC_DMA)
 	BUG_ON("do");
@@ -670,14 +670,14 @@ static int ps3_sb_map_sg(struct device *_dev, struct scatterlist *sgl,
 static int ps3_ioc0_map_sg(struct device *_dev, struct scatterlist *sg,
 			   int nents,
 			   enum dma_data_direction direction,
-			   struct dma_attrs *attrs)
+			   unsigned long attrs)
 {
 	BUG();
 	return 0;
 }
 
 static void ps3_sb_unmap_sg(struct device *_dev, struct scatterlist *sg,
-	int nents, enum dma_data_direction direction, struct dma_attrs *attrs)
+	int nents, enum dma_data_direction direction, unsigned long attrs)
 {
 #if defined(CONFIG_PS3_DYNAMIC_DMA)
 	BUG_ON("do");
@@ -686,7 +686,7 @@ static void ps3_sb_unmap_sg(struct device *_dev, struct scatterlist *sg,
 
 static void ps3_ioc0_unmap_sg(struct device *_dev, struct scatterlist *sg,
 			    int nents, enum dma_data_direction direction,
-			    struct dma_attrs *attrs)
+			    unsigned long attrs)
 {
 	BUG();
 }
diff --git a/arch/powerpc/platforms/ps3/time.c b/arch/powerpc/platforms/ps3/time.c
index 791c6142c4a7..11b45b58c81b 100644
--- a/arch/powerpc/platforms/ps3/time.c
+++ b/arch/powerpc/platforms/ps3/time.c
@@ -20,9 +20,9 @@
 
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
+#include <linux/rtc.h>
 
 #include <asm/firmware.h>
-#include <asm/rtc.h>
 #include <asm/lv1call.h>
 #include <asm/ps3.h>
 
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 770a753b52c9..0024e451bb36 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -123,7 +123,7 @@ static void iommu_pseries_free_group(struct iommu_table_group *table_group,
 static int tce_build_pSeries(struct iommu_table *tbl, long index,
 			      long npages, unsigned long uaddr,
 			      enum dma_data_direction direction,
-			      struct dma_attrs *attrs)
+			      unsigned long attrs)
 {
 	u64 proto_tce;
 	__be64 *tcep, *tces;
@@ -173,7 +173,7 @@ static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
 static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
 				long npages, unsigned long uaddr,
 				enum dma_data_direction direction,
-				struct dma_attrs *attrs)
+				unsigned long attrs)
 {
 	u64 rc = 0;
 	u64 proto_tce, tce;
@@ -216,7 +216,7 @@ static DEFINE_PER_CPU(__be64 *, tce_page);
 static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 				     long npages, unsigned long uaddr,
 				     enum dma_data_direction direction,
-				     struct dma_attrs *attrs)
+				     unsigned long attrs)
 {
 	u64 rc = 0;
 	u64 proto_tce;
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index 26904f4879ec..3573d54b2770 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -185,7 +185,7 @@ static void dart_flush(struct iommu_table *tbl)
 static int dart_build(struct iommu_table *tbl, long index,
 		       long npages, unsigned long uaddr,
 		       enum dma_data_direction direction,
-		       struct dma_attrs *attrs)
+		       unsigned long attrs)
 {
 	unsigned int *dp, *orig_dp;
 	unsigned int rpn;
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index f5bf38b94595..68e7c0dd2e45 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -289,7 +289,7 @@ static void fsl_rio_inbound_mem_init(struct rio_priv *priv)
 }
 
 int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart,
-	u64 rstart, u32 size, u32 flags)
+	u64 rstart, u64 size, u32 flags)
 {
 	struct rio_priv *priv = mport->priv;
 	u32 base_size;
@@ -298,7 +298,7 @@ int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart,
 	u32 riwar;
 	int i;
 
-	if ((size & (size - 1)) != 0)
+	if ((size & (size - 1)) != 0 || size > 0x400000000ULL)
 		return -EINVAL;
 
 	base_size_log = ilog2(size);
@@ -491,6 +491,7 @@ int fsl_rio_setup(struct platform_device *dev)
 	rmu_node = of_parse_phandle(dev->dev.of_node, "fsl,srio-rmu-handle", 0);
 	if (!rmu_node) {
 		dev_err(&dev->dev, "No valid fsl,srio-rmu-handle property\n");
+		rc = -ENOENT;
 		goto err_rmu;
 	}
 	rc = of_address_to_resource(rmu_node, 0, &rmu_regs);
@@ -643,19 +644,11 @@ int fsl_rio_setup(struct platform_device *dev)
 		port->ops = ops;
 		port->priv = priv;
 		port->phys_efptr = 0x100;
+		port->phys_rmap = 1;
 		priv->regs_win = rio_regs_win;
 
-		/* Probe the master port phy type */
 		ccsr = in_be32(priv->regs_win + RIO_CCSR + i*0x20);
-		port->phy_type = (ccsr & 1) ? RIO_PHY_SERIAL : RIO_PHY_PARALLEL;
-		if (port->phy_type == RIO_PHY_PARALLEL) {
-			dev_err(&dev->dev, "RIO: Parallel PHY type, unsupported port type!\n");
-			release_resource(&port->iores);
-			kfree(priv);
-			kfree(port);
-			continue;
-		}
-		dev_info(&dev->dev, "RapidIO PHY type: Serial\n");
+
 		/* Checking the port training status */
 		if (in_be32((priv->regs_win + RIO_ESCSR + i*0x20)) & 1) {
 			dev_err(&dev->dev, "Port %d is not ready. "
@@ -705,11 +698,9 @@ int fsl_rio_setup(struct platform_device *dev)
 			((i == 0) ? RIO_INB_ATMU_REGS_PORT1_OFFSET :
 			RIO_INB_ATMU_REGS_PORT2_OFFSET));
 
-
-		/* Set to receive any dist ID for serial RapidIO controller. */
-		if (port->phy_type == RIO_PHY_SERIAL)
-			out_be32((priv->regs_win
-				+ RIO_ISR_AACR + i*0x80), RIO_ISR_AACR_AA);
+		/* Set to receive packets with any dest ID */
+		out_be32((priv->regs_win + RIO_ISR_AACR + i*0x80),
+			 RIO_ISR_AACR_AA);
 
 		/* Configure maintenance transaction window */
 		out_be32(&priv->maint_atmu_regs->rowbar,
diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c
index ed5234ed8d3f..5ebd3f018295 100644
--- a/arch/powerpc/sysdev/msi_bitmap.c
+++ b/arch/powerpc/sysdev/msi_bitmap.c
@@ -112,7 +112,7 @@ int msi_bitmap_reserve_dt_hwirqs(struct msi_bitmap *bmp)
 	return 0;
 }
 
-int __init_refok msi_bitmap_alloc(struct msi_bitmap *bmp, unsigned int irq_count,
+int __ref msi_bitmap_alloc(struct msi_bitmap *bmp, unsigned int irq_count,
 		     struct device_node *of_node)
 {
 	int size;
diff --git a/arch/powerpc/xmon/ppc-dis.c b/arch/powerpc/xmon/ppc-dis.c
index 89098f320ad5..ee9891734149 100644
--- a/arch/powerpc/xmon/ppc-dis.c
+++ b/arch/powerpc/xmon/ppc-dis.c
@@ -20,6 +20,7 @@ along with this file; see the file COPYING.  If not, write to the Free
 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.  */
 
 #include <asm/cputable.h>
+#include <asm/cpu_has_feature.h>
 #include "nonstdio.h"
 #include "ansidecl.h"
 #include "ppc.h"
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 9e607bf2d640..e751fe25d6ab 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -123,6 +123,7 @@ config S390
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_EARLY_PFN_TO_NID
+	select HAVE_ARCH_HARDENED_USERCOPY
 	select HAVE_ARCH_JUMP_LABEL
 	select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
 	select HAVE_ARCH_SECCOMP_FILTER
@@ -871,4 +872,17 @@ config S390_GUEST
 	  Select this option if you want to run the kernel as a guest under
 	  the KVM hypervisor.
 
+config S390_GUEST_OLD_TRANSPORT
+	def_bool y
+	prompt "Guest support for old s390 virtio transport (DEPRECATED)"
+	depends on S390_GUEST
+	help
+	  Enable this option to add support for the old s390-virtio
+	  transport (i.e. virtio devices NOT based on virtio-ccw). This
+	  type of virtio devices is only available on the experimental
+	  kuli userspace or with old (< 2.6) qemu. If you are running
+	  with a modern version of qemu (which supports virtio-ccw since
+	  1.4 and uses it by default since version 2.4), you probably won't
+	  need this.
+
 endmenu
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 13723c39e58e..33ba697c782d 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -33,10 +33,10 @@ quiet_cmd_sizes = GEN $@
 $(obj)/sizes.h: vmlinux
 	$(call if_changed,sizes)
 
-AFLAGS_head.o += -I$(obj)
+AFLAGS_head.o += -I$(objtree)/$(obj)
 $(obj)/head.o: $(obj)/sizes.h
 
-CFLAGS_misc.o += -I$(obj)
+CFLAGS_misc.o += -I$(objtree)/$(obj)
 $(obj)/misc.o: $(obj)/sizes.h
 
 OBJCOPYFLAGS_vmlinux.bin :=  -R .comment -S
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 67d43a0eabb4..28f03ca60100 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -19,29 +19,10 @@
 #include <asm/ebcdic.h>
 #include "hypfs.h"
 
-#define LPAR_NAME_LEN 8		/* lpar name len in diag 204 data */
-#define CPU_NAME_LEN 16		/* type name len of cpus in diag224 name table */
 #define TMP_SIZE 64		/* size of temporary buffers */
 
 #define DBFS_D204_HDR_VERSION	0
 
-/* diag 204 subcodes */
-enum diag204_sc {
-	SUBC_STIB4 = 4,
-	SUBC_RSI = 5,
-	SUBC_STIB6 = 6,
-	SUBC_STIB7 = 7
-};
-
-/* The two available diag 204 data formats */
-enum diag204_format {
-	INFO_SIMPLE = 0,
-	INFO_EXT = 0x00010000
-};
-
-/* bit is set in flags, when physical cpu info is included in diag 204 data */
-#define LPAR_PHYS_FLG  0x80
-
 static char *diag224_cpu_names;			/* diag 224 name table */
 static enum diag204_sc diag204_store_sc;	/* used subcode for store */
 static enum diag204_format diag204_info_type;	/* used diag 204 data format */
@@ -53,7 +34,7 @@ static int diag204_buf_pages;		/* number of pages for diag204 data */
 static struct dentry *dbfs_d204_file;
 
 /*
- * DIAG 204 data structures and member access functions.
+ * DIAG 204 member access functions.
  *
  * Since we have two different diag 204 data formats for old and new s390
  * machines, we do not access the structs directly, but use getter functions for
@@ -62,304 +43,173 @@ static struct dentry *dbfs_d204_file;
 
 /* Time information block */
 
-struct info_blk_hdr {
-	__u8  npar;
-	__u8  flags;
-	__u16 tslice;
-	__u16 phys_cpus;
-	__u16 this_part;
-	__u64 curtod;
-} __attribute__ ((packed));
-
-struct x_info_blk_hdr {
-	__u8  npar;
-	__u8  flags;
-	__u16 tslice;
-	__u16 phys_cpus;
-	__u16 this_part;
-	__u64 curtod1;
-	__u64 curtod2;
-	char reserved[40];
-} __attribute__ ((packed));
-
 static inline int info_blk_hdr__size(enum diag204_format type)
 {
-	if (type == INFO_SIMPLE)
-		return sizeof(struct info_blk_hdr);
-	else /* INFO_EXT */
-		return sizeof(struct x_info_blk_hdr);
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_info_blk_hdr);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_info_blk_hdr);
 }
 
 static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr)
 {
-	if (type == INFO_SIMPLE)
-		return ((struct info_blk_hdr *)hdr)->npar;
-	else /* INFO_EXT */
-		return ((struct x_info_blk_hdr *)hdr)->npar;
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_info_blk_hdr *)hdr)->npar;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_info_blk_hdr *)hdr)->npar;
 }
 
 static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr)
 {
-	if (type == INFO_SIMPLE)
-		return ((struct info_blk_hdr *)hdr)->flags;
-	else /* INFO_EXT */
-		return ((struct x_info_blk_hdr *)hdr)->flags;
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_info_blk_hdr *)hdr)->flags;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_info_blk_hdr *)hdr)->flags;
 }
 
 static inline __u16 info_blk_hdr__pcpus(enum diag204_format type, void *hdr)
 {
-	if (type == INFO_SIMPLE)
-		return ((struct info_blk_hdr *)hdr)->phys_cpus;
-	else /* INFO_EXT */
-		return ((struct x_info_blk_hdr *)hdr)->phys_cpus;
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_info_blk_hdr *)hdr)->phys_cpus;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_info_blk_hdr *)hdr)->phys_cpus;
 }
 
 /* Partition header */
 
-struct part_hdr {
-	__u8 pn;
-	__u8 cpus;
-	char reserved[6];
-	char part_name[LPAR_NAME_LEN];
-} __attribute__ ((packed));
-
-struct x_part_hdr {
-	__u8  pn;
-	__u8  cpus;
-	__u8  rcpus;
-	__u8  pflag;
-	__u32 mlu;
-	char  part_name[LPAR_NAME_LEN];
-	char  lpc_name[8];
-	char  os_name[8];
-	__u64 online_cs;
-	__u64 online_es;
-	__u8  upid;
-	char  reserved1[3];
-	__u32 group_mlu;
-	char  group_name[8];
-	char  reserved2[32];
-} __attribute__ ((packed));
-
 static inline int part_hdr__size(enum diag204_format type)
 {
-	if (type == INFO_SIMPLE)
-		return sizeof(struct part_hdr);
-	else /* INFO_EXT */
-		return sizeof(struct x_part_hdr);
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_part_hdr);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_part_hdr);
 }
 
 static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr)
 {
-	if (type == INFO_SIMPLE)
-		return ((struct part_hdr *)hdr)->cpus;
-	else /* INFO_EXT */
-		return ((struct x_part_hdr *)hdr)->rcpus;
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_part_hdr *)hdr)->cpus;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_part_hdr *)hdr)->rcpus;
 }
 
 static inline void part_hdr__part_name(enum diag204_format type, void *hdr,
 				       char *name)
 {
-	if (type == INFO_SIMPLE)
-		memcpy(name, ((struct part_hdr *)hdr)->part_name,
-		       LPAR_NAME_LEN);
-	else /* INFO_EXT */
-		memcpy(name, ((struct x_part_hdr *)hdr)->part_name,
-		       LPAR_NAME_LEN);
-	EBCASC(name, LPAR_NAME_LEN);
-	name[LPAR_NAME_LEN] = 0;
+	if (type == DIAG204_INFO_SIMPLE)
+		memcpy(name, ((struct diag204_part_hdr *)hdr)->part_name,
+		       DIAG204_LPAR_NAME_LEN);
+	else /* DIAG204_INFO_EXT */
+		memcpy(name, ((struct diag204_x_part_hdr *)hdr)->part_name,
+		       DIAG204_LPAR_NAME_LEN);
+	EBCASC(name, DIAG204_LPAR_NAME_LEN);
+	name[DIAG204_LPAR_NAME_LEN] = 0;
 	strim(name);
 }
 
-struct cpu_info {
-	__u16 cpu_addr;
-	char  reserved1[2];
-	__u8  ctidx;
-	__u8  cflag;
-	__u16 weight;
-	__u64 acc_time;
-	__u64 lp_time;
-} __attribute__ ((packed));
-
-struct x_cpu_info {
-	__u16 cpu_addr;
-	char  reserved1[2];
-	__u8  ctidx;
-	__u8  cflag;
-	__u16 weight;
-	__u64 acc_time;
-	__u64 lp_time;
-	__u16 min_weight;
-	__u16 cur_weight;
-	__u16 max_weight;
-	char  reseved2[2];
-	__u64 online_time;
-	__u64 wait_time;
-	__u32 pma_weight;
-	__u32 polar_weight;
-	char  reserved3[40];
-} __attribute__ ((packed));
-
 /* CPU info block */
 
 static inline int cpu_info__size(enum diag204_format type)
 {
-	if (type == INFO_SIMPLE)
-		return sizeof(struct cpu_info);
-	else /* INFO_EXT */
-		return sizeof(struct x_cpu_info);
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_cpu_info);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_cpu_info);
 }
 
 static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr)
 {
-	if (type == INFO_SIMPLE)
-		return ((struct cpu_info *)hdr)->ctidx;
-	else /* INFO_EXT */
-		return ((struct x_cpu_info *)hdr)->ctidx;
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_cpu_info *)hdr)->ctidx;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->ctidx;
 }
 
 static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr)
 {
-	if (type == INFO_SIMPLE)
-		return ((struct cpu_info *)hdr)->cpu_addr;
-	else /* INFO_EXT */
-		return ((struct x_cpu_info *)hdr)->cpu_addr;
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_cpu_info *)hdr)->cpu_addr;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->cpu_addr;
 }
 
 static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr)
 {
-	if (type == INFO_SIMPLE)
-		return ((struct cpu_info *)hdr)->acc_time;
-	else /* INFO_EXT */
-		return ((struct x_cpu_info *)hdr)->acc_time;
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_cpu_info *)hdr)->acc_time;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->acc_time;
 }
 
 static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr)
 {
-	if (type == INFO_SIMPLE)
-		return ((struct cpu_info *)hdr)->lp_time;
-	else /* INFO_EXT */
-		return ((struct x_cpu_info *)hdr)->lp_time;
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_cpu_info *)hdr)->lp_time;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->lp_time;
 }
 
 static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr)
 {
-	if (type == INFO_SIMPLE)
+	if (type == DIAG204_INFO_SIMPLE)
 		return 0;	/* online_time not available in simple info */
-	else /* INFO_EXT */
-		return ((struct x_cpu_info *)hdr)->online_time;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->online_time;
 }
 
 /* Physical header */
 
-struct phys_hdr {
-	char reserved1[1];
-	__u8 cpus;
-	char reserved2[6];
-	char mgm_name[8];
-} __attribute__ ((packed));
-
-struct x_phys_hdr {
-	char reserved1[1];
-	__u8 cpus;
-	char reserved2[6];
-	char mgm_name[8];
-	char reserved3[80];
-} __attribute__ ((packed));
-
 static inline int phys_hdr__size(enum diag204_format type)
 {
-	if (type == INFO_SIMPLE)
-		return sizeof(struct phys_hdr);
-	else /* INFO_EXT */
-		return sizeof(struct x_phys_hdr);
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_phys_hdr);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_phys_hdr);
 }
 
 static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr)
 {
-	if (type == INFO_SIMPLE)
-		return ((struct phys_hdr *)hdr)->cpus;
-	else /* INFO_EXT */
-		return ((struct x_phys_hdr *)hdr)->cpus;
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_phys_hdr *)hdr)->cpus;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_phys_hdr *)hdr)->cpus;
 }
 
 /* Physical CPU info block */
 
-struct phys_cpu {
-	__u16 cpu_addr;
-	char  reserved1[2];
-	__u8  ctidx;
-	char  reserved2[3];
-	__u64 mgm_time;
-	char  reserved3[8];
-} __attribute__ ((packed));
-
-struct x_phys_cpu {
-	__u16 cpu_addr;
-	char  reserved1[2];
-	__u8  ctidx;
-	char  reserved2[3];
-	__u64 mgm_time;
-	char  reserved3[80];
-} __attribute__ ((packed));
-
 static inline int phys_cpu__size(enum diag204_format type)
 {
-	if (type == INFO_SIMPLE)
-		return sizeof(struct phys_cpu);
-	else /* INFO_EXT */
-		return sizeof(struct x_phys_cpu);
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_phys_cpu);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_phys_cpu);
 }
 
 static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr)
 {
-	if (type == INFO_SIMPLE)
-		return ((struct phys_cpu *)hdr)->cpu_addr;
-	else /* INFO_EXT */
-		return ((struct x_phys_cpu *)hdr)->cpu_addr;
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_phys_cpu *)hdr)->cpu_addr;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_phys_cpu *)hdr)->cpu_addr;
 }
 
 static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr)
 {
-	if (type == INFO_SIMPLE)
-		return ((struct phys_cpu *)hdr)->mgm_time;
-	else /* INFO_EXT */
-		return ((struct x_phys_cpu *)hdr)->mgm_time;
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_phys_cpu *)hdr)->mgm_time;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_phys_cpu *)hdr)->mgm_time;
 }
 
 static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
 {
-	if (type == INFO_SIMPLE)
-		return ((struct phys_cpu *)hdr)->ctidx;
-	else /* INFO_EXT */
-		return ((struct x_phys_cpu *)hdr)->ctidx;
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_phys_cpu *)hdr)->ctidx;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_phys_cpu *)hdr)->ctidx;
 }
 
 /* Diagnose 204 functions */
-
-static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr)
-{
-	register unsigned long _subcode asm("0") = *subcode;
-	register unsigned long _size asm("1") = size;
-
-	asm volatile(
-		"	diag	%2,%0,0x204\n"
-		"0:	nopr	%%r7\n"
-		EX_TABLE(0b,0b)
-		: "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory");
-	*subcode = _subcode;
-	return _size;
-}
-
-static int diag204(unsigned long subcode, unsigned long size, void *addr)
-{
-	diag_stat_inc(DIAG_STAT_X204);
-	size = __diag204(&subcode, size, addr);
-	if (subcode)
-		return -1;
-	return size;
-}
-
 /*
  * For the old diag subcode 4 with simple data format we have to use real
  * memory. If we use subcode 6 or 7 with extended data format, we can (and
@@ -411,12 +261,12 @@ static void *diag204_get_buffer(enum diag204_format fmt, int *pages)
 		*pages = diag204_buf_pages;
 		return diag204_buf;
 	}
-	if (fmt == INFO_SIMPLE) {
+	if (fmt == DIAG204_INFO_SIMPLE) {
 		*pages = 1;
 		return diag204_alloc_rbuf();
-	} else {/* INFO_EXT */
-		*pages = diag204((unsigned long)SUBC_RSI |
-				 (unsigned long)INFO_EXT, 0, NULL);
+	} else {/* DIAG204_INFO_EXT */
+		*pages = diag204((unsigned long)DIAG204_SUBC_RSI |
+				 (unsigned long)DIAG204_INFO_EXT, 0, NULL);
 		if (*pages <= 0)
 			return ERR_PTR(-ENOSYS);
 		else
@@ -443,18 +293,18 @@ static int diag204_probe(void)
 	void *buf;
 	int pages, rc;
 
-	buf = diag204_get_buffer(INFO_EXT, &pages);
+	buf = diag204_get_buffer(DIAG204_INFO_EXT, &pages);
 	if (!IS_ERR(buf)) {
-		if (diag204((unsigned long)SUBC_STIB7 |
-			    (unsigned long)INFO_EXT, pages, buf) >= 0) {
-			diag204_store_sc = SUBC_STIB7;
-			diag204_info_type = INFO_EXT;
+		if (diag204((unsigned long)DIAG204_SUBC_STIB7 |
+			    (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) {
+			diag204_store_sc = DIAG204_SUBC_STIB7;
+			diag204_info_type = DIAG204_INFO_EXT;
 			goto out;
 		}
-		if (diag204((unsigned long)SUBC_STIB6 |
-			    (unsigned long)INFO_EXT, pages, buf) >= 0) {
-			diag204_store_sc = SUBC_STIB6;
-			diag204_info_type = INFO_EXT;
+		if (diag204((unsigned long)DIAG204_SUBC_STIB6 |
+			    (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) {
+			diag204_store_sc = DIAG204_SUBC_STIB6;
+			diag204_info_type = DIAG204_INFO_EXT;
 			goto out;
 		}
 		diag204_free_buffer();
@@ -462,15 +312,15 @@ static int diag204_probe(void)
 
 	/* subcodes 6 and 7 failed, now try subcode 4 */
 
-	buf = diag204_get_buffer(INFO_SIMPLE, &pages);
+	buf = diag204_get_buffer(DIAG204_INFO_SIMPLE, &pages);
 	if (IS_ERR(buf)) {
 		rc = PTR_ERR(buf);
 		goto fail_alloc;
 	}
-	if (diag204((unsigned long)SUBC_STIB4 |
-		    (unsigned long)INFO_SIMPLE, pages, buf) >= 0) {
-		diag204_store_sc = SUBC_STIB4;
-		diag204_info_type = INFO_SIMPLE;
+	if (diag204((unsigned long)DIAG204_SUBC_STIB4 |
+		    (unsigned long)DIAG204_INFO_SIMPLE, pages, buf) >= 0) {
+		diag204_store_sc = DIAG204_SUBC_STIB4;
+		diag204_info_type = DIAG204_INFO_SIMPLE;
 		goto out;
 	} else {
 		rc = -ENOSYS;
@@ -510,20 +360,6 @@ out:
 
 /* Diagnose 224 functions */
 
-static int diag224(void *ptr)
-{
-	int rc = -EOPNOTSUPP;
-
-	diag_stat_inc(DIAG_STAT_X224);
-	asm volatile(
-		"	diag	%1,%2,0x224\n"
-		"0:	lhi	%0,0x0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "+d" (rc) :"d" (0), "d" (ptr) : "memory");
-	return rc;
-}
-
 static int diag224_get_name_table(void)
 {
 	/* memory must be below 2GB */
@@ -545,9 +381,9 @@ static void diag224_delete_name_table(void)
 
 static int diag224_idx2name(int index, char *name)
 {
-	memcpy(name, diag224_cpu_names + ((index + 1) * CPU_NAME_LEN),
-		CPU_NAME_LEN);
-	name[CPU_NAME_LEN] = 0;
+	memcpy(name, diag224_cpu_names + ((index + 1) * DIAG204_CPU_NAME_LEN),
+	       DIAG204_CPU_NAME_LEN);
+	name[DIAG204_CPU_NAME_LEN] = 0;
 	strim(name);
 	return 0;
 }
@@ -603,7 +439,7 @@ __init int hypfs_diag_init(void)
 		pr_err("The hardware system does not support hypfs\n");
 		return -ENODATA;
 	}
-	if (diag204_info_type == INFO_EXT) {
+	if (diag204_info_type == DIAG204_INFO_EXT) {
 		rc = hypfs_dbfs_create_file(&dbfs_file_d204);
 		if (rc)
 			return rc;
@@ -651,7 +487,7 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
 			      cpu_info__lp_time(diag204_info_type, cpu_info));
 	if (IS_ERR(rc))
 		return PTR_ERR(rc);
-	if (diag204_info_type == INFO_EXT) {
+	if (diag204_info_type == DIAG204_INFO_EXT) {
 		rc = hypfs_create_u64(cpu_dir, "onlinetime",
 				      cpu_info__online_time(diag204_info_type,
 							    cpu_info));
@@ -667,12 +503,12 @@ static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
 {
 	struct dentry *cpus_dir;
 	struct dentry *lpar_dir;
-	char lpar_name[LPAR_NAME_LEN + 1];
+	char lpar_name[DIAG204_LPAR_NAME_LEN + 1];
 	void *cpu_info;
 	int i;
 
 	part_hdr__part_name(diag204_info_type, part_hdr, lpar_name);
-	lpar_name[LPAR_NAME_LEN] = 0;
+	lpar_name[DIAG204_LPAR_NAME_LEN] = 0;
 	lpar_dir = hypfs_mkdir(systems_dir, lpar_name);
 	if (IS_ERR(lpar_dir))
 		return lpar_dir;
@@ -755,7 +591,8 @@ int hypfs_diag_create_files(struct dentry *root)
 			goto err_out;
 		}
 	}
-	if (info_blk_hdr__flags(diag204_info_type, time_hdr) & LPAR_PHYS_FLG) {
+	if (info_blk_hdr__flags(diag204_info_type, time_hdr) &
+	    DIAG204_LPAR_PHYS_FLG) {
 		ptr = hypfs_create_phys_files(root, part_hdr);
 		if (IS_ERR(ptr)) {
 			rc = PTR_ERR(ptr);
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index 1a82cf26ee11..d28621de8e0b 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -20,6 +20,9 @@
 #define CPACF_KMC		0xb92f		/* MSA	*/
 #define CPACF_KIMD		0xb93e		/* MSA	*/
 #define CPACF_KLMD		0xb93f		/* MSA	*/
+#define CPACF_PCKMO		0xb928		/* MSA3 */
+#define CPACF_KMF		0xb92a		/* MSA4 */
+#define CPACF_KMO		0xb92b		/* MSA4 */
 #define CPACF_PCC		0xb92c		/* MSA4 */
 #define CPACF_KMCTR		0xb92d		/* MSA4 */
 #define CPACF_PPNO		0xb93c		/* MSA5 */
@@ -136,6 +139,7 @@ static inline void __cpacf_query(unsigned int opcode, unsigned char *status)
 	register unsigned long r1 asm("1") = (unsigned long) status;
 
 	asm volatile(
+		"	spm 0\n" /* pckmo doesn't change the cc */
 		/* Parameter registers are ignored, but may not be 0 */
 		"0:	.insn	rrf,%[opc] << 16,2,2,2,0\n"
 		"	brc	1,0b\n"	/* handle partial completion */
@@ -157,6 +161,12 @@ static inline int cpacf_query(unsigned int opcode, unsigned int func)
 		if (!test_facility(17))	/* check for MSA */
 			return 0;
 		break;
+	case CPACF_PCKMO:
+		if (!test_facility(76))	/* check for MSA3 */
+			return 0;
+		break;
+	case CPACF_KMF:
+	case CPACF_KMO:
 	case CPACF_PCC:
 	case CPACF_KMCTR:
 		if (!test_facility(77))	/* check for MSA4 */
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 86cae09e076a..8acf482162ed 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -78,4 +78,153 @@ struct diag210 {
 
 extern int diag210(struct diag210 *addr);
 
+/* bit is set in flags, when physical cpu info is included in diag 204 data */
+#define DIAG204_LPAR_PHYS_FLG 0x80
+#define DIAG204_LPAR_NAME_LEN 8		/* lpar name len in diag 204 data */
+#define DIAG204_CPU_NAME_LEN 16		/* type name len of cpus in diag224 name table */
+
+/* diag 204 subcodes */
+enum diag204_sc {
+	DIAG204_SUBC_STIB4 = 4,
+	DIAG204_SUBC_RSI = 5,
+	DIAG204_SUBC_STIB6 = 6,
+	DIAG204_SUBC_STIB7 = 7
+};
+
+/* The two available diag 204 data formats */
+enum diag204_format {
+	DIAG204_INFO_SIMPLE = 0,
+	DIAG204_INFO_EXT = 0x00010000
+};
+
+enum diag204_cpu_flags {
+	DIAG204_CPU_ONLINE = 0x20,
+	DIAG204_CPU_CAPPED = 0x40,
+};
+
+struct diag204_info_blk_hdr {
+	__u8  npar;
+	__u8  flags;
+	__u16 tslice;
+	__u16 phys_cpus;
+	__u16 this_part;
+	__u64 curtod;
+} __packed;
+
+struct diag204_x_info_blk_hdr {
+	__u8  npar;
+	__u8  flags;
+	__u16 tslice;
+	__u16 phys_cpus;
+	__u16 this_part;
+	__u64 curtod1;
+	__u64 curtod2;
+	char reserved[40];
+} __packed;
+
+struct diag204_part_hdr {
+	__u8 pn;
+	__u8 cpus;
+	char reserved[6];
+	char part_name[DIAG204_LPAR_NAME_LEN];
+} __packed;
+
+struct diag204_x_part_hdr {
+	__u8  pn;
+	__u8  cpus;
+	__u8  rcpus;
+	__u8  pflag;
+	__u32 mlu;
+	char  part_name[DIAG204_LPAR_NAME_LEN];
+	char  lpc_name[8];
+	char  os_name[8];
+	__u64 online_cs;
+	__u64 online_es;
+	__u8  upid;
+	__u8  reserved:3;
+	__u8  mtid:5;
+	char  reserved1[2];
+	__u32 group_mlu;
+	char  group_name[8];
+	char  hardware_group_name[8];
+	char  reserved2[24];
+} __packed;
+
+struct diag204_cpu_info {
+	__u16 cpu_addr;
+	char  reserved1[2];
+	__u8  ctidx;
+	__u8  cflag;
+	__u16 weight;
+	__u64 acc_time;
+	__u64 lp_time;
+} __packed;
+
+struct diag204_x_cpu_info {
+	__u16 cpu_addr;
+	char  reserved1[2];
+	__u8  ctidx;
+	__u8  cflag;
+	__u16 weight;
+	__u64 acc_time;
+	__u64 lp_time;
+	__u16 min_weight;
+	__u16 cur_weight;
+	__u16 max_weight;
+	char  reseved2[2];
+	__u64 online_time;
+	__u64 wait_time;
+	__u32 pma_weight;
+	__u32 polar_weight;
+	__u32 cpu_type_cap;
+	__u32 group_cpu_type_cap;
+	char  reserved3[32];
+} __packed;
+
+struct diag204_phys_hdr {
+	char reserved1[1];
+	__u8 cpus;
+	char reserved2[6];
+	char mgm_name[8];
+} __packed;
+
+struct diag204_x_phys_hdr {
+	char reserved1[1];
+	__u8 cpus;
+	char reserved2[6];
+	char mgm_name[8];
+	char reserved3[80];
+} __packed;
+
+struct diag204_phys_cpu {
+	__u16 cpu_addr;
+	char  reserved1[2];
+	__u8  ctidx;
+	char  reserved2[3];
+	__u64 mgm_time;
+	char  reserved3[8];
+} __packed;
+
+struct diag204_x_phys_cpu {
+	__u16 cpu_addr;
+	char  reserved1[2];
+	__u8  ctidx;
+	char  reserved2[1];
+	__u16 weight;
+	__u64 mgm_time;
+	char  reserved3[80];
+} __packed;
+
+struct diag204_x_part_block {
+	struct diag204_x_part_hdr hdr;
+	struct diag204_x_cpu_info cpus[];
+} __packed;
+
+struct diag204_x_phys_block {
+	struct diag204_x_phys_hdr hdr;
+	struct diag204_x_phys_cpu cpus[];
+} __packed;
+
+int diag204(unsigned long subcode, unsigned long size, void *addr);
+int diag224(void *ptr);
 #endif /* _ASM_S390_DIAG_H */
diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h
index 3249b7464889..ffaba07f50ab 100644
--- a/arch/s390/include/asm/dma-mapping.h
+++ b/arch/s390/include/asm/dma-mapping.h
@@ -5,7 +5,6 @@
 #include <linux/types.h>
 #include <linux/mm.h>
 #include <linux/scatterlist.h>
-#include <linux/dma-attrs.h>
 #include <linux/dma-debug.h>
 #include <linux/io.h>
 
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index d054c1b07a3c..741ddba0bf11 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -10,14 +10,25 @@
 
 /**
  * struct gmap_struct - guest address space
+ * @list: list head for the mm->context gmap list
  * @crst_list: list of all crst tables used in the guest address space
  * @mm: pointer to the parent mm_struct
  * @guest_to_host: radix tree with guest to host address translation
  * @host_to_guest: radix tree with pointer to segment table entries
  * @guest_table_lock: spinlock to protect all entries in the guest page table
+ * @ref_count: reference counter for the gmap structure
  * @table: pointer to the page directory
  * @asce: address space control element for gmap page table
  * @pfault_enabled: defines if pfaults are applicable for the guest
+ * @host_to_rmap: radix tree with gmap_rmap lists
+ * @children: list of shadow gmap structures
+ * @pt_list: list of all page tables used in the shadow guest address space
+ * @shadow_lock: spinlock to protect the shadow gmap list
+ * @parent: pointer to the parent gmap for shadow guest address spaces
+ * @orig_asce: ASCE for which the shadow page table has been created
+ * @edat_level: edat level to be used for the shadow translation
+ * @removed: flag to indicate if a shadow guest address space has been removed
+ * @initialized: flag to indicate if a shadow guest address space can be used
  */
 struct gmap {
 	struct list_head list;
@@ -26,26 +37,64 @@ struct gmap {
 	struct radix_tree_root guest_to_host;
 	struct radix_tree_root host_to_guest;
 	spinlock_t guest_table_lock;
+	atomic_t ref_count;
 	unsigned long *table;
 	unsigned long asce;
 	unsigned long asce_end;
 	void *private;
 	bool pfault_enabled;
+	/* Additional data for shadow guest address spaces */
+	struct radix_tree_root host_to_rmap;
+	struct list_head children;
+	struct list_head pt_list;
+	spinlock_t shadow_lock;
+	struct gmap *parent;
+	unsigned long orig_asce;
+	int edat_level;
+	bool removed;
+	bool initialized;
 };
 
 /**
+ * struct gmap_rmap - reverse mapping for shadow page table entries
+ * @next: pointer to next rmap in the list
+ * @raddr: virtual rmap address in the shadow guest address space
+ */
+struct gmap_rmap {
+	struct gmap_rmap *next;
+	unsigned long raddr;
+};
+
+#define gmap_for_each_rmap(pos, head) \
+	for (pos = (head); pos; pos = pos->next)
+
+#define gmap_for_each_rmap_safe(pos, n, head) \
+	for (pos = (head); n = pos ? pos->next : NULL, pos; pos = n)
+
+/**
  * struct gmap_notifier - notify function block for page invalidation
  * @notifier_call: address of callback function
  */
 struct gmap_notifier {
 	struct list_head list;
-	void (*notifier_call)(struct gmap *gmap, unsigned long gaddr);
+	struct rcu_head rcu;
+	void (*notifier_call)(struct gmap *gmap, unsigned long start,
+			      unsigned long end);
 };
 
-struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit);
-void gmap_free(struct gmap *gmap);
+static inline int gmap_is_shadow(struct gmap *gmap)
+{
+	return !!gmap->parent;
+}
+
+struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit);
+void gmap_remove(struct gmap *gmap);
+struct gmap *gmap_get(struct gmap *gmap);
+void gmap_put(struct gmap *gmap);
+
 void gmap_enable(struct gmap *gmap);
 void gmap_disable(struct gmap *gmap);
+struct gmap *gmap_get_enabled(void);
 int gmap_map_segment(struct gmap *gmap, unsigned long from,
 		     unsigned long to, unsigned long len);
 int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
@@ -57,8 +106,29 @@ void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
 void __gmap_zap(struct gmap *, unsigned long gaddr);
 void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
 
-void gmap_register_ipte_notifier(struct gmap_notifier *);
-void gmap_unregister_ipte_notifier(struct gmap_notifier *);
-int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
+int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val);
+
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
+			 int edat_level);
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level);
+int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
+		    int fake);
+int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
+		    int fake);
+int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
+		    int fake);
+int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
+		    int fake);
+int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
+			   unsigned long *pgt, int *dat_protection, int *fake);
+int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte);
+
+void gmap_register_pte_notifier(struct gmap_notifier *);
+void gmap_unregister_pte_notifier(struct gmap_notifier *);
+void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *,
+		     unsigned long bits);
+
+int gmap_mprotect_notify(struct gmap *, unsigned long start,
+			 unsigned long len, int prot);
 
 #endif /* _ASM_S390_GMAP_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index ac82e8eb936d..8e5daf7a76ce 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -43,6 +43,7 @@
 /* s390-specific vcpu->requests bit members */
 #define KVM_REQ_ENABLE_IBS         8
 #define KVM_REQ_DISABLE_IBS        9
+#define KVM_REQ_ICPT_OPEREXC       10
 
 #define SIGP_CTRL_C		0x80
 #define SIGP_CTRL_SCN_MASK	0x3f
@@ -145,7 +146,7 @@ struct kvm_s390_sie_block {
 	__u64	cputm;			/* 0x0028 */
 	__u64	ckc;			/* 0x0030 */
 	__u64	epoch;			/* 0x0038 */
-	__u8	reserved40[4];		/* 0x0040 */
+	__u32	svcc;			/* 0x0040 */
 #define LCTL_CR0	0x8000
 #define LCTL_CR6	0x0200
 #define LCTL_CR9	0x0040
@@ -154,6 +155,7 @@ struct kvm_s390_sie_block {
 #define LCTL_CR14	0x0002
 	__u16   lctl;			/* 0x0044 */
 	__s16	icpua;			/* 0x0046 */
+#define ICTL_OPEREXC	0x80000000
 #define ICTL_PINT	0x20000000
 #define ICTL_LPSW	0x00400000
 #define ICTL_STCTL	0x00040000
@@ -166,6 +168,9 @@ struct kvm_s390_sie_block {
 #define ICPT_INST	0x04
 #define ICPT_PROGI	0x08
 #define ICPT_INSTPROGI	0x0C
+#define ICPT_EXTINT	0x14
+#define ICPT_VALIDITY	0x20
+#define ICPT_STOP	0x28
 #define ICPT_OPEREXC	0x2C
 #define ICPT_PARTEXEC	0x38
 #define ICPT_IOINST	0x40
@@ -185,7 +190,9 @@ struct kvm_s390_sie_block {
 	__u32	scaol;			/* 0x0064 */
 	__u8	reserved68[4];		/* 0x0068 */
 	__u32	todpr;			/* 0x006c */
-	__u8	reserved70[32];		/* 0x0070 */
+	__u8	reserved70[16];		/* 0x0070 */
+	__u64	mso;			/* 0x0080 */
+	__u64	msl;			/* 0x0088 */
 	psw_t	gpsw;			/* 0x0090 */
 	__u64	gg14;			/* 0x00a0 */
 	__u64	gg15;			/* 0x00a8 */
@@ -223,7 +230,7 @@ struct kvm_s390_sie_block {
 	__u8	reserved1e6[2];		/* 0x01e6 */
 	__u64	itdba;			/* 0x01e8 */
 	__u64   riccbd;			/* 0x01f0 */
-	__u8    reserved1f8[8];		/* 0x01f8 */
+	__u64	gvrd;			/* 0x01f8 */
 } __attribute__((packed));
 
 struct kvm_s390_itdb {
@@ -256,6 +263,7 @@ struct kvm_vcpu_stat {
 	u32 instruction_stctg;
 	u32 exit_program_interruption;
 	u32 exit_instr_and_program;
+	u32 exit_operation_exception;
 	u32 deliver_external_call;
 	u32 deliver_emergency_signal;
 	u32 deliver_service_signal;
@@ -278,7 +286,9 @@ struct kvm_vcpu_stat {
 	u32 instruction_stsi;
 	u32 instruction_stfl;
 	u32 instruction_tprot;
+	u32 instruction_sie;
 	u32 instruction_essa;
+	u32 instruction_sthyi;
 	u32 instruction_sigp_sense;
 	u32 instruction_sigp_sense_running;
 	u32 instruction_sigp_external_call;
@@ -541,12 +551,16 @@ struct kvm_guestdbg_info_arch {
 
 struct kvm_vcpu_arch {
 	struct kvm_s390_sie_block *sie_block;
+	/* if vsie is active, currently executed shadow sie control block */
+	struct kvm_s390_sie_block *vsie_block;
 	unsigned int      host_acrs[NUM_ACRS];
 	struct fpu	  host_fpregs;
 	struct kvm_s390_local_interrupt local_int;
 	struct hrtimer    ckc_timer;
 	struct kvm_s390_pgm_info pgm;
 	struct gmap *gmap;
+	/* backup location for the currently enabled gmap when scheduled out */
+	struct gmap *enabled_gmap;
 	struct kvm_guestdbg_info_arch guestdbg;
 	unsigned long pfault_token;
 	unsigned long pfault_select;
@@ -631,6 +645,14 @@ struct sie_page2 {
 	u8 reserved900[0x1000 - 0x900];			/* 0x0900 */
 } __packed;
 
+struct kvm_s390_vsie {
+	struct mutex mutex;
+	struct radix_tree_root addr_to_page;
+	int page_count;
+	int next;
+	struct page *pages[KVM_MAX_VCPUS];
+};
+
 struct kvm_arch{
 	void *sca;
 	int use_esca;
@@ -646,15 +668,20 @@ struct kvm_arch{
 	int user_cpu_state_ctrl;
 	int user_sigp;
 	int user_stsi;
+	int user_instr0;
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
 	wait_queue_head_t ipte_wq;
 	int ipte_lock_count;
 	struct mutex ipte_mutex;
+	struct ratelimit_state sthyi_limit;
 	spinlock_t start_stop_lock;
 	struct sie_page2 *sie_page2;
 	struct kvm_s390_cpu_model model;
 	struct kvm_s390_crypto crypto;
+	struct kvm_s390_vsie vsie;
 	u64 epoch;
+	/* subset of available cpu features enabled by user space */
+	DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 };
 
 #define KVM_HVA_ERR_BAD		(-1UL)
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 18226437a832..6d39329c894b 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -8,8 +8,9 @@ typedef struct {
 	cpumask_t cpu_attach_mask;
 	atomic_t flush_count;
 	unsigned int flush_mm;
-	spinlock_t list_lock;
+	spinlock_t pgtable_lock;
 	struct list_head pgtable_list;
+	spinlock_t gmap_lock;
 	struct list_head gmap_list;
 	unsigned long asce;
 	unsigned long asce_limit;
@@ -22,9 +23,11 @@ typedef struct {
 	unsigned int use_skey:1;
 } mm_context_t;
 
-#define INIT_MM_CONTEXT(name)						      \
-	.context.list_lock    = __SPIN_LOCK_UNLOCKED(name.context.list_lock), \
-	.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list),    \
+#define INIT_MM_CONTEXT(name)						   \
+	.context.pgtable_lock =						   \
+			__SPIN_LOCK_UNLOCKED(name.context.pgtable_lock),   \
+	.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
+	.context.gmap_lock = __SPIN_LOCK_UNLOCKED(name.context.gmap_lock), \
 	.context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list),
 
 static inline int tprot(unsigned long addr)
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index f77c638bf397..c6a088c91aee 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -15,8 +15,9 @@
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
 {
-	spin_lock_init(&mm->context.list_lock);
+	spin_lock_init(&mm->context.pgtable_lock);
 	INIT_LIST_HEAD(&mm->context.pgtable_list);
+	spin_lock_init(&mm->context.gmap_lock);
 	INIT_LIST_HEAD(&mm->context.gmap_list);
 	cpumask_clear(&mm->context.cpu_attach_mask);
 	atomic_set(&mm->context.flush_count, 0);
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index b2146c4119b2..69b8a41fca84 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -111,13 +111,14 @@ static inline unsigned char page_get_storage_key(unsigned long addr)
 
 static inline int page_reset_referenced(unsigned long addr)
 {
-	unsigned int ipm;
+	int cc;
 
 	asm volatile(
 		"	rrbe	0,%1\n"
 		"	ipm	%0\n"
-		: "=d" (ipm) : "a" (addr) : "cc");
-	return !!(ipm & 0x20000000);
+		"	srl	%0,28\n"
+		: "=d" (cc) : "a" (addr) : "cc");
+	return cc;
 }
 
 /* Bits int the storage key */
@@ -148,6 +149,8 @@ static inline int devmem_is_allowed(unsigned long pfn)
 #define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
 #define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
 #define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#define pfn_to_virt(pfn)	__va((pfn) << PAGE_SHIFT)
+#define page_to_virt(page)	pfn_to_virt(page_to_pfn(page))
 
 #define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | \
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index da34cb6b1f3b..f4eb9843eed4 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -19,8 +19,10 @@ unsigned long *crst_table_alloc(struct mm_struct *);
 void crst_table_free(struct mm_struct *, unsigned long *);
 
 unsigned long *page_table_alloc(struct mm_struct *);
+struct page *page_table_alloc_pgste(struct mm_struct *mm);
 void page_table_free(struct mm_struct *, unsigned long *);
 void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
+void page_table_free_pgste(struct page *page);
 extern int page_table_allocate_pgste;
 
 static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 48d383af078f..72c7f60bfe83 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -277,6 +277,7 @@ static inline int is_module_addr(void *addr)
 /* Bits in the region table entry */
 #define _REGION_ENTRY_ORIGIN	~0xfffUL/* region/segment table origin	    */
 #define _REGION_ENTRY_PROTECT	0x200	/* region protection bit	    */
+#define _REGION_ENTRY_OFFSET	0xc0	/* region table offset		    */
 #define _REGION_ENTRY_INVALID	0x20	/* invalid region table entry	    */
 #define _REGION_ENTRY_TYPE_MASK	0x0c	/* region/segment table type mask   */
 #define _REGION_ENTRY_TYPE_R1	0x0c	/* region first table type	    */
@@ -364,6 +365,7 @@ static inline int is_module_addr(void *addr)
 #define PGSTE_GC_BIT	0x0002000000000000UL
 #define PGSTE_UC_BIT	0x0000800000000000UL	/* user dirty (migration) */
 #define PGSTE_IN_BIT	0x0000400000000000UL	/* IPTE notify bit */
+#define PGSTE_VSIE_BIT	0x0000200000000000UL	/* ref'd in a shadow table */
 
 /* Guest Page State used for virtualization */
 #define _PGSTE_GPS_ZERO		0x0000000080000000UL
@@ -1002,15 +1004,26 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
 void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t entry);
 void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
-void ptep_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+void ptep_notify(struct mm_struct *mm, unsigned long addr,
+		 pte_t *ptep, unsigned long bits);
+int ptep_force_prot(struct mm_struct *mm, unsigned long gaddr,
+		    pte_t *ptep, int prot, unsigned long bit);
 void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep , int reset);
 void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
+		    pte_t *sptep, pte_t *tptep, pte_t pte);
+void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep);
 
 bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address);
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned char key, bool nq);
-unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
+int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+			       unsigned char key, unsigned char *oldkey,
+			       bool nq, bool mr, bool mc);
+int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr);
+int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+			  unsigned char *key);
 
 /*
  * Certain architectures need to do special things when PTEs
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 09529202ea77..03323175de30 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -112,6 +112,8 @@ struct thread_struct {
         unsigned long ksp;              /* kernel stack pointer             */
 	mm_segment_t mm_segment;
 	unsigned long gmap_addr;	/* address of last gmap fault. */
+	unsigned int gmap_write_flag;	/* gmap fault write indication */
+	unsigned int gmap_int_code;	/* int code of last gmap fault */
 	unsigned int gmap_pfault;	/* signal of a pending guest pfault */
 	struct per_regs per_user;	/* User specified PER registers */
 	struct per_event per_event;	/* Cause of the last PER trap */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index e4f6f73afe2f..2ad9c204b1a2 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -32,12 +32,19 @@ struct sclp_core_entry {
 	u8 reserved0;
 	u8 : 4;
 	u8 sief2 : 1;
-	u8 : 3;
-	u8 : 3;
+	u8 skey : 1;
+	u8 : 2;
+	u8 : 2;
+	u8 gpere : 1;
 	u8 siif : 1;
 	u8 sigpif : 1;
 	u8 : 3;
-	u8 reserved2[10];
+	u8 reserved2[3];
+	u8 : 2;
+	u8 ib : 1;
+	u8 cei : 1;
+	u8 : 4;
+	u8 reserved3[6];
 	u8 type;
 	u8 reserved1;
 } __attribute__((packed));
@@ -59,6 +66,15 @@ struct sclp_info {
 	unsigned char has_hvs : 1;
 	unsigned char has_esca : 1;
 	unsigned char has_sief2 : 1;
+	unsigned char has_64bscao : 1;
+	unsigned char has_gpere : 1;
+	unsigned char has_cmma : 1;
+	unsigned char has_gsls : 1;
+	unsigned char has_ib : 1;
+	unsigned char has_cei : 1;
+	unsigned char has_pfmfi : 1;
+	unsigned char has_ibs : 1;
+	unsigned char has_skey : 1;
 	unsigned int ibc;
 	unsigned int mtid;
 	unsigned int mtid_cp;
@@ -101,5 +117,6 @@ int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
 int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
 void sclp_early_detect(void);
 void _sclp_print_early(const char *);
+void sclp_ocf_cpc_name_copy(char *dst);
 
 #endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 3b8e99ef9d58..a2ffec4139ad 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -93,6 +93,47 @@ struct kvm_s390_vm_cpu_machine {
 	__u64 fac_list[256];
 };
 
+#define KVM_S390_VM_CPU_PROCESSOR_FEAT	2
+#define KVM_S390_VM_CPU_MACHINE_FEAT	3
+
+#define KVM_S390_VM_CPU_FEAT_NR_BITS	1024
+#define KVM_S390_VM_CPU_FEAT_ESOP	0
+#define KVM_S390_VM_CPU_FEAT_SIEF2	1
+#define KVM_S390_VM_CPU_FEAT_64BSCAO	2
+#define KVM_S390_VM_CPU_FEAT_SIIF	3
+#define KVM_S390_VM_CPU_FEAT_GPERE	4
+#define KVM_S390_VM_CPU_FEAT_GSLS	5
+#define KVM_S390_VM_CPU_FEAT_IB		6
+#define KVM_S390_VM_CPU_FEAT_CEI	7
+#define KVM_S390_VM_CPU_FEAT_IBS	8
+#define KVM_S390_VM_CPU_FEAT_SKEY	9
+#define KVM_S390_VM_CPU_FEAT_CMMA	10
+#define KVM_S390_VM_CPU_FEAT_PFMFI	11
+#define KVM_S390_VM_CPU_FEAT_SIGPIF	12
+struct kvm_s390_vm_cpu_feat {
+	__u64 feat[16];
+};
+
+#define KVM_S390_VM_CPU_PROCESSOR_SUBFUNC	4
+#define KVM_S390_VM_CPU_MACHINE_SUBFUNC		5
+/* for "test bit" instructions MSB 0 bit ordering, for "query" raw blocks */
+struct kvm_s390_vm_cpu_subfunc {
+	__u8 plo[32];		/* always */
+	__u8 ptff[16];		/* with TOD-clock steering */
+	__u8 kmac[16];		/* with MSA */
+	__u8 kmc[16];		/* with MSA */
+	__u8 km[16];		/* with MSA */
+	__u8 kimd[16];		/* with MSA */
+	__u8 klmd[16];		/* with MSA */
+	__u8 pckmo[16];		/* with MSA3 */
+	__u8 kmctr[16];		/* with MSA4 */
+	__u8 kmf[16];		/* with MSA4 */
+	__u8 kmo[16];		/* with MSA4 */
+	__u8 pcc[16];		/* with MSA4 */
+	__u8 ppno[16];		/* with MSA5 */
+	__u8 reserved[1824];
+};
+
 /* kvm attributes for crypto */
 #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW	0
 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW	1
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
index 8fb5d4a6dd25..3ac634368939 100644
--- a/arch/s390/include/uapi/asm/sie.h
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -140,6 +140,7 @@
 	exit_code_ipa0(0xB2, 0x4c, "TAR"),	\
 	exit_code_ipa0(0xB2, 0x50, "CSP"),	\
 	exit_code_ipa0(0xB2, 0x54, "MVPG"),	\
+	exit_code_ipa0(0xB2, 0x56, "STHYI"),	\
 	exit_code_ipa0(0xB2, 0x58, "BSG"),	\
 	exit_code_ipa0(0xB2, 0x5a, "BSA"),	\
 	exit_code_ipa0(0xB2, 0x5f, "CHSC"),	\
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index 48b37b8357e6..a97354c8c667 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -162,6 +162,30 @@ int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
 }
 EXPORT_SYMBOL(diag14);
 
+static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr)
+{
+	register unsigned long _subcode asm("0") = *subcode;
+	register unsigned long _size asm("1") = size;
+
+	asm volatile(
+		"	diag	%2,%0,0x204\n"
+		"0:	nopr	%%r7\n"
+		EX_TABLE(0b,0b)
+		: "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory");
+	*subcode = _subcode;
+	return _size;
+}
+
+int diag204(unsigned long subcode, unsigned long size, void *addr)
+{
+	diag_stat_inc(DIAG_STAT_X204);
+	size = __diag204(&subcode, size, addr);
+	if (subcode)
+		return -1;
+	return size;
+}
+EXPORT_SYMBOL(diag204);
+
 /*
  * Diagnose 210: Get information about a virtual device
  */
@@ -196,3 +220,18 @@ int diag210(struct diag210 *addr)
 	return ccode;
 }
 EXPORT_SYMBOL(diag210);
+
+int diag224(void *ptr)
+{
+	int rc = -EOPNOTSUPP;
+
+	diag_stat_inc(DIAG_STAT_X224);
+	asm volatile(
+		"	diag	%1,%2,0x224\n"
+		"0:	lhi	%0,0x0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "+d" (rc) :"d" (0), "d" (ptr) : "memory");
+	return rc;
+}
+EXPORT_SYMBOL(diag224);
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index d42fa38c2429..09a9e6dfc09f 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -12,6 +12,6 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqch
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
 kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-objs += diag.o gaccess.o guestdbg.o
+kvm-objs += diag.o gaccess.o guestdbg.o sthyi.o vsie.o
 
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 1ea4095b67d7..ce865bd4f81d 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -212,6 +212,11 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 	    (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
 		return -EOPNOTSUPP;
 
+	VCPU_EVENT(vcpu, 4, "diag 0x500 schid 0x%8.8x queue 0x%x cookie 0x%llx",
+			    (u32) vcpu->run->s.regs.gprs[2],
+			    (u32) vcpu->run->s.regs.gprs[3],
+			    vcpu->run->s.regs.gprs[4]);
+
 	/*
 	 * The layout is as follows:
 	 * - gpr 2 contains the subchannel id (passed as addr)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 66938d283b77..54200208bf24 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -8,6 +8,7 @@
 #include <linux/vmalloc.h>
 #include <linux/err.h>
 #include <asm/pgtable.h>
+#include <asm/gmap.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 #include <asm/switch_to.h>
@@ -476,18 +477,73 @@ enum {
 	FSI_FETCH   = 2  /* Exception was due to fetch operation */
 };
 
-static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
-			 ar_t ar, enum gacc_mode mode)
+enum prot_type {
+	PROT_TYPE_LA   = 0,
+	PROT_TYPE_KEYC = 1,
+	PROT_TYPE_ALC  = 2,
+	PROT_TYPE_DAT  = 3,
+};
+
+static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
+		     ar_t ar, enum gacc_mode mode, enum prot_type prot)
 {
-	int rc;
-	struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
 	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
-	struct trans_exc_code_bits *tec_bits;
+	struct trans_exc_code_bits *tec;
 
 	memset(pgm, 0, sizeof(*pgm));
-	tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
-	tec_bits->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
-	tec_bits->as = psw.as;
+	pgm->code = code;
+	tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+
+	switch (code) {
+	case PGM_ASCE_TYPE:
+	case PGM_PAGE_TRANSLATION:
+	case PGM_REGION_FIRST_TRANS:
+	case PGM_REGION_SECOND_TRANS:
+	case PGM_REGION_THIRD_TRANS:
+	case PGM_SEGMENT_TRANSLATION:
+		/*
+		 * op_access_id only applies to MOVE_PAGE -> set bit 61
+		 * exc_access_id has to be set to 0 for some instructions. Both
+		 * cases have to be handled by the caller. We can always store
+		 * exc_access_id, as it is undefined for non-ar cases.
+		 */
+		tec->addr = gva >> PAGE_SHIFT;
+		tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
+		tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+		/* FALL THROUGH */
+	case PGM_ALEN_TRANSLATION:
+	case PGM_ALE_SEQUENCE:
+	case PGM_ASTE_VALIDITY:
+	case PGM_ASTE_SEQUENCE:
+	case PGM_EXTENDED_AUTHORITY:
+		pgm->exc_access_id = ar;
+		break;
+	case PGM_PROTECTION:
+		switch (prot) {
+		case PROT_TYPE_ALC:
+			tec->b60 = 1;
+			/* FALL THROUGH */
+		case PROT_TYPE_DAT:
+			tec->b61 = 1;
+			tec->addr = gva >> PAGE_SHIFT;
+			tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
+			tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+			/* exc_access_id is undefined for most cases */
+			pgm->exc_access_id = ar;
+			break;
+		default: /* LA and KEYC set b61 to 0, other params undefined */
+			break;
+		}
+		break;
+	}
+	return code;
+}
+
+static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
+			 unsigned long ga, ar_t ar, enum gacc_mode mode)
+{
+	int rc;
+	struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
 
 	if (!psw.t) {
 		asce->val = 0;
@@ -510,21 +566,8 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
 		return 0;
 	case PSW_AS_ACCREG:
 		rc = ar_translation(vcpu, asce, ar, mode);
-		switch (rc) {
-		case PGM_ALEN_TRANSLATION:
-		case PGM_ALE_SEQUENCE:
-		case PGM_ASTE_VALIDITY:
-		case PGM_ASTE_SEQUENCE:
-		case PGM_EXTENDED_AUTHORITY:
-			vcpu->arch.pgm.exc_access_id = ar;
-			break;
-		case PGM_PROTECTION:
-			tec_bits->b60 = 1;
-			tec_bits->b61 = 1;
-			break;
-		}
 		if (rc > 0)
-			pgm->code = rc;
+			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
 		return rc;
 	}
 	return 0;
@@ -729,40 +772,31 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
 	return 1;
 }
 
-static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
+static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar,
 			    unsigned long *pages, unsigned long nr_pages,
 			    const union asce asce, enum gacc_mode mode)
 {
-	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
-	struct trans_exc_code_bits *tec_bits;
-	int lap_enabled, rc;
+	int lap_enabled, rc = 0;
 
-	tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
 	lap_enabled = low_address_protection_enabled(vcpu, asce);
 	while (nr_pages) {
 		ga = kvm_s390_logical_to_effective(vcpu, ga);
-		tec_bits->addr = ga >> PAGE_SHIFT;
-		if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) {
-			pgm->code = PGM_PROTECTION;
-			return pgm->code;
-		}
+		if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
+			return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
+					 PROT_TYPE_LA);
 		ga &= PAGE_MASK;
 		if (psw_bits(*psw).t) {
 			rc = guest_translate(vcpu, ga, pages, asce, mode);
 			if (rc < 0)
 				return rc;
-			if (rc == PGM_PROTECTION)
-				tec_bits->b61 = 1;
-			if (rc)
-				pgm->code = rc;
 		} else {
 			*pages = kvm_s390_real_to_abs(vcpu, ga);
 			if (kvm_is_error_gpa(vcpu->kvm, *pages))
-				pgm->code = PGM_ADDRESSING;
+				rc = PGM_ADDRESSING;
 		}
-		if (pgm->code)
-			return pgm->code;
+		if (rc)
+			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_DAT);
 		ga += PAGE_SIZE;
 		pages++;
 		nr_pages--;
@@ -783,7 +817,8 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
 
 	if (!len)
 		return 0;
-	rc = get_vcpu_asce(vcpu, &asce, ar, mode);
+	ga = kvm_s390_logical_to_effective(vcpu, ga);
+	rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode);
 	if (rc)
 		return rc;
 	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
@@ -795,7 +830,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
 	need_ipte_lock = psw_bits(*psw).t && !asce.r;
 	if (need_ipte_lock)
 		ipte_lock(vcpu);
-	rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, mode);
+	rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode);
 	for (idx = 0; idx < nr_pages && !rc; idx++) {
 		gpa = *(pages + idx) + (ga & ~PAGE_MASK);
 		_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
@@ -846,37 +881,28 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
 int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
 			    unsigned long *gpa, enum gacc_mode mode)
 {
-	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
-	struct trans_exc_code_bits *tec;
 	union asce asce;
 	int rc;
 
 	gva = kvm_s390_logical_to_effective(vcpu, gva);
-	tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
-	rc = get_vcpu_asce(vcpu, &asce, ar, mode);
-	tec->addr = gva >> PAGE_SHIFT;
+	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
 	if (rc)
 		return rc;
 	if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
-		if (mode == GACC_STORE) {
-			rc = pgm->code = PGM_PROTECTION;
-			return rc;
-		}
+		if (mode == GACC_STORE)
+			return trans_exc(vcpu, PGM_PROTECTION, gva, 0,
+					 mode, PROT_TYPE_LA);
 	}
 
 	if (psw_bits(*psw).t && !asce.r) {	/* Use DAT? */
 		rc = guest_translate(vcpu, gva, gpa, asce, mode);
-		if (rc > 0) {
-			if (rc == PGM_PROTECTION)
-				tec->b61 = 1;
-			pgm->code = rc;
-		}
+		if (rc > 0)
+			return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT);
 	} else {
-		rc = 0;
 		*gpa = kvm_s390_real_to_abs(vcpu, gva);
 		if (kvm_is_error_gpa(vcpu->kvm, *gpa))
-			rc = pgm->code = PGM_ADDRESSING;
+			return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0);
 	}
 
 	return rc;
@@ -915,20 +941,247 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
  */
 int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
 {
-	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
-	psw_t *psw = &vcpu->arch.sie_block->gpsw;
-	struct trans_exc_code_bits *tec_bits;
 	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
 
 	if (!ctlreg0.lap || !is_low_address(gra))
 		return 0;
+	return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA);
+}
 
-	memset(pgm, 0, sizeof(*pgm));
-	tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
-	tec_bits->fsi = FSI_STORE;
-	tec_bits->as = psw_bits(*psw).as;
-	tec_bits->addr = gra >> PAGE_SHIFT;
-	pgm->code = PGM_PROTECTION;
+/**
+ * kvm_s390_shadow_tables - walk the guest page table and create shadow tables
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @pgt: pointer to the page table address result
+ * @fake: pgt references contiguous guest memory block, not a pgtable
+ */
+static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
+				  unsigned long *pgt, int *dat_protection,
+				  int *fake)
+{
+	struct gmap *parent;
+	union asce asce;
+	union vaddress vaddr;
+	unsigned long ptr;
+	int rc;
+
+	*fake = 0;
+	*dat_protection = 0;
+	parent = sg->parent;
+	vaddr.addr = saddr;
+	asce.val = sg->orig_asce;
+	ptr = asce.origin * 4096;
+	if (asce.r) {
+		*fake = 1;
+		asce.dt = ASCE_TYPE_REGION1;
+	}
+	switch (asce.dt) {
+	case ASCE_TYPE_REGION1:
+		if (vaddr.rfx01 > asce.tl && !asce.r)
+			return PGM_REGION_FIRST_TRANS;
+		break;
+	case ASCE_TYPE_REGION2:
+		if (vaddr.rfx)
+			return PGM_ASCE_TYPE;
+		if (vaddr.rsx01 > asce.tl)
+			return PGM_REGION_SECOND_TRANS;
+		break;
+	case ASCE_TYPE_REGION3:
+		if (vaddr.rfx || vaddr.rsx)
+			return PGM_ASCE_TYPE;
+		if (vaddr.rtx01 > asce.tl)
+			return PGM_REGION_THIRD_TRANS;
+		break;
+	case ASCE_TYPE_SEGMENT:
+		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
+			return PGM_ASCE_TYPE;
+		if (vaddr.sx01 > asce.tl)
+			return PGM_SEGMENT_TRANSLATION;
+		break;
+	}
+
+	switch (asce.dt) {
+	case ASCE_TYPE_REGION1: {
+		union region1_table_entry rfte;
 
-	return pgm->code;
+		if (*fake) {
+			/* offset in 16EB guest memory block */
+			ptr = ptr + ((unsigned long) vaddr.rsx << 53UL);
+			rfte.val = ptr;
+			goto shadow_r2t;
+		}
+		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
+		if (rc)
+			return rc;
+		if (rfte.i)
+			return PGM_REGION_FIRST_TRANS;
+		if (rfte.tt != TABLE_TYPE_REGION1)
+			return PGM_TRANSLATION_SPEC;
+		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
+			return PGM_REGION_SECOND_TRANS;
+		if (sg->edat_level >= 1)
+			*dat_protection |= rfte.p;
+		ptr = rfte.rto << 12UL;
+shadow_r2t:
+		rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
+		if (rc)
+			return rc;
+		/* fallthrough */
+	}
+	case ASCE_TYPE_REGION2: {
+		union region2_table_entry rste;
+
+		if (*fake) {
+			/* offset in 8PB guest memory block */
+			ptr = ptr + ((unsigned long) vaddr.rtx << 42UL);
+			rste.val = ptr;
+			goto shadow_r3t;
+		}
+		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
+		if (rc)
+			return rc;
+		if (rste.i)
+			return PGM_REGION_SECOND_TRANS;
+		if (rste.tt != TABLE_TYPE_REGION2)
+			return PGM_TRANSLATION_SPEC;
+		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
+			return PGM_REGION_THIRD_TRANS;
+		if (sg->edat_level >= 1)
+			*dat_protection |= rste.p;
+		ptr = rste.rto << 12UL;
+shadow_r3t:
+		rste.p |= *dat_protection;
+		rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
+		if (rc)
+			return rc;
+		/* fallthrough */
+	}
+	case ASCE_TYPE_REGION3: {
+		union region3_table_entry rtte;
+
+		if (*fake) {
+			/* offset in 4TB guest memory block */
+			ptr = ptr + ((unsigned long) vaddr.sx << 31UL);
+			rtte.val = ptr;
+			goto shadow_sgt;
+		}
+		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
+		if (rc)
+			return rc;
+		if (rtte.i)
+			return PGM_REGION_THIRD_TRANS;
+		if (rtte.tt != TABLE_TYPE_REGION3)
+			return PGM_TRANSLATION_SPEC;
+		if (rtte.cr && asce.p && sg->edat_level >= 2)
+			return PGM_TRANSLATION_SPEC;
+		if (rtte.fc && sg->edat_level >= 2) {
+			*dat_protection |= rtte.fc0.p;
+			*fake = 1;
+			ptr = rtte.fc1.rfaa << 31UL;
+			rtte.val = ptr;
+			goto shadow_sgt;
+		}
+		if (vaddr.sx01 < rtte.fc0.tf || vaddr.sx01 > rtte.fc0.tl)
+			return PGM_SEGMENT_TRANSLATION;
+		if (sg->edat_level >= 1)
+			*dat_protection |= rtte.fc0.p;
+		ptr = rtte.fc0.sto << 12UL;
+shadow_sgt:
+		rtte.fc0.p |= *dat_protection;
+		rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
+		if (rc)
+			return rc;
+		/* fallthrough */
+	}
+	case ASCE_TYPE_SEGMENT: {
+		union segment_table_entry ste;
+
+		if (*fake) {
+			/* offset in 2G guest memory block */
+			ptr = ptr + ((unsigned long) vaddr.sx << 20UL);
+			ste.val = ptr;
+			goto shadow_pgt;
+		}
+		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
+		if (rc)
+			return rc;
+		if (ste.i)
+			return PGM_SEGMENT_TRANSLATION;
+		if (ste.tt != TABLE_TYPE_SEGMENT)
+			return PGM_TRANSLATION_SPEC;
+		if (ste.cs && asce.p)
+			return PGM_TRANSLATION_SPEC;
+		*dat_protection |= ste.fc0.p;
+		if (ste.fc && sg->edat_level >= 1) {
+			*fake = 1;
+			ptr = ste.fc1.sfaa << 20UL;
+			ste.val = ptr;
+			goto shadow_pgt;
+		}
+		ptr = ste.fc0.pto << 11UL;
+shadow_pgt:
+		ste.fc0.p |= *dat_protection;
+		rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
+		if (rc)
+			return rc;
+	}
+	}
+	/* Return the parent address of the page table */
+	*pgt = ptr;
+	return 0;
+}
+
+/**
+ * kvm_s390_shadow_fault - handle fault on a shadow page table
+ * @vcpu: virtual cpu
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ *
+ * Returns: - 0 if the shadow fault was successfully resolved
+ *	    - > 0 (pgm exception code) on exceptions while faulting
+ *	    - -EAGAIN if the caller can retry immediately
+ *	    - -EFAULT when accessing invalid guest addresses
+ *	    - -ENOMEM if out of memory
+ */
+int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
+			  unsigned long saddr)
+{
+	union vaddress vaddr;
+	union page_table_entry pte;
+	unsigned long pgt;
+	int dat_protection, fake;
+	int rc;
+
+	down_read(&sg->mm->mmap_sem);
+	/*
+	 * We don't want any guest-2 tables to change - so the parent
+	 * tables/pointers we read stay valid - unshadowing is however
+	 * always possible - only guest_table_lock protects us.
+	 */
+	ipte_lock(vcpu);
+
+	rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
+	if (rc)
+		rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
+					    &fake);
+
+	vaddr.addr = saddr;
+	if (fake) {
+		/* offset in 1MB guest memory block */
+		pte.val = pgt + ((unsigned long) vaddr.px << 12UL);
+		goto shadow_page;
+	}
+	if (!rc)
+		rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
+	if (!rc && pte.i)
+		rc = PGM_PAGE_TRANSLATION;
+	if (!rc && (pte.z || (pte.co && sg->edat_level < 1)))
+		rc = PGM_TRANSLATION_SPEC;
+shadow_page:
+	pte.p |= dat_protection;
+	if (!rc)
+		rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
+	ipte_unlock(vcpu);
+	up_read(&sg->mm->mmap_sem);
+	return rc;
 }
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index df0a79dd8159..8756569ad938 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -361,4 +361,7 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
 int ipte_lock_held(struct kvm_vcpu *vcpu);
 int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
 
+int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
+			  unsigned long saddr);
+
 #endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index e8c6843b9600..31a05330d11c 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -439,6 +439,23 @@ exit_required:
 #define guest_per_enabled(vcpu) \
 			     (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER)
 
+int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu)
+{
+	const u8 ilen = kvm_s390_get_ilen(vcpu);
+	struct kvm_s390_pgm_info pgm_info = {
+		.code = PGM_PER,
+		.per_code = PER_EVENT_IFETCH >> 24,
+		.per_address = __rewind_psw(vcpu->arch.sie_block->gpsw, ilen),
+	};
+
+	/*
+	 * The PSW points to the next instruction, therefore the intercepted
+	 * instruction generated a PER i-fetch event. PER address therefore
+	 * points at the previous PSW address (could be an EXECUTE function).
+	 */
+	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
+}
+
 static void filter_guest_per_event(struct kvm_vcpu *vcpu)
 {
 	u32 perc = vcpu->arch.sie_block->perc << 24;
@@ -465,7 +482,7 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu)
 		guest_perc &= ~PER_EVENT_IFETCH;
 
 	/* All other PER events will be given to the guest */
-	/* TODO: Check alterated address/address space */
+	/* TODO: Check altered address/address space */
 
 	vcpu->arch.sie_block->perc = guest_perc >> 24;
 
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 252157181302..dfd0ca2638fa 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -351,8 +351,26 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
 	return -EOPNOTSUPP;
 }
 
+static int handle_operexc(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.exit_operation_exception++;
+	trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa,
+				      vcpu->arch.sie_block->ipb);
+
+	if (vcpu->arch.sie_block->ipa == 0xb256 &&
+	    test_kvm_facility(vcpu->kvm, 74))
+		return handle_sthyi(vcpu);
+
+	if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0)
+		return -EOPNOTSUPP;
+
+	return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+}
+
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 {
+	int rc, per_rc = 0;
+
 	if (kvm_is_ucontrol(vcpu->kvm))
 		return -EOPNOTSUPP;
 
@@ -361,7 +379,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 	case 0x18:
 		return handle_noop(vcpu);
 	case 0x04:
-		return handle_instruction(vcpu);
+		rc = handle_instruction(vcpu);
+		break;
 	case 0x08:
 		return handle_prog(vcpu);
 	case 0x14:
@@ -372,9 +391,19 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 		return handle_validity(vcpu);
 	case 0x28:
 		return handle_stop(vcpu);
+	case 0x2c:
+		rc = handle_operexc(vcpu);
+		break;
 	case 0x38:
-		return handle_partial_execution(vcpu);
+		rc = handle_partial_execution(vcpu);
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
+
+	/* process PER, also if the instrution is processed in user space */
+	if (vcpu->arch.sie_block->icptstatus & 0x02 &&
+	    (!rc || rc == -EOPNOTSUPP))
+		per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu);
+	return per_rc ? per_rc : rc;
 }
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 5a80af740d3e..24524c0f3ef8 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -28,9 +28,6 @@
 #include "gaccess.h"
 #include "trace-s390.h"
 
-#define IOINT_SCHID_MASK 0x0000ffff
-#define IOINT_SSID_MASK 0x00030000
-#define IOINT_CSSID_MASK 0x03fc0000
 #define PFAULT_INIT 0x0600
 #define PFAULT_DONE 0x0680
 #define VIRTIO_PARAM 0x0d00
@@ -821,7 +818,14 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
 					struct kvm_s390_interrupt_info,
 					list);
 	if (inti) {
-		VCPU_EVENT(vcpu, 4, "deliver: I/O 0x%llx", inti->type);
+		if (inti->type & KVM_S390_INT_IO_AI_MASK)
+			VCPU_EVENT(vcpu, 4, "%s", "deliver: I/O (AI)");
+		else
+			VCPU_EVENT(vcpu, 4, "deliver: I/O %x ss %x schid %04x",
+			inti->io.subchannel_id >> 8,
+			inti->io.subchannel_id >> 1 & 0x3,
+			inti->io.subchannel_nr);
+
 		vcpu->stat.deliver_io_int++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 				inti->type,
@@ -991,6 +995,11 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
 		swake_up(&vcpu->wq);
 		vcpu->stat.halt_wakeup++;
 	}
+	/*
+	 * The VCPU might not be sleeping but is executing the VSIE. Let's
+	 * kick it, so it leaves the SIE to process the request.
+	 */
+	kvm_s390_vsie_kick(vcpu);
 }
 
 enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
@@ -1415,6 +1424,13 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 	}
 	fi->counters[FIRQ_CNTR_IO] += 1;
 
+	if (inti->type & KVM_S390_INT_IO_AI_MASK)
+		VM_EVENT(kvm, 4, "%s", "inject: I/O (AI)");
+	else
+		VM_EVENT(kvm, 4, "inject: I/O %x ss %x schid %04x",
+			inti->io.subchannel_id >> 8,
+			inti->io.subchannel_id >> 1 & 0x3,
+			inti->io.subchannel_nr);
 	isc = int_word_to_isc(inti->io.io_int_word);
 	list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
 	list_add_tail(&inti->list, list);
@@ -1531,13 +1547,6 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 		inti->mchk.mcic = s390int->parm64;
 		break;
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-		if (inti->type & KVM_S390_INT_IO_AI_MASK)
-			VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
-		else
-			VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
-				 s390int->type & IOINT_CSSID_MASK,
-				 s390int->type & IOINT_SSID_MASK,
-				 s390int->type & IOINT_SCHID_MASK);
 		inti->io.subchannel_id = s390int->parm >> 16;
 		inti->io.subchannel_nr = s390int->parm & 0x0000ffffu;
 		inti->io.io_int_parm = s390int->parm64 >> 32;
@@ -2237,7 +2246,8 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
 	return ret;
 }
 
-int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm *kvm,
+			  struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue)
 {
 	int ret;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6f5c344cd785..3f3ae4865d57 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -21,11 +21,13 @@
 #include <linux/init.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
+#include <linux/mman.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/timer.h>
 #include <linux/vmalloc.h>
+#include <linux/bitmap.h>
 #include <asm/asm-offsets.h>
 #include <asm/lowcore.h>
 #include <asm/stp.h>
@@ -35,6 +37,8 @@
 #include <asm/switch_to.h>
 #include <asm/isc.h>
 #include <asm/sclp.h>
+#include <asm/cpacf.h>
+#include <asm/timex.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 
@@ -64,6 +68,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "exit_pei", VCPU_STAT(exit_pei) },
 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
+	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
@@ -94,6 +99,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
+	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
+	{ "instruction_sie", VCPU_STAT(instruction_sie) },
 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
@@ -119,6 +126,11 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ NULL }
 };
 
+/* allow nested virtualization in KVM (if enabled by user space) */
+static int nested;
+module_param(nested, int, S_IRUGO);
+MODULE_PARM_DESC(nested, "Nested virtualization support");
+
 /* upper facilities limit for kvm */
 unsigned long kvm_s390_fac_list_mask[16] = {
 	0xffe6000000000000UL,
@@ -131,7 +143,13 @@ unsigned long kvm_s390_fac_list_mask_size(void)
 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
 }
 
+/* available cpu features supported by kvm */
+static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
+/* available subfunctions indicated via query / "test bit" */
+static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
+
 static struct gmap_notifier gmap_notifier;
+static struct gmap_notifier vsie_gmap_notifier;
 debug_info_t *kvm_s390_dbf;
 
 /* Section: not file related */
@@ -141,7 +159,8 @@ int kvm_arch_hardware_enable(void)
 	return 0;
 }
 
-static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
+			      unsigned long end);
 
 /*
  * This callback is executed during stop_machine(). All CPUs are therefore
@@ -163,6 +182,8 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 			vcpu->arch.sie_block->epoch -= *delta;
 			if (vcpu->arch.cputm_enabled)
 				vcpu->arch.cputm_start += *delta;
+			if (vcpu->arch.vsie_block)
+				vcpu->arch.vsie_block->epoch -= *delta;
 		}
 	}
 	return NOTIFY_OK;
@@ -175,7 +196,9 @@ static struct notifier_block kvm_clock_notifier = {
 int kvm_arch_hardware_setup(void)
 {
 	gmap_notifier.notifier_call = kvm_gmap_notifier;
-	gmap_register_ipte_notifier(&gmap_notifier);
+	gmap_register_pte_notifier(&gmap_notifier);
+	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
+	gmap_register_pte_notifier(&vsie_gmap_notifier);
 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 				       &kvm_clock_notifier);
 	return 0;
@@ -183,11 +206,109 @@ int kvm_arch_hardware_setup(void)
 
 void kvm_arch_hardware_unsetup(void)
 {
-	gmap_unregister_ipte_notifier(&gmap_notifier);
+	gmap_unregister_pte_notifier(&gmap_notifier);
+	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 					 &kvm_clock_notifier);
 }
 
+static void allow_cpu_feat(unsigned long nr)
+{
+	set_bit_inv(nr, kvm_s390_available_cpu_feat);
+}
+
+static inline int plo_test_bit(unsigned char nr)
+{
+	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
+	int cc = 3; /* subfunction not available */
+
+	asm volatile(
+		/* Parameter registers are ignored for "test bit" */
+		"	plo	0,0,0,0(0)\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (cc)
+		: "d" (r0)
+		: "cc");
+	return cc == 0;
+}
+
+static void kvm_s390_cpu_feat_init(void)
+{
+	int i;
+
+	for (i = 0; i < 256; ++i) {
+		if (plo_test_bit(i))
+			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
+	}
+
+	if (test_facility(28)) /* TOD-clock steering */
+		ptff(kvm_s390_available_subfunc.ptff,
+		     sizeof(kvm_s390_available_subfunc.ptff),
+		     PTFF_QAF);
+
+	if (test_facility(17)) { /* MSA */
+		__cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
+		__cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
+		__cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
+		__cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
+		__cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
+	}
+	if (test_facility(76)) /* MSA3 */
+		__cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
+	if (test_facility(77)) { /* MSA4 */
+		__cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
+		__cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
+		__cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
+		__cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
+	}
+	if (test_facility(57)) /* MSA5 */
+		__cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
+
+	if (MACHINE_HAS_ESOP)
+		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
+	/*
+	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
+	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
+	 */
+	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
+	    !test_facility(3) || !nested)
+		return;
+	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
+	if (sclp.has_64bscao)
+		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
+	if (sclp.has_siif)
+		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
+	if (sclp.has_gpere)
+		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
+	if (sclp.has_gsls)
+		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
+	if (sclp.has_ib)
+		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
+	if (sclp.has_cei)
+		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
+	if (sclp.has_ibs)
+		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
+	/*
+	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
+	 * all skey handling functions read/set the skey from the PGSTE
+	 * instead of the real storage key.
+	 *
+	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
+	 * pages being detected as preserved although they are resident.
+	 *
+	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
+	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
+	 *
+	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
+	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
+	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
+	 *
+	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
+	 * cannot easily shadow the SCA because of the ipte lock.
+	 */
+}
+
 int kvm_arch_init(void *opaque)
 {
 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
@@ -199,6 +320,8 @@ int kvm_arch_init(void *opaque)
 		return -ENOMEM;
 	}
 
+	kvm_s390_cpu_feat_init();
+
 	/* Register floating interrupt controller interface. */
 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 }
@@ -244,6 +367,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_USER_STSI:
 	case KVM_CAP_S390_SKEYS:
 	case KVM_CAP_S390_IRQ_STATE:
+	case KVM_CAP_S390_USER_INSTR0:
 		r = 1;
 		break;
 	case KVM_CAP_S390_MEM_OP:
@@ -251,8 +375,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		break;
 	case KVM_CAP_NR_VCPUS:
 	case KVM_CAP_MAX_VCPUS:
-		r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
-				  : KVM_S390_BSCA_CPU_SLOTS;
+		r = KVM_S390_BSCA_CPU_SLOTS;
+		if (sclp.has_esca && sclp.has_64bscao)
+			r = KVM_S390_ESCA_CPU_SLOTS;
 		break;
 	case KVM_CAP_NR_MEMSLOTS:
 		r = KVM_USER_MEM_SLOTS;
@@ -335,6 +460,16 @@ out:
 	return r;
 }
 
+static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
+{
+	unsigned int i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
+	}
+}
+
 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 {
 	int r;
@@ -355,7 +490,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		break;
 	case KVM_CAP_S390_VECTOR_REGISTERS:
 		mutex_lock(&kvm->lock);
-		if (atomic_read(&kvm->online_vcpus)) {
+		if (kvm->created_vcpus) {
 			r = -EBUSY;
 		} else if (MACHINE_HAS_VX) {
 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
@@ -370,7 +505,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 	case KVM_CAP_S390_RI:
 		r = -EINVAL;
 		mutex_lock(&kvm->lock);
-		if (atomic_read(&kvm->online_vcpus)) {
+		if (kvm->created_vcpus) {
 			r = -EBUSY;
 		} else if (test_facility(64)) {
 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
@@ -386,6 +521,12 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		kvm->arch.user_stsi = 1;
 		r = 0;
 		break;
+	case KVM_CAP_S390_USER_INSTR0:
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
+		kvm->arch.user_instr0 = 1;
+		icpt_operexc_on_all_vcpus(kvm);
+		r = 0;
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -418,21 +559,23 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 	unsigned int idx;
 	switch (attr->attr) {
 	case KVM_S390_VM_MEM_ENABLE_CMMA:
-		/* enable CMMA only for z10 and later (EDAT_1) */
-		ret = -EINVAL;
-		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
+		ret = -ENXIO;
+		if (!sclp.has_cmma)
 			break;
 
 		ret = -EBUSY;
 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 		mutex_lock(&kvm->lock);
-		if (atomic_read(&kvm->online_vcpus) == 0) {
+		if (!kvm->created_vcpus) {
 			kvm->arch.use_cmma = 1;
 			ret = 0;
 		}
 		mutex_unlock(&kvm->lock);
 		break;
 	case KVM_S390_VM_MEM_CLR_CMMA:
+		ret = -ENXIO;
+		if (!sclp.has_cmma)
+			break;
 		ret = -EINVAL;
 		if (!kvm->arch.use_cmma)
 			break;
@@ -461,20 +604,20 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 		if (!new_limit)
 			return -EINVAL;
 
-		/* gmap_alloc takes last usable address */
+		/* gmap_create takes last usable address */
 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
 			new_limit -= 1;
 
 		ret = -EBUSY;
 		mutex_lock(&kvm->lock);
-		if (atomic_read(&kvm->online_vcpus) == 0) {
-			/* gmap_alloc will round the limit up */
-			struct gmap *new = gmap_alloc(current->mm, new_limit);
+		if (!kvm->created_vcpus) {
+			/* gmap_create will round the limit up */
+			struct gmap *new = gmap_create(current->mm, new_limit);
 
 			if (!new) {
 				ret = -ENOMEM;
 			} else {
-				gmap_free(kvm->arch.gmap);
+				gmap_remove(kvm->arch.gmap);
 				new->private = kvm;
 				kvm->arch.gmap = new;
 				ret = 0;
@@ -644,7 +787,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 	int ret = 0;
 
 	mutex_lock(&kvm->lock);
-	if (atomic_read(&kvm->online_vcpus)) {
+	if (kvm->created_vcpus) {
 		ret = -EBUSY;
 		goto out;
 	}
@@ -676,6 +819,39 @@ out:
 	return ret;
 }
 
+static int kvm_s390_set_processor_feat(struct kvm *kvm,
+				       struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_feat data;
+	int ret = -EBUSY;
+
+	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
+		return -EFAULT;
+	if (!bitmap_subset((unsigned long *) data.feat,
+			   kvm_s390_available_cpu_feat,
+			   KVM_S390_VM_CPU_FEAT_NR_BITS))
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+	if (!atomic_read(&kvm->online_vcpus)) {
+		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
+			    KVM_S390_VM_CPU_FEAT_NR_BITS);
+		ret = 0;
+	}
+	mutex_unlock(&kvm->lock);
+	return ret;
+}
+
+static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
+					  struct kvm_device_attr *attr)
+{
+	/*
+	 * Once supported by kernel + hw, we have to store the subfunctions
+	 * in kvm->arch and remember that user space configured them.
+	 */
+	return -ENXIO;
+}
+
 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 {
 	int ret = -ENXIO;
@@ -684,6 +860,12 @@ static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 	case KVM_S390_VM_CPU_PROCESSOR:
 		ret = kvm_s390_set_processor(kvm, attr);
 		break;
+	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
+		ret = kvm_s390_set_processor_feat(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+		ret = kvm_s390_set_processor_subfunc(kvm, attr);
+		break;
 	}
 	return ret;
 }
@@ -732,6 +914,50 @@ out:
 	return ret;
 }
 
+static int kvm_s390_get_processor_feat(struct kvm *kvm,
+				       struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_feat data;
+
+	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
+		    KVM_S390_VM_CPU_FEAT_NR_BITS);
+	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
+		return -EFAULT;
+	return 0;
+}
+
+static int kvm_s390_get_machine_feat(struct kvm *kvm,
+				     struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_feat data;
+
+	bitmap_copy((unsigned long *) data.feat,
+		    kvm_s390_available_cpu_feat,
+		    KVM_S390_VM_CPU_FEAT_NR_BITS);
+	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
+		return -EFAULT;
+	return 0;
+}
+
+static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
+					  struct kvm_device_attr *attr)
+{
+	/*
+	 * Once we can actually configure subfunctions (kernel + hw support),
+	 * we have to check if they were already set by user space, if so copy
+	 * them from kvm->arch.
+	 */
+	return -ENXIO;
+}
+
+static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
+					struct kvm_device_attr *attr)
+{
+	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
+	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
+		return -EFAULT;
+	return 0;
+}
 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 {
 	int ret = -ENXIO;
@@ -743,6 +969,18 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 	case KVM_S390_VM_CPU_MACHINE:
 		ret = kvm_s390_get_machine(kvm, attr);
 		break;
+	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
+		ret = kvm_s390_get_processor_feat(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_MACHINE_FEAT:
+		ret = kvm_s390_get_machine_feat(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+		ret = kvm_s390_get_processor_subfunc(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
+		ret = kvm_s390_get_machine_subfunc(kvm, attr);
+		break;
 	}
 	return ret;
 }
@@ -803,6 +1041,8 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 		switch (attr->attr) {
 		case KVM_S390_VM_MEM_ENABLE_CMMA:
 		case KVM_S390_VM_MEM_CLR_CMMA:
+			ret = sclp.has_cmma ? 0 : -ENXIO;
+			break;
 		case KVM_S390_VM_MEM_LIMIT_SIZE:
 			ret = 0;
 			break;
@@ -826,8 +1066,13 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 		switch (attr->attr) {
 		case KVM_S390_VM_CPU_PROCESSOR:
 		case KVM_S390_VM_CPU_MACHINE:
+		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
+		case KVM_S390_VM_CPU_MACHINE_FEAT:
+		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
 			ret = 0;
 			break;
+		/* configuring subfunctions is not supported yet */
+		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 		default:
 			ret = -ENXIO;
 			break;
@@ -858,7 +1103,6 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 {
 	uint8_t *keys;
 	uint64_t hva;
-	unsigned long curkey;
 	int i, r = 0;
 
 	if (args->flags != 0)
@@ -879,26 +1123,27 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 	if (!keys)
 		return -ENOMEM;
 
+	down_read(&current->mm->mmap_sem);
 	for (i = 0; i < args->count; i++) {
 		hva = gfn_to_hva(kvm, args->start_gfn + i);
 		if (kvm_is_error_hva(hva)) {
 			r = -EFAULT;
-			goto out;
+			break;
 		}
 
-		curkey = get_guest_storage_key(current->mm, hva);
-		if (IS_ERR_VALUE(curkey)) {
-			r = curkey;
-			goto out;
-		}
-		keys[i] = curkey;
+		r = get_guest_storage_key(current->mm, hva, &keys[i]);
+		if (r)
+			break;
+	}
+	up_read(&current->mm->mmap_sem);
+
+	if (!r) {
+		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
+				 sizeof(uint8_t) * args->count);
+		if (r)
+			r = -EFAULT;
 	}
 
-	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
-			 sizeof(uint8_t) * args->count);
-	if (r)
-		r = -EFAULT;
-out:
 	kvfree(keys);
 	return r;
 }
@@ -935,24 +1180,25 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 	if (r)
 		goto out;
 
+	down_read(&current->mm->mmap_sem);
 	for (i = 0; i < args->count; i++) {
 		hva = gfn_to_hva(kvm, args->start_gfn + i);
 		if (kvm_is_error_hva(hva)) {
 			r = -EFAULT;
-			goto out;
+			break;
 		}
 
 		/* Lowest order bit is reserved */
 		if (keys[i] & 0x01) {
 			r = -EINVAL;
-			goto out;
+			break;
 		}
 
-		r = set_guest_storage_key(current->mm, hva,
-					  (unsigned long)keys[i], 0);
+		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
 		if (r)
-			goto out;
+			break;
 	}
+	up_read(&current->mm->mmap_sem);
 out:
 	kvfree(keys);
 	return r;
@@ -1129,6 +1375,7 @@ static void sca_dispose(struct kvm *kvm)
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
+	gfp_t alloc_flags = GFP_KERNEL;
 	int i, rc;
 	char debug_name[16];
 	static unsigned long sca_offset;
@@ -1150,9 +1397,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	rc = -ENOMEM;
 
+	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
+
 	kvm->arch.use_esca = 0; /* start with basic SCA */
+	if (!sclp.has_64bscao)
+		alloc_flags |= GFP_DMA;
 	rwlock_init(&kvm->arch.sca_lock);
-	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
+	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
 	if (!kvm->arch.sca)
 		goto out_err;
 	spin_lock(&kvm_lock);
@@ -1189,6 +1440,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
 
+	set_kvm_facility(kvm->arch.model.fac_mask, 74);
+	set_kvm_facility(kvm->arch.model.fac_list, 74);
+
 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
 
@@ -1212,7 +1466,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 		else
 			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
 						    sclp.hamax + 1);
-		kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
+		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
 		if (!kvm->arch.gmap)
 			goto out_err;
 		kvm->arch.gmap->private = kvm;
@@ -1224,6 +1478,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.epoch = 0;
 
 	spin_lock_init(&kvm->arch.start_stop_lock);
+	kvm_s390_vsie_init(kvm);
 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
 
 	return 0;
@@ -1245,7 +1500,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 		sca_del_vcpu(vcpu);
 
 	if (kvm_is_ucontrol(vcpu->kvm))
-		gmap_free(vcpu->arch.gmap);
+		gmap_remove(vcpu->arch.gmap);
 
 	if (vcpu->kvm->arch.use_cmma)
 		kvm_s390_vcpu_unsetup_cmma(vcpu);
@@ -1278,16 +1533,17 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	debug_unregister(kvm->arch.dbf);
 	free_page((unsigned long)kvm->arch.sie_page2);
 	if (!kvm_is_ucontrol(kvm))
-		gmap_free(kvm->arch.gmap);
+		gmap_remove(kvm->arch.gmap);
 	kvm_s390_destroy_adapters(kvm);
 	kvm_s390_clear_float_irqs(kvm);
+	kvm_s390_vsie_destroy(kvm);
 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
 }
 
 /* Section: vcpu related */
 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
+	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
 	if (!vcpu->arch.gmap)
 		return -ENOMEM;
 	vcpu->arch.gmap->private = vcpu->kvm;
@@ -1396,7 +1652,7 @@ static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
 
 	if (id < KVM_S390_BSCA_CPU_SLOTS)
 		return true;
-	if (!sclp.has_esca)
+	if (!sclp.has_esca || !sclp.has_64bscao)
 		return false;
 
 	mutex_lock(&kvm->lock);
@@ -1537,7 +1793,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 	save_access_regs(vcpu->arch.host_acrs);
 	restore_access_regs(vcpu->run->s.regs.acrs);
-	gmap_enable(vcpu->arch.gmap);
+	gmap_enable(vcpu->arch.enabled_gmap);
 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
 		__start_cpu_timer_accounting(vcpu);
@@ -1550,7 +1806,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
 		__stop_cpu_timer_accounting(vcpu);
 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
-	gmap_disable(vcpu->arch.gmap);
+	vcpu->arch.enabled_gmap = gmap_get_enabled();
+	gmap_disable(vcpu->arch.enabled_gmap);
 
 	/* Save guest register state */
 	save_fpu_regs();
@@ -1599,7 +1856,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
 		sca_add_vcpu(vcpu);
 	}
-
+	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
+		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
+	/* make vcpu_load load the right gmap on the first trigger */
+	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
 }
 
 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
@@ -1658,15 +1918,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 
 	kvm_s390_vcpu_setup_model(vcpu);
 
-	vcpu->arch.sie_block->ecb = 0x02;
+	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
+	if (MACHINE_HAS_ESOP)
+		vcpu->arch.sie_block->ecb |= 0x02;
 	if (test_kvm_facility(vcpu->kvm, 9))
 		vcpu->arch.sie_block->ecb |= 0x04;
-	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
+	if (test_kvm_facility(vcpu->kvm, 73))
 		vcpu->arch.sie_block->ecb |= 0x10;
 
-	if (test_kvm_facility(vcpu->kvm, 8))
+	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
 		vcpu->arch.sie_block->ecb2 |= 0x08;
-	vcpu->arch.sie_block->eca   = 0xC1002000U;
+	vcpu->arch.sie_block->eca = 0x1002000U;
+	if (sclp.has_cei)
+		vcpu->arch.sie_block->eca |= 0x80000000U;
+	if (sclp.has_ib)
+		vcpu->arch.sie_block->eca |= 0x40000000U;
 	if (sclp.has_siif)
 		vcpu->arch.sie_block->eca |= 1;
 	if (sclp.has_sigpif)
@@ -1716,6 +1982,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	vcpu->arch.sie_block = &sie_page->sie_block;
 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
 
+	/* the real guest size will always be smaller than msl */
+	vcpu->arch.sie_block->mso = 0;
+	vcpu->arch.sie_block->msl = sclp.hamax;
+
 	vcpu->arch.sie_block->icpua = id;
 	spin_lock_init(&vcpu->arch.local_int.lock);
 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
@@ -1784,16 +2054,25 @@ void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
 	kvm_s390_vcpu_request(vcpu);
 }
 
-static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
+			      unsigned long end)
 {
-	int i;
 	struct kvm *kvm = gmap->private;
 	struct kvm_vcpu *vcpu;
+	unsigned long prefix;
+	int i;
 
+	if (gmap_is_shadow(gmap))
+		return;
+	if (start >= 1UL << 31)
+		/* We are only interested in prefix pages */
+		return;
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		/* match against both prefix pages */
-		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
-			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
+		prefix = kvm_s390_get_prefix(vcpu);
+		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
+			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
+				   start, end);
 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
 		}
 	}
@@ -2002,6 +2281,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 
 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
 		return -EINVAL;
+	if (!sclp.has_gpere)
+		return -EINVAL;
 
 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
 		vcpu->guest_debug = dbg->control;
@@ -2070,16 +2351,16 @@ retry:
 		return 0;
 	/*
 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
-	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
+	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
 	 * This ensures that the ipte instruction for this request has
 	 * already finished. We might race against a second unmapper that
 	 * wants to set the blocking bit. Lets just retry the request loop.
 	 */
 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
 		int rc;
-		rc = gmap_ipte_notify(vcpu->arch.gmap,
-				      kvm_s390_get_prefix(vcpu),
-				      PAGE_SIZE * 2);
+		rc = gmap_mprotect_notify(vcpu->arch.gmap,
+					  kvm_s390_get_prefix(vcpu),
+					  PAGE_SIZE * 2, PROT_WRITE);
 		if (rc)
 			return rc;
 		goto retry;
@@ -2108,6 +2389,11 @@ retry:
 		goto retry;
 	}
 
+	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
+		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
+		goto retry;
+	}
+
 	/* nothing to do, just clear the request */
 	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 
@@ -2362,14 +2648,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 		 * guest_enter and guest_exit should be no uaccess.
 		 */
 		local_irq_disable();
-		__kvm_guest_enter();
+		guest_enter_irqoff();
 		__disable_cpu_timer_accounting(vcpu);
 		local_irq_enable();
 		exit_reason = sie64a(vcpu->arch.sie_block,
 				     vcpu->run->s.regs.gprs);
 		local_irq_disable();
 		__enable_cpu_timer_accounting(vcpu);
-		__kvm_guest_exit();
+		guest_exit_irqoff();
 		local_irq_enable();
 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 
@@ -2598,6 +2884,8 @@ static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
 
 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
 {
+	if (!sclp.has_ibs)
+		return;
 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
 }
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 8621ab00ec8e..b8432862a817 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -56,7 +56,7 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
 
 static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
 {
-	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_WAIT;
+	return test_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
 }
 
 static inline int kvm_is_ucontrol(struct kvm *kvm)
@@ -175,6 +175,12 @@ static inline int set_kvm_facility(u64 *fac_list, unsigned long nr)
 	return 0;
 }
 
+static inline int test_kvm_cpu_feat(struct kvm *kvm, unsigned long nr)
+{
+	WARN_ON_ONCE(nr >= KVM_S390_VM_CPU_FEAT_NR_BITS);
+	return test_bit_inv(nr, kvm->arch.cpu_feat);
+}
+
 /* are cpu states controlled by user space */
 static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
 {
@@ -232,6 +238,8 @@ static inline void kvm_s390_forward_psw(struct kvm_vcpu *vcpu, int ilen)
 }
 static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu)
 {
+	/* don't inject PER events if we re-execute the instruction */
+	vcpu->arch.sie_block->icptstatus &= ~0x02;
 	kvm_s390_rewind_psw(vcpu, kvm_s390_get_ilen(vcpu));
 }
 
@@ -246,10 +254,21 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
 
+/* implemented in vsie.c */
+int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu);
+void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu);
+void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
+				 unsigned long end);
+void kvm_s390_vsie_init(struct kvm *kvm);
+void kvm_s390_vsie_destroy(struct kvm *kvm);
+
 /* implemented in sigp.c */
 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
 
+/* implemented in sthyi.c */
+int handle_sthyi(struct kvm_vcpu *vcpu);
+
 /* implemented in kvm-s390.c */
 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod);
 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
@@ -360,6 +379,7 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
 			    struct kvm_guest_debug *dbg);
 void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu);
 void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu);
 void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
 
 /* support for Basic/Extended SCA handling */
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 95916fa7c670..46160388e996 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -27,6 +27,7 @@
 #include <asm/io.h>
 #include <asm/ptrace.h>
 #include <asm/compat.h>
+#include <asm/sclp.h>
 #include "gaccess.h"
 #include "kvm-s390.h"
 #include "trace.h"
@@ -152,30 +153,166 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
 static int __skey_check_enable(struct kvm_vcpu *vcpu)
 {
 	int rc = 0;
+
+	trace_kvm_s390_skey_related_inst(vcpu);
 	if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)))
 		return rc;
 
 	rc = s390_enable_skey();
-	VCPU_EVENT(vcpu, 3, "%s", "enabling storage keys for guest");
-	trace_kvm_s390_skey_related_inst(vcpu);
-	vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+	VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc);
+	if (!rc)
+		vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
 	return rc;
 }
 
-
-static int handle_skey(struct kvm_vcpu *vcpu)
+static int try_handle_skey(struct kvm_vcpu *vcpu)
 {
-	int rc = __skey_check_enable(vcpu);
+	int rc;
 
+	vcpu->stat.instruction_storage_key++;
+	rc = __skey_check_enable(vcpu);
 	if (rc)
 		return rc;
-	vcpu->stat.instruction_storage_key++;
-
+	if (sclp.has_skey) {
+		/* with storage-key facility, SIE interprets it for us */
+		kvm_s390_retry_instr(vcpu);
+		VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
+		return -EAGAIN;
+	}
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+	return 0;
+}
 
-	kvm_s390_retry_instr(vcpu);
-	VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
+static int handle_iske(struct kvm_vcpu *vcpu)
+{
+	unsigned long addr;
+	unsigned char key;
+	int reg1, reg2;
+	int rc;
+
+	rc = try_handle_skey(vcpu);
+	if (rc)
+		return rc != -EAGAIN ? rc : 0;
+
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+	addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+	addr = kvm_s390_logical_to_effective(vcpu, addr);
+	addr = kvm_s390_real_to_abs(vcpu, addr);
+	addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
+	if (kvm_is_error_hva(addr))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+	down_read(&current->mm->mmap_sem);
+	rc = get_guest_storage_key(current->mm, addr, &key);
+	up_read(&current->mm->mmap_sem);
+	if (rc)
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	vcpu->run->s.regs.gprs[reg1] &= ~0xff;
+	vcpu->run->s.regs.gprs[reg1] |= key;
+	return 0;
+}
+
+static int handle_rrbe(struct kvm_vcpu *vcpu)
+{
+	unsigned long addr;
+	int reg1, reg2;
+	int rc;
+
+	rc = try_handle_skey(vcpu);
+	if (rc)
+		return rc != -EAGAIN ? rc : 0;
+
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+	addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+	addr = kvm_s390_logical_to_effective(vcpu, addr);
+	addr = kvm_s390_real_to_abs(vcpu, addr);
+	addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
+	if (kvm_is_error_hva(addr))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+	down_read(&current->mm->mmap_sem);
+	rc = reset_guest_reference_bit(current->mm, addr);
+	up_read(&current->mm->mmap_sem);
+	if (rc < 0)
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+	kvm_s390_set_psw_cc(vcpu, rc);
+	return 0;
+}
+
+#define SSKE_NQ 0x8
+#define SSKE_MR 0x4
+#define SSKE_MC 0x2
+#define SSKE_MB 0x1
+static int handle_sske(struct kvm_vcpu *vcpu)
+{
+	unsigned char m3 = vcpu->arch.sie_block->ipb >> 28;
+	unsigned long start, end;
+	unsigned char key, oldkey;
+	int reg1, reg2;
+	int rc;
+
+	rc = try_handle_skey(vcpu);
+	if (rc)
+		return rc != -EAGAIN ? rc : 0;
+
+	if (!test_kvm_facility(vcpu->kvm, 8))
+		m3 &= ~SSKE_MB;
+	if (!test_kvm_facility(vcpu->kvm, 10))
+		m3 &= ~(SSKE_MC | SSKE_MR);
+	if (!test_kvm_facility(vcpu->kvm, 14))
+		m3 &= ~SSKE_NQ;
+
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+	key = vcpu->run->s.regs.gprs[reg1] & 0xfe;
+	start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+	start = kvm_s390_logical_to_effective(vcpu, start);
+	if (m3 & SSKE_MB) {
+		/* start already designates an absolute address */
+		end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
+	} else {
+		start = kvm_s390_real_to_abs(vcpu, start);
+		end = start + PAGE_SIZE;
+	}
+
+	while (start != end) {
+		unsigned long addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
+
+		if (kvm_is_error_hva(addr))
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+		down_read(&current->mm->mmap_sem);
+		rc = cond_set_guest_storage_key(current->mm, addr, key, &oldkey,
+						m3 & SSKE_NQ, m3 & SSKE_MR,
+						m3 & SSKE_MC);
+		up_read(&current->mm->mmap_sem);
+		if (rc < 0)
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		start += PAGE_SIZE;
+	};
+
+	if (m3 & (SSKE_MC | SSKE_MR)) {
+		if (m3 & SSKE_MB) {
+			/* skey in reg1 is unpredictable */
+			kvm_s390_set_psw_cc(vcpu, 3);
+		} else {
+			kvm_s390_set_psw_cc(vcpu, rc);
+			vcpu->run->s.regs.gprs[reg1] &= ~0xff00UL;
+			vcpu->run->s.regs.gprs[reg1] |= (u64) oldkey << 8;
+		}
+	}
+	if (m3 & SSKE_MB) {
+		if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT)
+			vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK;
+		else
+			vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL;
+		end = kvm_s390_logical_to_effective(vcpu, end);
+		vcpu->run->s.regs.gprs[reg2] |= end;
+	}
 	return 0;
 }
 
@@ -582,10 +719,11 @@ static const intercept_handler_t b2_handlers[256] = {
 	[0x10] = handle_set_prefix,
 	[0x11] = handle_store_prefix,
 	[0x12] = handle_store_cpu_address,
+	[0x14] = kvm_s390_handle_vsie,
 	[0x21] = handle_ipte_interlock,
-	[0x29] = handle_skey,
-	[0x2a] = handle_skey,
-	[0x2b] = handle_skey,
+	[0x29] = handle_iske,
+	[0x2a] = handle_rrbe,
+	[0x2b] = handle_sske,
 	[0x2c] = handle_test_block,
 	[0x30] = handle_io_inst,
 	[0x31] = handle_io_inst,
@@ -654,8 +792,10 @@ static int handle_epsw(struct kvm_vcpu *vcpu)
 
 static int handle_pfmf(struct kvm_vcpu *vcpu)
 {
+	bool mr = false, mc = false, nq;
 	int reg1, reg2;
 	unsigned long start, end;
+	unsigned char key;
 
 	vcpu->stat.instruction_pfmf++;
 
@@ -675,15 +815,27 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 	    !test_kvm_facility(vcpu->kvm, 14))
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	/* No support for conditional-SSKE */
-	if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC))
-		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	/* Only provide conditional-SSKE support if enabled for the guest */
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK &&
+	    test_kvm_facility(vcpu->kvm, 10)) {
+		mr = vcpu->run->s.regs.gprs[reg1] & PFMF_MR;
+		mc = vcpu->run->s.regs.gprs[reg1] & PFMF_MC;
+	}
 
+	nq = vcpu->run->s.regs.gprs[reg1] & PFMF_NQ;
+	key = vcpu->run->s.regs.gprs[reg1] & PFMF_KEY;
 	start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
 	start = kvm_s390_logical_to_effective(vcpu, start);
 
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+		if (kvm_s390_check_low_addr_prot_real(vcpu, start))
+			return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+	}
+
 	switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
 	case 0x00000000:
+		/* only 4k frames specify a real address */
+		start = kvm_s390_real_to_abs(vcpu, start);
 		end = (start + (1UL << 12)) & ~((1UL << 12) - 1);
 		break;
 	case 0x00001000:
@@ -701,20 +853,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 	}
 
-	if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
-		if (kvm_s390_check_low_addr_prot_real(vcpu, start))
-			return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
-	}
-
-	while (start < end) {
-		unsigned long useraddr, abs_addr;
+	while (start != end) {
+		unsigned long useraddr;
 
 		/* Translate guest address to host address */
-		if ((vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) == 0)
-			abs_addr = kvm_s390_real_to_abs(vcpu, start);
-		else
-			abs_addr = start;
-		useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(abs_addr));
+		useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
 		if (kvm_is_error_hva(useraddr))
 			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
@@ -728,16 +871,25 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 
 			if (rc)
 				return rc;
-			if (set_guest_storage_key(current->mm, useraddr,
-					vcpu->run->s.regs.gprs[reg1] & PFMF_KEY,
-					vcpu->run->s.regs.gprs[reg1] & PFMF_NQ))
+			down_read(&current->mm->mmap_sem);
+			rc = cond_set_guest_storage_key(current->mm, useraddr,
+							key, NULL, nq, mr, mc);
+			up_read(&current->mm->mmap_sem);
+			if (rc < 0)
 				return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 		}
 
 		start += PAGE_SIZE;
 	}
-	if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC)
-		vcpu->run->s.regs.gprs[reg2] = end;
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
+		if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT) {
+			vcpu->run->s.regs.gprs[reg2] = end;
+		} else {
+			vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL;
+			end = kvm_s390_logical_to_effective(vcpu, end);
+			vcpu->run->s.regs.gprs[reg2] |= end;
+		}
+	}
 	return 0;
 }
 
@@ -1033,7 +1185,15 @@ static int handle_sckpf(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static int handle_ptff(struct kvm_vcpu *vcpu)
+{
+	/* we don't emulate any control instructions yet */
+	kvm_s390_set_psw_cc(vcpu, 3);
+	return 0;
+}
+
 static const intercept_handler_t x01_handlers[256] = {
+	[0x04] = handle_ptff,
 	[0x07] = handle_sckpf,
 };
 
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 28ea0cab1f1b..1a252f537081 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -77,18 +77,18 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu,
 	const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
 	u16 p_asn, s_asn;
 	psw_t *psw;
-	u32 flags;
+	bool idle;
 
-	flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags);
+	idle = is_vcpu_idle(vcpu);
 	psw = &dst_vcpu->arch.sie_block->gpsw;
 	p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff;  /* Primary ASN */
 	s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff;  /* Secondary ASN */
 
 	/* Inject the emergency signal? */
-	if (!(flags & CPUSTAT_STOPPED)
+	if (!is_vcpu_stopped(vcpu)
 	    || (psw->mask & psw_int_mask) != psw_int_mask
-	    || ((flags & CPUSTAT_WAIT) && psw->addr != 0)
-	    || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) {
+	    || (idle && psw->addr != 0)
+	    || (!idle && (asn == p_asn || asn == s_asn))) {
 		return __inject_sigp_emergency(vcpu, dst_vcpu);
 	} else {
 		*reg &= 0xffffffff00000000UL;
diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c
new file mode 100644
index 000000000000..bd98b7d25200
--- /dev/null
+++ b/arch/s390/kvm/sthyi.c
@@ -0,0 +1,471 @@
+/*
+ * store hypervisor information instruction emulation functions.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+#include <linux/ratelimit.h>
+
+#include <asm/kvm_host.h>
+#include <asm/asm-offsets.h>
+#include <asm/sclp.h>
+#include <asm/diag.h>
+#include <asm/sysinfo.h>
+#include <asm/ebcdic.h>
+
+#include "kvm-s390.h"
+#include "gaccess.h"
+#include "trace.h"
+
+#define DED_WEIGHT 0xffff
+/*
+ * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string
+ * as they are justified with spaces.
+ */
+#define CP  0xc3d7404040404040UL
+#define IFL 0xc9c6d34040404040UL
+
+enum hdr_flags {
+	HDR_NOT_LPAR   = 0x10,
+	HDR_STACK_INCM = 0x20,
+	HDR_STSI_UNAV  = 0x40,
+	HDR_PERF_UNAV  = 0x80,
+};
+
+enum mac_validity {
+	MAC_NAME_VLD = 0x20,
+	MAC_ID_VLD   = 0x40,
+	MAC_CNT_VLD  = 0x80,
+};
+
+enum par_flag {
+	PAR_MT_EN = 0x80,
+};
+
+enum par_validity {
+	PAR_GRP_VLD  = 0x08,
+	PAR_ID_VLD   = 0x10,
+	PAR_ABS_VLD  = 0x20,
+	PAR_WGHT_VLD = 0x40,
+	PAR_PCNT_VLD  = 0x80,
+};
+
+struct hdr_sctn {
+	u8 infhflg1;
+	u8 infhflg2; /* reserved */
+	u8 infhval1; /* reserved */
+	u8 infhval2; /* reserved */
+	u8 reserved[3];
+	u8 infhygct;
+	u16 infhtotl;
+	u16 infhdln;
+	u16 infmoff;
+	u16 infmlen;
+	u16 infpoff;
+	u16 infplen;
+	u16 infhoff1;
+	u16 infhlen1;
+	u16 infgoff1;
+	u16 infglen1;
+	u16 infhoff2;
+	u16 infhlen2;
+	u16 infgoff2;
+	u16 infglen2;
+	u16 infhoff3;
+	u16 infhlen3;
+	u16 infgoff3;
+	u16 infglen3;
+	u8 reserved2[4];
+} __packed;
+
+struct mac_sctn {
+	u8 infmflg1; /* reserved */
+	u8 infmflg2; /* reserved */
+	u8 infmval1;
+	u8 infmval2; /* reserved */
+	u16 infmscps;
+	u16 infmdcps;
+	u16 infmsifl;
+	u16 infmdifl;
+	char infmname[8];
+	char infmtype[4];
+	char infmmanu[16];
+	char infmseq[16];
+	char infmpman[4];
+	u8 reserved[4];
+} __packed;
+
+struct par_sctn {
+	u8 infpflg1;
+	u8 infpflg2; /* reserved */
+	u8 infpval1;
+	u8 infpval2; /* reserved */
+	u16 infppnum;
+	u16 infpscps;
+	u16 infpdcps;
+	u16 infpsifl;
+	u16 infpdifl;
+	u16 reserved;
+	char infppnam[8];
+	u32 infpwbcp;
+	u32 infpabcp;
+	u32 infpwbif;
+	u32 infpabif;
+	char infplgnm[8];
+	u32 infplgcp;
+	u32 infplgif;
+} __packed;
+
+struct sthyi_sctns {
+	struct hdr_sctn hdr;
+	struct mac_sctn mac;
+	struct par_sctn par;
+} __packed;
+
+struct cpu_inf {
+	u64 lpar_cap;
+	u64 lpar_grp_cap;
+	u64 lpar_weight;
+	u64 all_weight;
+	int cpu_num_ded;
+	int cpu_num_shd;
+};
+
+struct lpar_cpu_inf {
+	struct cpu_inf cp;
+	struct cpu_inf ifl;
+};
+
+static inline u64 cpu_id(u8 ctidx, void *diag224_buf)
+{
+	return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN));
+}
+
+/*
+ * Scales the cpu capping from the lpar range to the one expected in
+ * sthyi data.
+ *
+ * diag204 reports a cap in hundredths of processor units.
+ * z/VM's range for one core is 0 - 0x10000.
+ */
+static u32 scale_cap(u32 in)
+{
+	return (0x10000 * in) / 100;
+}
+
+static void fill_hdr(struct sthyi_sctns *sctns)
+{
+	sctns->hdr.infhdln = sizeof(sctns->hdr);
+	sctns->hdr.infmoff = sizeof(sctns->hdr);
+	sctns->hdr.infmlen = sizeof(sctns->mac);
+	sctns->hdr.infplen = sizeof(sctns->par);
+	sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen;
+	sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen;
+}
+
+static void fill_stsi_mac(struct sthyi_sctns *sctns,
+			  struct sysinfo_1_1_1 *sysinfo)
+{
+	if (stsi(sysinfo, 1, 1, 1))
+		return;
+
+	sclp_ocf_cpc_name_copy(sctns->mac.infmname);
+
+	memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype));
+	memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu));
+	memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman));
+	memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq));
+
+	sctns->mac.infmval1 |= MAC_ID_VLD | MAC_NAME_VLD;
+}
+
+static void fill_stsi_par(struct sthyi_sctns *sctns,
+			  struct sysinfo_2_2_2 *sysinfo)
+{
+	if (stsi(sysinfo, 2, 2, 2))
+		return;
+
+	sctns->par.infppnum = sysinfo->lpar_number;
+	memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam));
+
+	sctns->par.infpval1 |= PAR_ID_VLD;
+}
+
+static void fill_stsi(struct sthyi_sctns *sctns)
+{
+	void *sysinfo;
+
+	/* Errors are handled through the validity bits in the response. */
+	sysinfo = (void *)__get_free_page(GFP_KERNEL);
+	if (!sysinfo)
+		return;
+
+	fill_stsi_mac(sctns, sysinfo);
+	fill_stsi_par(sctns, sysinfo);
+
+	free_pages((unsigned long)sysinfo, 0);
+}
+
+static void fill_diag_mac(struct sthyi_sctns *sctns,
+			  struct diag204_x_phys_block *block,
+			  void *diag224_buf)
+{
+	int i;
+
+	for (i = 0; i < block->hdr.cpus; i++) {
+		switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
+		case CP:
+			if (block->cpus[i].weight == DED_WEIGHT)
+				sctns->mac.infmdcps++;
+			else
+				sctns->mac.infmscps++;
+			break;
+		case IFL:
+			if (block->cpus[i].weight == DED_WEIGHT)
+				sctns->mac.infmdifl++;
+			else
+				sctns->mac.infmsifl++;
+			break;
+		}
+	}
+	sctns->mac.infmval1 |= MAC_CNT_VLD;
+}
+
+/* Returns a pointer to the the next partition block. */
+static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf,
+						 bool this_lpar,
+						 void *diag224_buf,
+						 struct diag204_x_part_block *block)
+{
+	int i, capped = 0, weight_cp = 0, weight_ifl = 0;
+	struct cpu_inf *cpu_inf;
+
+	for (i = 0; i < block->hdr.rcpus; i++) {
+		if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE))
+			continue;
+
+		switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
+		case CP:
+			cpu_inf = &part_inf->cp;
+			if (block->cpus[i].cur_weight < DED_WEIGHT)
+				weight_cp |= block->cpus[i].cur_weight;
+			break;
+		case IFL:
+			cpu_inf = &part_inf->ifl;
+			if (block->cpus[i].cur_weight < DED_WEIGHT)
+				weight_ifl |= block->cpus[i].cur_weight;
+			break;
+		default:
+			continue;
+		}
+
+		if (!this_lpar)
+			continue;
+
+		capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED;
+		cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap;
+		cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap;
+
+		if (block->cpus[i].weight == DED_WEIGHT)
+			cpu_inf->cpu_num_ded += 1;
+		else
+			cpu_inf->cpu_num_shd += 1;
+	}
+
+	if (this_lpar && capped) {
+		part_inf->cp.lpar_weight = weight_cp;
+		part_inf->ifl.lpar_weight = weight_ifl;
+	}
+	part_inf->cp.all_weight += weight_cp;
+	part_inf->ifl.all_weight += weight_ifl;
+	return (struct diag204_x_part_block *)&block->cpus[i];
+}
+
+static void fill_diag(struct sthyi_sctns *sctns)
+{
+	int i, r, pages;
+	bool this_lpar;
+	void *diag204_buf;
+	void *diag224_buf = NULL;
+	struct diag204_x_info_blk_hdr *ti_hdr;
+	struct diag204_x_part_block *part_block;
+	struct diag204_x_phys_block *phys_block;
+	struct lpar_cpu_inf lpar_inf = {};
+
+	/* Errors are handled through the validity bits in the response. */
+	pages = diag204((unsigned long)DIAG204_SUBC_RSI |
+			(unsigned long)DIAG204_INFO_EXT, 0, NULL);
+	if (pages <= 0)
+		return;
+
+	diag204_buf = vmalloc(PAGE_SIZE * pages);
+	if (!diag204_buf)
+		return;
+
+	r = diag204((unsigned long)DIAG204_SUBC_STIB7 |
+		    (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf);
+	if (r < 0)
+		goto out;
+
+	diag224_buf = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA);
+	if (!diag224_buf || diag224(diag224_buf))
+		goto out;
+
+	ti_hdr = diag204_buf;
+	part_block = diag204_buf + sizeof(*ti_hdr);
+
+	for (i = 0; i < ti_hdr->npar; i++) {
+		/*
+		 * For the calling lpar we also need to get the cpu
+		 * caps and weights. The time information block header
+		 * specifies the offset to the partition block of the
+		 * caller lpar, so we know when we process its data.
+		 */
+		this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part;
+		part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf,
+					  part_block);
+	}
+
+	phys_block = (struct diag204_x_phys_block *)part_block;
+	part_block = diag204_buf + ti_hdr->this_part;
+	if (part_block->hdr.mtid)
+		sctns->par.infpflg1 = PAR_MT_EN;
+
+	sctns->par.infpval1 |= PAR_GRP_VLD;
+	sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap);
+	sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap);
+	memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name,
+	       sizeof(sctns->par.infplgnm));
+
+	sctns->par.infpscps = lpar_inf.cp.cpu_num_shd;
+	sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded;
+	sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd;
+	sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded;
+	sctns->par.infpval1 |= PAR_PCNT_VLD;
+
+	sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap);
+	sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap);
+	sctns->par.infpval1 |= PAR_ABS_VLD;
+
+	/*
+	 * Everything below needs global performance data to be
+	 * meaningful.
+	 */
+	if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) {
+		sctns->hdr.infhflg1 |= HDR_PERF_UNAV;
+		goto out;
+	}
+
+	fill_diag_mac(sctns, phys_block, diag224_buf);
+
+	if (lpar_inf.cp.lpar_weight) {
+		sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 *
+			lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight;
+	}
+
+	if (lpar_inf.ifl.lpar_weight) {
+		sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 *
+			lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight;
+	}
+	sctns->par.infpval1 |= PAR_WGHT_VLD;
+
+out:
+	kfree(diag224_buf);
+	vfree(diag204_buf);
+}
+
+static int sthyi(u64 vaddr)
+{
+	register u64 code asm("0") = 0;
+	register u64 addr asm("2") = vaddr;
+	int cc;
+
+	asm volatile(
+		".insn   rre,0xB2560000,%[code],%[addr]\n"
+		"ipm     %[cc]\n"
+		"srl     %[cc],28\n"
+		: [cc] "=d" (cc)
+		: [code] "d" (code), [addr] "a" (addr)
+		: "memory", "cc");
+	return cc;
+}
+
+int handle_sthyi(struct kvm_vcpu *vcpu)
+{
+	int reg1, reg2, r = 0;
+	u64 code, addr, cc = 0;
+	struct sthyi_sctns *sctns = NULL;
+
+	/*
+	 * STHYI requires extensive locking in the higher hypervisors
+	 * and is very computational/memory expensive. Therefore we
+	 * ratelimit the executions per VM.
+	 */
+	if (!__ratelimit(&vcpu->kvm->arch.sthyi_limit)) {
+		kvm_s390_retry_instr(vcpu);
+		return 0;
+	}
+
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+	code = vcpu->run->s.regs.gprs[reg1];
+	addr = vcpu->run->s.regs.gprs[reg2];
+
+	vcpu->stat.instruction_sthyi++;
+	VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr);
+	trace_kvm_s390_handle_sthyi(vcpu, code, addr);
+
+	if (reg1 == reg2 || reg1 & 1 || reg2 & 1 || addr & ~PAGE_MASK)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	if (code & 0xffff) {
+		cc = 3;
+		goto out;
+	}
+
+	/*
+	 * If the page has not yet been faulted in, we want to do that
+	 * now and not after all the expensive calculations.
+	 */
+	r = write_guest(vcpu, addr, reg2, &cc, 1);
+	if (r)
+		return kvm_s390_inject_prog_cond(vcpu, r);
+
+	sctns = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!sctns)
+		return -ENOMEM;
+
+	/*
+	 * If we are a guest, we don't want to emulate an emulated
+	 * instruction. We ask the hypervisor to provide the data.
+	 */
+	if (test_facility(74)) {
+		cc = sthyi((u64)sctns);
+		goto out;
+	}
+
+	fill_hdr(sctns);
+	fill_stsi(sctns);
+	fill_diag(sctns);
+
+out:
+	if (!cc) {
+		r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
+		if (r) {
+			free_page((unsigned long)sctns);
+			return kvm_s390_inject_prog_cond(vcpu, r);
+		}
+	}
+
+	free_page((unsigned long)sctns);
+	vcpu->run->s.regs.gprs[reg2 + 1] = cc ? 4 : 0;
+	kvm_s390_set_psw_cc(vcpu, cc);
+	return r;
+}
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
index 916834d7a73a..4fc9d4e5be89 100644
--- a/arch/s390/kvm/trace.h
+++ b/arch/s390/kvm/trace.h
@@ -41,7 +41,7 @@ TRACE_EVENT(kvm_s390_skey_related_inst,
 	    TP_fast_assign(
 		    VCPU_ASSIGN_COMMON
 		    ),
-	    VCPU_TP_PRINTK("%s", "first instruction related to skeys on vcpu")
+	    VCPU_TP_PRINTK("%s", "storage key related instruction")
 	);
 
 TRACE_EVENT(kvm_s390_major_guest_pfault,
@@ -185,8 +185,10 @@ TRACE_EVENT(kvm_s390_intercept_prog,
 		    __entry->code = code;
 		    ),
 
-	    VCPU_TP_PRINTK("intercepted program interruption %04x",
-			   __entry->code)
+	    VCPU_TP_PRINTK("intercepted program interruption %04x (%s)",
+			   __entry->code,
+			   __print_symbolic(__entry->code,
+					    icpt_prog_codes))
 	);
 
 /*
@@ -412,6 +414,47 @@ TRACE_EVENT(kvm_s390_handle_stsi,
 			   __entry->addr)
 	);
 
+TRACE_EVENT(kvm_s390_handle_operexc,
+	    TP_PROTO(VCPU_PROTO_COMMON, __u16 ipa, __u32 ipb),
+	    TP_ARGS(VCPU_ARGS_COMMON, ipa, ipb),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(__u64, instruction)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->instruction = ((__u64)ipa << 48) |
+		    ((__u64)ipb << 16);
+		    ),
+
+	    VCPU_TP_PRINTK("operation exception on instruction %016llx (%s)",
+			   __entry->instruction,
+			   __print_symbolic(icpt_insn_decoder(__entry->instruction),
+					    icpt_insn_codes))
+	);
+
+TRACE_EVENT(kvm_s390_handle_sthyi,
+	    TP_PROTO(VCPU_PROTO_COMMON, u64 code, u64 addr),
+	    TP_ARGS(VCPU_ARGS_COMMON, code, addr),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(u64, code)
+		    __field(u64, addr)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->code = code;
+		    __entry->addr = addr;
+		    ),
+
+	    VCPU_TP_PRINTK("STHYI fc: %llu addr: %016llx",
+			   __entry->code, __entry->addr)
+	);
+
 #endif /* _TRACE_KVM_H */
 
 /* This part must be outside protection */
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
new file mode 100644
index 000000000000..c106488b4137
--- /dev/null
+++ b/arch/s390/kvm/vsie.c
@@ -0,0 +1,1091 @@
+/*
+ * kvm nested virtualization support for s390x
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
+ */
+#include <linux/vmalloc.h>
+#include <linux/kvm_host.h>
+#include <linux/bug.h>
+#include <linux/list.h>
+#include <linux/bitmap.h>
+#include <asm/gmap.h>
+#include <asm/mmu_context.h>
+#include <asm/sclp.h>
+#include <asm/nmi.h>
+#include <asm/dis.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+struct vsie_page {
+	struct kvm_s390_sie_block scb_s;	/* 0x0000 */
+	/* the pinned originial scb */
+	struct kvm_s390_sie_block *scb_o;	/* 0x0200 */
+	/* the shadow gmap in use by the vsie_page */
+	struct gmap *gmap;			/* 0x0208 */
+	/* address of the last reported fault to guest2 */
+	unsigned long fault_addr;		/* 0x0210 */
+	__u8 reserved[0x0700 - 0x0218];		/* 0x0218 */
+	struct kvm_s390_crypto_cb crycb;	/* 0x0700 */
+	__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE];	/* 0x0800 */
+} __packed;
+
+/* trigger a validity icpt for the given scb */
+static int set_validity_icpt(struct kvm_s390_sie_block *scb,
+			     __u16 reason_code)
+{
+	scb->ipa = 0x1000;
+	scb->ipb = ((__u32) reason_code) << 16;
+	scb->icptcode = ICPT_VALIDITY;
+	return 1;
+}
+
+/* mark the prefix as unmapped, this will block the VSIE */
+static void prefix_unmapped(struct vsie_page *vsie_page)
+{
+	atomic_or(PROG_REQUEST, &vsie_page->scb_s.prog20);
+}
+
+/* mark the prefix as unmapped and wait until the VSIE has been left */
+static void prefix_unmapped_sync(struct vsie_page *vsie_page)
+{
+	prefix_unmapped(vsie_page);
+	if (vsie_page->scb_s.prog0c & PROG_IN_SIE)
+		atomic_or(CPUSTAT_STOP_INT, &vsie_page->scb_s.cpuflags);
+	while (vsie_page->scb_s.prog0c & PROG_IN_SIE)
+		cpu_relax();
+}
+
+/* mark the prefix as mapped, this will allow the VSIE to run */
+static void prefix_mapped(struct vsie_page *vsie_page)
+{
+	atomic_andnot(PROG_REQUEST, &vsie_page->scb_s.prog20);
+}
+
+/* test if the prefix is mapped into the gmap shadow */
+static int prefix_is_mapped(struct vsie_page *vsie_page)
+{
+	return !(atomic_read(&vsie_page->scb_s.prog20) & PROG_REQUEST);
+}
+
+/* copy the updated intervention request bits into the shadow scb */
+static void update_intervention_requests(struct vsie_page *vsie_page)
+{
+	const int bits = CPUSTAT_STOP_INT | CPUSTAT_IO_INT | CPUSTAT_EXT_INT;
+	int cpuflags;
+
+	cpuflags = atomic_read(&vsie_page->scb_o->cpuflags);
+	atomic_andnot(bits, &vsie_page->scb_s.cpuflags);
+	atomic_or(cpuflags & bits, &vsie_page->scb_s.cpuflags);
+}
+
+/* shadow (filter and validate) the cpuflags  */
+static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+	int newflags, cpuflags = atomic_read(&scb_o->cpuflags);
+
+	/* we don't allow ESA/390 guests */
+	if (!(cpuflags & CPUSTAT_ZARCH))
+		return set_validity_icpt(scb_s, 0x0001U);
+
+	if (cpuflags & (CPUSTAT_RRF | CPUSTAT_MCDS))
+		return set_validity_icpt(scb_s, 0x0001U);
+	else if (cpuflags & (CPUSTAT_SLSV | CPUSTAT_SLSR))
+		return set_validity_icpt(scb_s, 0x0007U);
+
+	/* intervention requests will be set later */
+	newflags = CPUSTAT_ZARCH;
+	if (cpuflags & CPUSTAT_GED && test_kvm_facility(vcpu->kvm, 8))
+		newflags |= CPUSTAT_GED;
+	if (cpuflags & CPUSTAT_GED2 && test_kvm_facility(vcpu->kvm, 78)) {
+		if (cpuflags & CPUSTAT_GED)
+			return set_validity_icpt(scb_s, 0x0001U);
+		newflags |= CPUSTAT_GED2;
+	}
+	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GPERE))
+		newflags |= cpuflags & CPUSTAT_P;
+	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GSLS))
+		newflags |= cpuflags & CPUSTAT_SM;
+	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS))
+		newflags |= cpuflags & CPUSTAT_IBS;
+
+	atomic_set(&scb_s->cpuflags, newflags);
+	return 0;
+}
+
+/*
+ * Create a shadow copy of the crycb block and setup key wrapping, if
+ * requested for guest 3 and enabled for guest 2.
+ *
+ * We only accept format-1 (no AP in g2), but convert it into format-2
+ * There is nothing to do for format-0.
+ *
+ * Returns: - 0 if shadowed or nothing to do
+ *          - > 0 if control has to be given to guest 2
+ */
+static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+	u32 crycb_addr = scb_o->crycbd & 0x7ffffff8U;
+	unsigned long *b1, *b2;
+	u8 ecb3_flags;
+
+	scb_s->crycbd = 0;
+	if (!(scb_o->crycbd & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1))
+		return 0;
+	/* format-1 is supported with message-security-assist extension 3 */
+	if (!test_kvm_facility(vcpu->kvm, 76))
+		return 0;
+	/* we may only allow it if enabled for guest 2 */
+	ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
+		     (ECB3_AES | ECB3_DEA);
+	if (!ecb3_flags)
+		return 0;
+
+	if ((crycb_addr & PAGE_MASK) != ((crycb_addr + 128) & PAGE_MASK))
+		return set_validity_icpt(scb_s, 0x003CU);
+	else if (!crycb_addr)
+		return set_validity_icpt(scb_s, 0x0039U);
+
+	/* copy only the wrapping keys */
+	if (read_guest_real(vcpu, crycb_addr + 72, &vsie_page->crycb, 56))
+		return set_validity_icpt(scb_s, 0x0035U);
+
+	scb_s->ecb3 |= ecb3_flags;
+	scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT1 |
+			CRYCB_FORMAT2;
+
+	/* xor both blocks in one run */
+	b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask;
+	b2 = (unsigned long *)
+			    vcpu->kvm->arch.crypto.crycb->dea_wrapping_key_mask;
+	/* as 56%8 == 0, bitmap_xor won't overwrite any data */
+	bitmap_xor(b1, b1, b2, BITS_PER_BYTE * 56);
+	return 0;
+}
+
+/* shadow (round up/down) the ibc to avoid validity icpt */
+static void prepare_ibc(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+	__u64 min_ibc = (sclp.ibc >> 16) & 0x0fffU;
+
+	scb_s->ibc = 0;
+	/* ibc installed in g2 and requested for g3 */
+	if (vcpu->kvm->arch.model.ibc && (scb_o->ibc & 0x0fffU)) {
+		scb_s->ibc = scb_o->ibc & 0x0fffU;
+		/* takte care of the minimum ibc level of the machine */
+		if (scb_s->ibc < min_ibc)
+			scb_s->ibc = min_ibc;
+		/* take care of the maximum ibc level set for the guest */
+		if (scb_s->ibc > vcpu->kvm->arch.model.ibc)
+			scb_s->ibc = vcpu->kvm->arch.model.ibc;
+	}
+}
+
+/* unshadow the scb, copying parameters back to the real scb */
+static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+
+	/* interception */
+	scb_o->icptcode = scb_s->icptcode;
+	scb_o->icptstatus = scb_s->icptstatus;
+	scb_o->ipa = scb_s->ipa;
+	scb_o->ipb = scb_s->ipb;
+	scb_o->gbea = scb_s->gbea;
+
+	/* timer */
+	scb_o->cputm = scb_s->cputm;
+	scb_o->ckc = scb_s->ckc;
+	scb_o->todpr = scb_s->todpr;
+
+	/* guest state */
+	scb_o->gpsw = scb_s->gpsw;
+	scb_o->gg14 = scb_s->gg14;
+	scb_o->gg15 = scb_s->gg15;
+	memcpy(scb_o->gcr, scb_s->gcr, 128);
+	scb_o->pp = scb_s->pp;
+
+	/* interrupt intercept */
+	switch (scb_s->icptcode) {
+	case ICPT_PROGI:
+	case ICPT_INSTPROGI:
+	case ICPT_EXTINT:
+		memcpy((void *)((u64)scb_o + 0xc0),
+		       (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
+		break;
+	case ICPT_PARTEXEC:
+		/* MVPG only */
+		memcpy((void *)((u64)scb_o + 0xc0),
+		       (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
+		break;
+	}
+
+	if (scb_s->ihcpu != 0xffffU)
+		scb_o->ihcpu = scb_s->ihcpu;
+}
+
+/*
+ * Setup the shadow scb by copying and checking the relevant parts of the g2
+ * provided scb.
+ *
+ * Returns: - 0 if the scb has been shadowed
+ *          - > 0 if control has to be given to guest 2
+ */
+static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	bool had_tx = scb_s->ecb & 0x10U;
+	unsigned long new_mso = 0;
+	int rc;
+
+	/* make sure we don't have any leftovers when reusing the scb */
+	scb_s->icptcode = 0;
+	scb_s->eca = 0;
+	scb_s->ecb = 0;
+	scb_s->ecb2 = 0;
+	scb_s->ecb3 = 0;
+	scb_s->ecd = 0;
+	scb_s->fac = 0;
+
+	rc = prepare_cpuflags(vcpu, vsie_page);
+	if (rc)
+		goto out;
+
+	/* timer */
+	scb_s->cputm = scb_o->cputm;
+	scb_s->ckc = scb_o->ckc;
+	scb_s->todpr = scb_o->todpr;
+	scb_s->epoch = scb_o->epoch;
+
+	/* guest state */
+	scb_s->gpsw = scb_o->gpsw;
+	scb_s->gg14 = scb_o->gg14;
+	scb_s->gg15 = scb_o->gg15;
+	memcpy(scb_s->gcr, scb_o->gcr, 128);
+	scb_s->pp = scb_o->pp;
+
+	/* interception / execution handling */
+	scb_s->gbea = scb_o->gbea;
+	scb_s->lctl = scb_o->lctl;
+	scb_s->svcc = scb_o->svcc;
+	scb_s->ictl = scb_o->ictl;
+	/*
+	 * SKEY handling functions can't deal with false setting of PTE invalid
+	 * bits. Therefore we cannot provide interpretation and would later
+	 * have to provide own emulation handlers.
+	 */
+	scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+	scb_s->icpua = scb_o->icpua;
+
+	if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM))
+		new_mso = scb_o->mso & 0xfffffffffff00000UL;
+	/* if the hva of the prefix changes, we have to remap the prefix */
+	if (scb_s->mso != new_mso || scb_s->prefix != scb_o->prefix)
+		prefix_unmapped(vsie_page);
+	 /* SIE will do mso/msl validity and exception checks for us */
+	scb_s->msl = scb_o->msl & 0xfffffffffff00000UL;
+	scb_s->mso = new_mso;
+	scb_s->prefix = scb_o->prefix;
+
+	/* We have to definetly flush the tlb if this scb never ran */
+	if (scb_s->ihcpu != 0xffffU)
+		scb_s->ihcpu = scb_o->ihcpu;
+
+	/* MVPG and Protection Exception Interpretation are always available */
+	scb_s->eca |= scb_o->eca & 0x01002000U;
+	/* Host-protection-interruption introduced with ESOP */
+	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
+		scb_s->ecb |= scb_o->ecb & 0x02U;
+	/* transactional execution */
+	if (test_kvm_facility(vcpu->kvm, 73)) {
+		/* remap the prefix is tx is toggled on */
+		if ((scb_o->ecb & 0x10U) && !had_tx)
+			prefix_unmapped(vsie_page);
+		scb_s->ecb |= scb_o->ecb & 0x10U;
+	}
+	/* SIMD */
+	if (test_kvm_facility(vcpu->kvm, 129)) {
+		scb_s->eca |= scb_o->eca & 0x00020000U;
+		scb_s->ecd |= scb_o->ecd & 0x20000000U;
+	}
+	/* Run-time-Instrumentation */
+	if (test_kvm_facility(vcpu->kvm, 64))
+		scb_s->ecb3 |= scb_o->ecb3 & 0x01U;
+	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF))
+		scb_s->eca |= scb_o->eca & 0x00000001U;
+	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB))
+		scb_s->eca |= scb_o->eca & 0x40000000U;
+	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
+		scb_s->eca |= scb_o->eca & 0x80000000U;
+
+	prepare_ibc(vcpu, vsie_page);
+	rc = shadow_crycb(vcpu, vsie_page);
+out:
+	if (rc)
+		unshadow_scb(vcpu, vsie_page);
+	return rc;
+}
+
+void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
+				 unsigned long end)
+{
+	struct kvm *kvm = gmap->private;
+	struct vsie_page *cur;
+	unsigned long prefix;
+	struct page *page;
+	int i;
+
+	if (!gmap_is_shadow(gmap))
+		return;
+	if (start >= 1UL << 31)
+		/* We are only interested in prefix pages */
+		return;
+
+	/*
+	 * Only new shadow blocks are added to the list during runtime,
+	 * therefore we can safely reference them all the time.
+	 */
+	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
+		page = READ_ONCE(kvm->arch.vsie.pages[i]);
+		if (!page)
+			continue;
+		cur = page_to_virt(page);
+		if (READ_ONCE(cur->gmap) != gmap)
+			continue;
+		prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
+		/* with mso/msl, the prefix lies at an offset */
+		prefix += cur->scb_s.mso;
+		if (prefix <= end && start <= prefix + 2 * PAGE_SIZE - 1)
+			prefix_unmapped_sync(cur);
+	}
+}
+
+/*
+ * Map the first prefix page and if tx is enabled also the second prefix page.
+ *
+ * The prefix will be protected, a gmap notifier will inform about unmaps.
+ * The shadow scb must not be executed until the prefix is remapped, this is
+ * guaranteed by properly handling PROG_REQUEST.
+ *
+ * Returns: - 0 on if successfully mapped or already mapped
+ *          - > 0 if control has to be given to guest 2
+ *          - -EAGAIN if the caller can retry immediately
+ *          - -ENOMEM if out of memory
+ */
+static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	u64 prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
+	int rc;
+
+	if (prefix_is_mapped(vsie_page))
+		return 0;
+
+	/* mark it as mapped so we can catch any concurrent unmappers */
+	prefix_mapped(vsie_page);
+
+	/* with mso/msl, the prefix lies at offset *mso* */
+	prefix += scb_s->mso;
+
+	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
+	if (!rc && (scb_s->ecb & 0x10U))
+		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
+					   prefix + PAGE_SIZE);
+	/*
+	 * We don't have to mprotect, we will be called for all unshadows.
+	 * SIE will detect if protection applies and trigger a validity.
+	 */
+	if (rc)
+		prefix_unmapped(vsie_page);
+	if (rc > 0 || rc == -EFAULT)
+		rc = set_validity_icpt(scb_s, 0x0037U);
+	return rc;
+}
+
+/*
+ * Pin the guest page given by gpa and set hpa to the pinned host address.
+ * Will always be pinned writable.
+ *
+ * Returns: - 0 on success
+ *          - -EINVAL if the gpa is not valid guest storage
+ *          - -ENOMEM if out of memory
+ */
+static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
+{
+	struct page *page;
+	hva_t hva;
+	int rc;
+
+	hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+	if (kvm_is_error_hva(hva))
+		return -EINVAL;
+	rc = get_user_pages_fast(hva, 1, 1, &page);
+	if (rc < 0)
+		return rc;
+	else if (rc != 1)
+		return -ENOMEM;
+	*hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK);
+	return 0;
+}
+
+/* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
+static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
+{
+	struct page *page;
+
+	page = virt_to_page(hpa);
+	set_page_dirty_lock(page);
+	put_page(page);
+	/* mark the page always as dirty for migration */
+	mark_page_dirty(kvm, gpa_to_gfn(gpa));
+}
+
+/* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
+static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	hpa_t hpa;
+	gpa_t gpa;
+
+	hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
+	if (hpa) {
+		gpa = scb_o->scaol & ~0xfUL;
+		if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
+			gpa |= (u64) scb_o->scaoh << 32;
+		unpin_guest_page(vcpu->kvm, gpa, hpa);
+		scb_s->scaol = 0;
+		scb_s->scaoh = 0;
+	}
+
+	hpa = scb_s->itdba;
+	if (hpa) {
+		gpa = scb_o->itdba & ~0xffUL;
+		unpin_guest_page(vcpu->kvm, gpa, hpa);
+		scb_s->itdba = 0;
+	}
+
+	hpa = scb_s->gvrd;
+	if (hpa) {
+		gpa = scb_o->gvrd & ~0x1ffUL;
+		unpin_guest_page(vcpu->kvm, gpa, hpa);
+		scb_s->gvrd = 0;
+	}
+
+	hpa = scb_s->riccbd;
+	if (hpa) {
+		gpa = scb_o->riccbd & ~0x3fUL;
+		unpin_guest_page(vcpu->kvm, gpa, hpa);
+		scb_s->riccbd = 0;
+	}
+}
+
+/*
+ * Instead of shadowing some blocks, we can simply forward them because the
+ * addresses in the scb are 64 bit long.
+ *
+ * This works as long as the data lies in one page. If blocks ever exceed one
+ * page, we have to fall back to shadowing.
+ *
+ * As we reuse the sca, the vcpu pointers contained in it are invalid. We must
+ * therefore not enable any facilities that access these pointers (e.g. SIGPIF).
+ *
+ * Returns: - 0 if all blocks were pinned.
+ *          - > 0 if control has to be given to guest 2
+ *          - -ENOMEM if out of memory
+ */
+static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	hpa_t hpa;
+	gpa_t gpa;
+	int rc = 0;
+
+	gpa = scb_o->scaol & ~0xfUL;
+	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
+		gpa |= (u64) scb_o->scaoh << 32;
+	if (gpa) {
+		if (!(gpa & ~0x1fffUL))
+			rc = set_validity_icpt(scb_s, 0x0038U);
+		else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu))
+			rc = set_validity_icpt(scb_s, 0x0011U);
+		else if ((gpa & PAGE_MASK) !=
+			 ((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK))
+			rc = set_validity_icpt(scb_s, 0x003bU);
+		if (!rc) {
+			rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+			if (rc == -EINVAL)
+				rc = set_validity_icpt(scb_s, 0x0034U);
+		}
+		if (rc)
+			goto unpin;
+		scb_s->scaoh = (u32)((u64)hpa >> 32);
+		scb_s->scaol = (u32)(u64)hpa;
+	}
+
+	gpa = scb_o->itdba & ~0xffUL;
+	if (gpa && (scb_s->ecb & 0x10U)) {
+		if (!(gpa & ~0x1fffU)) {
+			rc = set_validity_icpt(scb_s, 0x0080U);
+			goto unpin;
+		}
+		/* 256 bytes cannot cross page boundaries */
+		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+		if (rc == -EINVAL)
+			rc = set_validity_icpt(scb_s, 0x0080U);
+		if (rc)
+			goto unpin;
+		scb_s->itdba = hpa;
+	}
+
+	gpa = scb_o->gvrd & ~0x1ffUL;
+	if (gpa && (scb_s->eca & 0x00020000U) &&
+	    !(scb_s->ecd & 0x20000000U)) {
+		if (!(gpa & ~0x1fffUL)) {
+			rc = set_validity_icpt(scb_s, 0x1310U);
+			goto unpin;
+		}
+		/*
+		 * 512 bytes vector registers cannot cross page boundaries
+		 * if this block gets bigger, we have to shadow it.
+		 */
+		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+		if (rc == -EINVAL)
+			rc = set_validity_icpt(scb_s, 0x1310U);
+		if (rc)
+			goto unpin;
+		scb_s->gvrd = hpa;
+	}
+
+	gpa = scb_o->riccbd & ~0x3fUL;
+	if (gpa && (scb_s->ecb3 & 0x01U)) {
+		if (!(gpa & ~0x1fffUL)) {
+			rc = set_validity_icpt(scb_s, 0x0043U);
+			goto unpin;
+		}
+		/* 64 bytes cannot cross page boundaries */
+		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+		if (rc == -EINVAL)
+			rc = set_validity_icpt(scb_s, 0x0043U);
+		/* Validity 0x0044 will be checked by SIE */
+		if (rc)
+			goto unpin;
+		scb_s->gvrd = hpa;
+	}
+	return 0;
+unpin:
+	unpin_blocks(vcpu, vsie_page);
+	return rc;
+}
+
+/* unpin the scb provided by guest 2, marking it as dirty */
+static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
+		      gpa_t gpa)
+{
+	hpa_t hpa = (hpa_t) vsie_page->scb_o;
+
+	if (hpa)
+		unpin_guest_page(vcpu->kvm, gpa, hpa);
+	vsie_page->scb_o = NULL;
+}
+
+/*
+ * Pin the scb at gpa provided by guest 2 at vsie_page->scb_o.
+ *
+ * Returns: - 0 if the scb was pinned.
+ *          - > 0 if control has to be given to guest 2
+ *          - -ENOMEM if out of memory
+ */
+static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
+		   gpa_t gpa)
+{
+	hpa_t hpa;
+	int rc;
+
+	rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+	if (rc == -EINVAL) {
+		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		if (!rc)
+			rc = 1;
+	}
+	if (!rc)
+		vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
+	return rc;
+}
+
+/*
+ * Inject a fault into guest 2.
+ *
+ * Returns: - > 0 if control has to be given to guest 2
+ *            < 0 if an error occurred during injection.
+ */
+static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr,
+			bool write_flag)
+{
+	struct kvm_s390_pgm_info pgm = {
+		.code = code,
+		.trans_exc_code =
+			/* 0-51: virtual address */
+			(vaddr & 0xfffffffffffff000UL) |
+			/* 52-53: store / fetch */
+			(((unsigned int) !write_flag) + 1) << 10,
+			/* 62-63: asce id (alway primary == 0) */
+		.exc_access_id = 0, /* always primary */
+		.op_access_id = 0, /* not MVPG */
+	};
+	int rc;
+
+	if (code == PGM_PROTECTION)
+		pgm.trans_exc_code |= 0x4UL;
+
+	rc = kvm_s390_inject_prog_irq(vcpu, &pgm);
+	return rc ? rc : 1;
+}
+
+/*
+ * Handle a fault during vsie execution on a gmap shadow.
+ *
+ * Returns: - 0 if the fault was resolved
+ *          - > 0 if control has to be given to guest 2
+ *          - < 0 if an error occurred
+ */
+static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	int rc;
+
+	if (current->thread.gmap_int_code == PGM_PROTECTION)
+		/* we can directly forward all protection exceptions */
+		return inject_fault(vcpu, PGM_PROTECTION,
+				    current->thread.gmap_addr, 1);
+
+	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
+				   current->thread.gmap_addr);
+	if (rc > 0) {
+		rc = inject_fault(vcpu, rc,
+				  current->thread.gmap_addr,
+				  current->thread.gmap_write_flag);
+		if (rc >= 0)
+			vsie_page->fault_addr = current->thread.gmap_addr;
+	}
+	return rc;
+}
+
+/*
+ * Retry the previous fault that required guest 2 intervention. This avoids
+ * one superfluous SIE re-entry and direct exit.
+ *
+ * Will ignore any errors. The next SIE fault will do proper fault handling.
+ */
+static void handle_last_fault(struct kvm_vcpu *vcpu,
+			      struct vsie_page *vsie_page)
+{
+	if (vsie_page->fault_addr)
+		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
+				      vsie_page->fault_addr);
+	vsie_page->fault_addr = 0;
+}
+
+static inline void clear_vsie_icpt(struct vsie_page *vsie_page)
+{
+	vsie_page->scb_s.icptcode = 0;
+}
+
+/* rewind the psw and clear the vsie icpt, so we can retry execution */
+static void retry_vsie_icpt(struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	int ilen = insn_length(scb_s->ipa >> 8);
+
+	/* take care of EXECUTE instructions */
+	if (scb_s->icptstatus & 1) {
+		ilen = (scb_s->icptstatus >> 4) & 0x6;
+		if (!ilen)
+			ilen = 4;
+	}
+	scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, ilen);
+	clear_vsie_icpt(vsie_page);
+}
+
+/*
+ * Try to shadow + enable the guest 2 provided facility list.
+ * Retry instruction execution if enabled for and provided by guest 2.
+ *
+ * Returns: - 0 if handled (retry or guest 2 icpt)
+ *          - > 0 if control has to be given to guest 2
+ */
+static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	__u32 fac = vsie_page->scb_o->fac & 0x7ffffff8U;
+
+	if (fac && test_kvm_facility(vcpu->kvm, 7)) {
+		retry_vsie_icpt(vsie_page);
+		if (read_guest_real(vcpu, fac, &vsie_page->fac,
+				    sizeof(vsie_page->fac)))
+			return set_validity_icpt(scb_s, 0x1090U);
+		scb_s->fac = (__u32)(__u64) &vsie_page->fac;
+	}
+	return 0;
+}
+
+/*
+ * Run the vsie on a shadow scb and a shadow gmap, without any further
+ * sanity checks, handling SIE faults.
+ *
+ * Returns: - 0 everything went fine
+ *          - > 0 if control has to be given to guest 2
+ *          - < 0 if an error occurred
+ */
+static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+	int rc;
+
+	handle_last_fault(vcpu, vsie_page);
+
+	if (need_resched())
+		schedule();
+	if (test_cpu_flag(CIF_MCCK_PENDING))
+		s390_handle_mcck();
+
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+	local_irq_disable();
+	guest_enter_irqoff();
+	local_irq_enable();
+
+	rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
+
+	local_irq_disable();
+	guest_exit_irqoff();
+	local_irq_enable();
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	if (rc > 0)
+		rc = 0; /* we could still have an icpt */
+	else if (rc == -EFAULT)
+		return handle_fault(vcpu, vsie_page);
+
+	switch (scb_s->icptcode) {
+	case ICPT_INST:
+		if (scb_s->ipa == 0xb2b0)
+			rc = handle_stfle(vcpu, vsie_page);
+		break;
+	case ICPT_STOP:
+		/* stop not requested by g2 - must have been a kick */
+		if (!(atomic_read(&scb_o->cpuflags) & CPUSTAT_STOP_INT))
+			clear_vsie_icpt(vsie_page);
+		break;
+	case ICPT_VALIDITY:
+		if ((scb_s->ipa & 0xf000) != 0xf000)
+			scb_s->ipa += 0x1000;
+		break;
+	}
+	return rc;
+}
+
+static void release_gmap_shadow(struct vsie_page *vsie_page)
+{
+	if (vsie_page->gmap)
+		gmap_put(vsie_page->gmap);
+	WRITE_ONCE(vsie_page->gmap, NULL);
+	prefix_unmapped(vsie_page);
+}
+
+static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
+			       struct vsie_page *vsie_page)
+{
+	unsigned long asce;
+	union ctlreg0 cr0;
+	struct gmap *gmap;
+	int edat;
+
+	asce = vcpu->arch.sie_block->gcr[1];
+	cr0.val = vcpu->arch.sie_block->gcr[0];
+	edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
+	edat += edat && test_kvm_facility(vcpu->kvm, 78);
+
+	/*
+	 * ASCE or EDAT could have changed since last icpt, or the gmap
+	 * we're holding has been unshadowed. If the gmap is still valid,
+	 * we can safely reuse it.
+	 */
+	if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat))
+		return 0;
+
+	/* release the old shadow - if any, and mark the prefix as unmapped */
+	release_gmap_shadow(vsie_page);
+	gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
+	if (IS_ERR(gmap))
+		return PTR_ERR(gmap);
+	gmap->private = vcpu->kvm;
+	WRITE_ONCE(vsie_page->gmap, gmap);
+	return 0;
+}
+
+/*
+ * Register the shadow scb at the VCPU, e.g. for kicking out of vsie.
+ */
+static void register_shadow_scb(struct kvm_vcpu *vcpu,
+				struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+
+	WRITE_ONCE(vcpu->arch.vsie_block, &vsie_page->scb_s);
+	/*
+	 * External calls have to lead to a kick of the vcpu and
+	 * therefore the vsie -> Simulate Wait state.
+	 */
+	atomic_or(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+	/*
+	 * We have to adjust the g3 epoch by the g2 epoch. The epoch will
+	 * automatically be adjusted on tod clock changes via kvm_sync_clock.
+	 */
+	preempt_disable();
+	scb_s->epoch += vcpu->kvm->arch.epoch;
+	preempt_enable();
+}
+
+/*
+ * Unregister a shadow scb from a VCPU.
+ */
+static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
+{
+	atomic_andnot(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+	WRITE_ONCE(vcpu->arch.vsie_block, NULL);
+}
+
+/*
+ * Run the vsie on a shadowed scb, managing the gmap shadow, handling
+ * prefix pages and faults.
+ *
+ * Returns: - 0 if no errors occurred
+ *          - > 0 if control has to be given to guest 2
+ *          - -ENOMEM if out of memory
+ */
+static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	int rc = 0;
+
+	while (1) {
+		rc = acquire_gmap_shadow(vcpu, vsie_page);
+		if (!rc)
+			rc = map_prefix(vcpu, vsie_page);
+		if (!rc) {
+			gmap_enable(vsie_page->gmap);
+			update_intervention_requests(vsie_page);
+			rc = do_vsie_run(vcpu, vsie_page);
+			gmap_enable(vcpu->arch.gmap);
+		}
+		atomic_andnot(PROG_BLOCK_SIE, &scb_s->prog20);
+
+		if (rc == -EAGAIN)
+			rc = 0;
+		if (rc || scb_s->icptcode || signal_pending(current) ||
+		    kvm_s390_vcpu_has_irq(vcpu, 0))
+			break;
+	};
+
+	if (rc == -EFAULT) {
+		/*
+		 * Addressing exceptions are always presentes as intercepts.
+		 * As addressing exceptions are suppressing and our guest 3 PSW
+		 * points at the responsible instruction, we have to
+		 * forward the PSW and set the ilc. If we can't read guest 3
+		 * instruction, we can use an arbitrary ilc. Let's always use
+		 * ilen = 4 for now, so we can avoid reading in guest 3 virtual
+		 * memory. (we could also fake the shadow so the hardware
+		 * handles it).
+		 */
+		scb_s->icptcode = ICPT_PROGI;
+		scb_s->iprcc = PGM_ADDRESSING;
+		scb_s->pgmilc = 4;
+		scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4);
+	}
+	return rc;
+}
+
+/*
+ * Get or create a vsie page for a scb address.
+ *
+ * Returns: - address of a vsie page (cached or new one)
+ *          - NULL if the same scb address is already used by another VCPU
+ *          - ERR_PTR(-ENOMEM) if out of memory
+ */
+static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
+{
+	struct vsie_page *vsie_page;
+	struct page *page;
+	int nr_vcpus;
+
+	rcu_read_lock();
+	page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
+	rcu_read_unlock();
+	if (page) {
+		if (page_ref_inc_return(page) == 2)
+			return page_to_virt(page);
+		page_ref_dec(page);
+	}
+
+	/*
+	 * We want at least #online_vcpus shadows, so every VCPU can execute
+	 * the VSIE in parallel.
+	 */
+	nr_vcpus = atomic_read(&kvm->online_vcpus);
+
+	mutex_lock(&kvm->arch.vsie.mutex);
+	if (kvm->arch.vsie.page_count < nr_vcpus) {
+		page = alloc_page(GFP_KERNEL | __GFP_ZERO | GFP_DMA);
+		if (!page) {
+			mutex_unlock(&kvm->arch.vsie.mutex);
+			return ERR_PTR(-ENOMEM);
+		}
+		page_ref_inc(page);
+		kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
+		kvm->arch.vsie.page_count++;
+	} else {
+		/* reuse an existing entry that belongs to nobody */
+		while (true) {
+			page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
+			if (page_ref_inc_return(page) == 2)
+				break;
+			page_ref_dec(page);
+			kvm->arch.vsie.next++;
+			kvm->arch.vsie.next %= nr_vcpus;
+		}
+		radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
+	}
+	page->index = addr;
+	/* double use of the same address */
+	if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
+		page_ref_dec(page);
+		mutex_unlock(&kvm->arch.vsie.mutex);
+		return NULL;
+	}
+	mutex_unlock(&kvm->arch.vsie.mutex);
+
+	vsie_page = page_to_virt(page);
+	memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
+	release_gmap_shadow(vsie_page);
+	vsie_page->fault_addr = 0;
+	vsie_page->scb_s.ihcpu = 0xffffU;
+	return vsie_page;
+}
+
+/* put a vsie page acquired via get_vsie_page */
+static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
+{
+	struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);
+
+	page_ref_dec(page);
+}
+
+int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
+{
+	struct vsie_page *vsie_page;
+	unsigned long scb_addr;
+	int rc;
+
+	vcpu->stat.instruction_sie++;
+	if (!test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIEF2))
+		return -EOPNOTSUPP;
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	BUILD_BUG_ON(sizeof(struct vsie_page) != 4096);
+	scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL);
+
+	/* 512 byte alignment */
+	if (unlikely(scb_addr & 0x1ffUL))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0))
+		return 0;
+
+	vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
+	if (IS_ERR(vsie_page))
+		return PTR_ERR(vsie_page);
+	else if (!vsie_page)
+		/* double use of sie control block - simply do nothing */
+		return 0;
+
+	rc = pin_scb(vcpu, vsie_page, scb_addr);
+	if (rc)
+		goto out_put;
+	rc = shadow_scb(vcpu, vsie_page);
+	if (rc)
+		goto out_unpin_scb;
+	rc = pin_blocks(vcpu, vsie_page);
+	if (rc)
+		goto out_unshadow;
+	register_shadow_scb(vcpu, vsie_page);
+	rc = vsie_run(vcpu, vsie_page);
+	unregister_shadow_scb(vcpu);
+	unpin_blocks(vcpu, vsie_page);
+out_unshadow:
+	unshadow_scb(vcpu, vsie_page);
+out_unpin_scb:
+	unpin_scb(vcpu, vsie_page, scb_addr);
+out_put:
+	put_vsie_page(vcpu->kvm, vsie_page);
+
+	return rc < 0 ? rc : 0;
+}
+
+/* Init the vsie data structures. To be called when a vm is initialized. */
+void kvm_s390_vsie_init(struct kvm *kvm)
+{
+	mutex_init(&kvm->arch.vsie.mutex);
+	INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL);
+}
+
+/* Destroy the vsie data structures. To be called when a vm is destroyed. */
+void kvm_s390_vsie_destroy(struct kvm *kvm)
+{
+	struct vsie_page *vsie_page;
+	struct page *page;
+	int i;
+
+	mutex_lock(&kvm->arch.vsie.mutex);
+	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
+		page = kvm->arch.vsie.pages[i];
+		kvm->arch.vsie.pages[i] = NULL;
+		vsie_page = page_to_virt(page);
+		release_gmap_shadow(vsie_page);
+		/* free the radix tree entry */
+		radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
+		__free_page(page);
+	}
+	kvm->arch.vsie.page_count = 0;
+	mutex_unlock(&kvm->arch.vsie.mutex);
+}
+
+void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_sie_block *scb = READ_ONCE(vcpu->arch.vsie_block);
+
+	/*
+	 * Even if the VCPU lets go of the shadow sie block reference, it is
+	 * still valid in the cache. So we can safely kick it.
+	 */
+	if (scb) {
+		atomic_or(PROG_BLOCK_SIE, &scb->prog20);
+		if (scb->prog0c & PROG_IN_SIE)
+			atomic_or(CPUSTAT_STOP_INT, &scb->cpuflags);
+	}
+}
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index d96596128e9f..f481fcde067b 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -104,6 +104,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
 
 unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
+	check_object_size(to, n, false);
 	if (static_branch_likely(&have_mvcos))
 		return copy_from_user_mvcos(to, from, n);
 	return copy_from_user_mvcp(to, from, n);
@@ -177,6 +178,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
 
 unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
+	check_object_size(from, n, true);
 	if (static_branch_likely(&have_mvcos))
 		return copy_to_user_mvcos(to, from, n);
 	return copy_to_user_mvcs(to, from, n);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 25783dc3c813..a58bca62a93b 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -418,6 +418,8 @@ static inline int do_exception(struct pt_regs *regs, int access)
 		(struct gmap *) S390_lowcore.gmap : NULL;
 	if (gmap) {
 		current->thread.gmap_addr = address;
+		current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE);
+		current->thread.gmap_int_code = regs->int_code & 0xffff;
 		address = __gmap_translate(gmap, address);
 		if (address == -EFAULT) {
 			fault = VM_FAULT_BADMAP;
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 063c721ec0dc..2ce6bb3bab32 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -20,14 +20,16 @@
 #include <asm/gmap.h>
 #include <asm/tlb.h>
 
+#define GMAP_SHADOW_FAKE_TABLE 1ULL
+
 /**
- * gmap_alloc - allocate a guest address space
+ * gmap_alloc - allocate and initialize a guest address space
  * @mm: pointer to the parent mm_struct
  * @limit: maximum address of the gmap address space
  *
  * Returns a guest address space structure.
  */
-struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
+static struct gmap *gmap_alloc(unsigned long limit)
 {
 	struct gmap *gmap;
 	struct page *page;
@@ -55,10 +57,14 @@ struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
 	if (!gmap)
 		goto out;
 	INIT_LIST_HEAD(&gmap->crst_list);
+	INIT_LIST_HEAD(&gmap->children);
+	INIT_LIST_HEAD(&gmap->pt_list);
 	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
 	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
+	INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC);
 	spin_lock_init(&gmap->guest_table_lock);
-	gmap->mm = mm;
+	spin_lock_init(&gmap->shadow_lock);
+	atomic_set(&gmap->ref_count, 1);
 	page = alloc_pages(GFP_KERNEL, 2);
 	if (!page)
 		goto out_free;
@@ -70,9 +76,6 @@ struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
 	gmap->asce = atype | _ASCE_TABLE_LENGTH |
 		_ASCE_USER_BITS | __pa(table);
 	gmap->asce_end = limit;
-	down_write(&mm->mmap_sem);
-	list_add(&gmap->list, &mm->context.gmap_list);
-	up_write(&mm->mmap_sem);
 	return gmap;
 
 out_free:
@@ -80,7 +83,28 @@ out_free:
 out:
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(gmap_alloc);
+
+/**
+ * gmap_create - create a guest address space
+ * @mm: pointer to the parent mm_struct
+ * @limit: maximum size of the gmap address space
+ *
+ * Returns a guest address space structure.
+ */
+struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)
+{
+	struct gmap *gmap;
+
+	gmap = gmap_alloc(limit);
+	if (!gmap)
+		return NULL;
+	gmap->mm = mm;
+	spin_lock(&mm->context.gmap_lock);
+	list_add_rcu(&gmap->list, &mm->context.gmap_list);
+	spin_unlock(&mm->context.gmap_lock);
+	return gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_create);
 
 static void gmap_flush_tlb(struct gmap *gmap)
 {
@@ -114,31 +138,117 @@ static void gmap_radix_tree_free(struct radix_tree_root *root)
 	} while (nr > 0);
 }
 
+static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
+{
+	struct gmap_rmap *rmap, *rnext, *head;
+	struct radix_tree_iter iter;
+	unsigned long indices[16];
+	unsigned long index;
+	void **slot;
+	int i, nr;
+
+	/* A radix tree is freed by deleting all of its entries */
+	index = 0;
+	do {
+		nr = 0;
+		radix_tree_for_each_slot(slot, root, &iter, index) {
+			indices[nr] = iter.index;
+			if (++nr == 16)
+				break;
+		}
+		for (i = 0; i < nr; i++) {
+			index = indices[i];
+			head = radix_tree_delete(root, index);
+			gmap_for_each_rmap_safe(rmap, rnext, head)
+				kfree(rmap);
+		}
+	} while (nr > 0);
+}
+
 /**
  * gmap_free - free a guest address space
  * @gmap: pointer to the guest address space structure
+ *
+ * No locks required. There are no references to this gmap anymore.
  */
-void gmap_free(struct gmap *gmap)
+static void gmap_free(struct gmap *gmap)
 {
 	struct page *page, *next;
 
-	/* Flush tlb. */
-	if (MACHINE_HAS_IDTE)
-		__tlb_flush_idte(gmap->asce);
-	else
-		__tlb_flush_global();
-
+	/* Flush tlb of all gmaps (if not already done for shadows) */
+	if (!(gmap_is_shadow(gmap) && gmap->removed))
+		gmap_flush_tlb(gmap);
 	/* Free all segment & region tables. */
 	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
 		__free_pages(page, 2);
 	gmap_radix_tree_free(&gmap->guest_to_host);
 	gmap_radix_tree_free(&gmap->host_to_guest);
-	down_write(&gmap->mm->mmap_sem);
-	list_del(&gmap->list);
-	up_write(&gmap->mm->mmap_sem);
+
+	/* Free additional data for a shadow gmap */
+	if (gmap_is_shadow(gmap)) {
+		/* Free all page tables. */
+		list_for_each_entry_safe(page, next, &gmap->pt_list, lru)
+			page_table_free_pgste(page);
+		gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
+		/* Release reference to the parent */
+		gmap_put(gmap->parent);
+	}
+
 	kfree(gmap);
 }
-EXPORT_SYMBOL_GPL(gmap_free);
+
+/**
+ * gmap_get - increase reference counter for guest address space
+ * @gmap: pointer to the guest address space structure
+ *
+ * Returns the gmap pointer
+ */
+struct gmap *gmap_get(struct gmap *gmap)
+{
+	atomic_inc(&gmap->ref_count);
+	return gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_get);
+
+/**
+ * gmap_put - decrease reference counter for guest address space
+ * @gmap: pointer to the guest address space structure
+ *
+ * If the reference counter reaches zero the guest address space is freed.
+ */
+void gmap_put(struct gmap *gmap)
+{
+	if (atomic_dec_return(&gmap->ref_count) == 0)
+		gmap_free(gmap);
+}
+EXPORT_SYMBOL_GPL(gmap_put);
+
+/**
+ * gmap_remove - remove a guest address space but do not free it yet
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_remove(struct gmap *gmap)
+{
+	struct gmap *sg, *next;
+
+	/* Remove all shadow gmaps linked to this gmap */
+	if (!list_empty(&gmap->children)) {
+		spin_lock(&gmap->shadow_lock);
+		list_for_each_entry_safe(sg, next, &gmap->children, list) {
+			list_del(&sg->list);
+			gmap_put(sg);
+		}
+		spin_unlock(&gmap->shadow_lock);
+	}
+	/* Remove gmap from the pre-mm list */
+	spin_lock(&gmap->mm->context.gmap_lock);
+	list_del_rcu(&gmap->list);
+	spin_unlock(&gmap->mm->context.gmap_lock);
+	synchronize_rcu();
+	/* Put reference */
+	gmap_put(gmap);
+}
+EXPORT_SYMBOL_GPL(gmap_remove);
 
 /**
  * gmap_enable - switch primary space to the guest address space
@@ -160,6 +270,17 @@ void gmap_disable(struct gmap *gmap)
 }
 EXPORT_SYMBOL_GPL(gmap_disable);
 
+/**
+ * gmap_get_enabled - get a pointer to the currently enabled gmap
+ *
+ * Returns a pointer to the currently enabled gmap. 0 if none is enabled.
+ */
+struct gmap *gmap_get_enabled(void)
+{
+	return (struct gmap *) S390_lowcore.gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_get_enabled);
+
 /*
  * gmap_alloc_table is assumed to be called with mmap_sem held
  */
@@ -175,7 +296,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 		return -ENOMEM;
 	new = (unsigned long *) page_to_phys(page);
 	crst_table_init(new, init);
-	spin_lock(&gmap->mm->page_table_lock);
+	spin_lock(&gmap->guest_table_lock);
 	if (*table & _REGION_ENTRY_INVALID) {
 		list_add(&page->lru, &gmap->crst_list);
 		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
@@ -183,7 +304,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 		page->index = gaddr;
 		page = NULL;
 	}
-	spin_unlock(&gmap->mm->page_table_lock);
+	spin_unlock(&gmap->guest_table_lock);
 	if (page)
 		__free_pages(page, 2);
 	return 0;
@@ -219,6 +340,7 @@ static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
 	unsigned long *entry;
 	int flush = 0;
 
+	BUG_ON(gmap_is_shadow(gmap));
 	spin_lock(&gmap->guest_table_lock);
 	entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
 	if (entry) {
@@ -258,6 +380,7 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
 	unsigned long off;
 	int flush;
 
+	BUG_ON(gmap_is_shadow(gmap));
 	if ((to | len) & (PMD_SIZE - 1))
 		return -EINVAL;
 	if (len == 0 || to + len < to)
@@ -289,6 +412,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
 	unsigned long off;
 	int flush;
 
+	BUG_ON(gmap_is_shadow(gmap));
 	if ((from | to | len) & (PMD_SIZE - 1))
 		return -EINVAL;
 	if (len == 0 || from + len < from || to + len < to ||
@@ -326,6 +450,8 @@ EXPORT_SYMBOL_GPL(gmap_map_segment);
  * This function does not establish potentially missing page table entries.
  * The mmap_sem of the mm that belongs to the address space must be held
  * when this function gets called.
+ *
+ * Note: Can also be called for shadow gmaps.
  */
 unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
 {
@@ -333,6 +459,7 @@ unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
 
 	vmaddr = (unsigned long)
 		radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
+	/* Note: guest_to_host is empty for a shadow gmap */
 	return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
 }
 EXPORT_SYMBOL_GPL(__gmap_translate);
@@ -369,11 +496,13 @@ void gmap_unlink(struct mm_struct *mm, unsigned long *table,
 	struct gmap *gmap;
 	int flush;
 
-	list_for_each_entry(gmap, &mm->context.gmap_list, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
 		flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
 		if (flush)
 			gmap_flush_tlb(gmap);
 	}
+	rcu_read_unlock();
 }
 
 /**
@@ -397,6 +526,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 	pmd_t *pmd;
 	int rc;
 
+	BUG_ON(gmap_is_shadow(gmap));
 	/* Create higher level tables in the gmap page table */
 	table = gmap->table;
 	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
@@ -552,116 +682,1412 @@ static LIST_HEAD(gmap_notifier_list);
 static DEFINE_SPINLOCK(gmap_notifier_lock);
 
 /**
- * gmap_register_ipte_notifier - register a pte invalidation callback
+ * gmap_register_pte_notifier - register a pte invalidation callback
  * @nb: pointer to the gmap notifier block
  */
-void gmap_register_ipte_notifier(struct gmap_notifier *nb)
+void gmap_register_pte_notifier(struct gmap_notifier *nb)
 {
 	spin_lock(&gmap_notifier_lock);
-	list_add(&nb->list, &gmap_notifier_list);
+	list_add_rcu(&nb->list, &gmap_notifier_list);
 	spin_unlock(&gmap_notifier_lock);
 }
-EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);
+EXPORT_SYMBOL_GPL(gmap_register_pte_notifier);
 
 /**
- * gmap_unregister_ipte_notifier - remove a pte invalidation callback
+ * gmap_unregister_pte_notifier - remove a pte invalidation callback
  * @nb: pointer to the gmap notifier block
  */
-void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
+void gmap_unregister_pte_notifier(struct gmap_notifier *nb)
 {
 	spin_lock(&gmap_notifier_lock);
-	list_del_init(&nb->list);
+	list_del_rcu(&nb->list);
 	spin_unlock(&gmap_notifier_lock);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier);
+
+/**
+ * gmap_call_notifier - call all registered invalidation callbacks
+ * @gmap: pointer to guest mapping meta data structure
+ * @start: start virtual address in the guest address space
+ * @end: end virtual address in the guest address space
+ */
+static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
+			       unsigned long end)
+{
+	struct gmap_notifier *nb;
+
+	list_for_each_entry(nb, &gmap_notifier_list, list)
+		nb->notifier_call(gmap, start, end);
+}
+
+/**
+ * gmap_table_walk - walk the gmap page tables
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @level: page table level to stop at
+ *
+ * Returns a table entry pointer for the given guest address and @level
+ * @level=0 : returns a pointer to a page table table entry (or NULL)
+ * @level=1 : returns a pointer to a segment table entry (or NULL)
+ * @level=2 : returns a pointer to a region-3 table entry (or NULL)
+ * @level=3 : returns a pointer to a region-2 table entry (or NULL)
+ * @level=4 : returns a pointer to a region-1 table entry (or NULL)
+ *
+ * Returns NULL if the gmap page tables could not be walked to the
+ * requested level.
+ *
+ * Note: Can also be called for shadow gmaps.
+ */
+static inline unsigned long *gmap_table_walk(struct gmap *gmap,
+					     unsigned long gaddr, int level)
+{
+	unsigned long *table;
+
+	if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4))
+		return NULL;
+	if (gmap_is_shadow(gmap) && gmap->removed)
+		return NULL;
+	if (gaddr & (-1UL << (31 + ((gmap->asce & _ASCE_TYPE_MASK) >> 2)*11)))
+		return NULL;
+	table = gmap->table;
+	switch (gmap->asce & _ASCE_TYPE_MASK) {
+	case _ASCE_TYPE_REGION1:
+		table += (gaddr >> 53) & 0x7ff;
+		if (level == 4)
+			break;
+		if (*table & _REGION_ENTRY_INVALID)
+			return NULL;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		/* Fallthrough */
+	case _ASCE_TYPE_REGION2:
+		table += (gaddr >> 42) & 0x7ff;
+		if (level == 3)
+			break;
+		if (*table & _REGION_ENTRY_INVALID)
+			return NULL;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		/* Fallthrough */
+	case _ASCE_TYPE_REGION3:
+		table += (gaddr >> 31) & 0x7ff;
+		if (level == 2)
+			break;
+		if (*table & _REGION_ENTRY_INVALID)
+			return NULL;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		/* Fallthrough */
+	case _ASCE_TYPE_SEGMENT:
+		table += (gaddr >> 20) & 0x7ff;
+		if (level == 1)
+			break;
+		if (*table & _REGION_ENTRY_INVALID)
+			return NULL;
+		table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+		table += (gaddr >> 12) & 0xff;
+	}
+	return table;
+}
+
+/**
+ * gmap_pte_op_walk - walk the gmap page table, get the page table lock
+ *		      and return the pte pointer
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @ptl: pointer to the spinlock pointer
+ *
+ * Returns a pointer to the locked pte for a guest address, or NULL
+ *
+ * Note: Can also be called for shadow gmaps.
+ */
+static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,
+			       spinlock_t **ptl)
+{
+	unsigned long *table;
+
+	if (gmap_is_shadow(gmap))
+		spin_lock(&gmap->guest_table_lock);
+	/* Walk the gmap page table, lock and get pte pointer */
+	table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */
+	if (!table || *table & _SEGMENT_ENTRY_INVALID) {
+		if (gmap_is_shadow(gmap))
+			spin_unlock(&gmap->guest_table_lock);
+		return NULL;
+	}
+	if (gmap_is_shadow(gmap)) {
+		*ptl = &gmap->guest_table_lock;
+		return pte_offset_map((pmd_t *) table, gaddr);
+	}
+	return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);
+}
+
+/**
+ * gmap_pte_op_fixup - force a page in and connect the gmap page table
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @vmaddr: address in the host process address space
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ *
+ * Returns 0 if the caller can retry __gmap_translate (might fail again),
+ * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing
+ * up or connecting the gmap page table.
+ */
+static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
+			     unsigned long vmaddr, int prot)
+{
+	struct mm_struct *mm = gmap->mm;
+	unsigned int fault_flags;
+	bool unlocked = false;
+
+	BUG_ON(gmap_is_shadow(gmap));
+	fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
+	if (fixup_user_fault(current, mm, vmaddr, fault_flags, &unlocked))
+		return -EFAULT;
+	if (unlocked)
+		/* lost mmap_sem, caller has to retry __gmap_translate */
+		return 0;
+	/* Connect the page tables */
+	return __gmap_link(gmap, gaddr, vmaddr);
 }
-EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
 
 /**
- * gmap_ipte_notify - mark a range of ptes for invalidation notification
+ * gmap_pte_op_end - release the page table lock
+ * @ptl: pointer to the spinlock pointer
+ */
+static void gmap_pte_op_end(spinlock_t *ptl)
+{
+	spin_unlock(ptl);
+}
+
+/*
+ * gmap_protect_range - remove access rights to memory and set pgste bits
  * @gmap: pointer to guest mapping meta data structure
  * @gaddr: virtual address in the guest address space
  * @len: size of area
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: pgste notification bits to set
  *
- * Returns 0 if for each page in the given range a gmap mapping exists and
- * the invalidation notification could be set. If the gmap mapping is missing
- * for one or more pages -EFAULT is returned. If no memory could be allocated
- * -ENOMEM is returned. This function establishes missing page table entries.
+ * Returns 0 if successfully protected, -ENOMEM if out of memory and
+ * -EFAULT if gaddr is invalid (or mapping for shadows is missing).
+ *
+ * Called with sg->mm->mmap_sem in read.
+ *
+ * Note: Can also be called for shadow gmaps.
  */
-int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
+static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
+			      unsigned long len, int prot, unsigned long bits)
 {
-	unsigned long addr;
+	unsigned long vmaddr;
 	spinlock_t *ptl;
 	pte_t *ptep;
-	bool unlocked;
-	int rc = 0;
+	int rc;
+
+	while (len) {
+		rc = -EAGAIN;
+		ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
+		if (ptep) {
+			rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, bits);
+			gmap_pte_op_end(ptl);
+		}
+		if (rc) {
+			vmaddr = __gmap_translate(gmap, gaddr);
+			if (IS_ERR_VALUE(vmaddr))
+				return vmaddr;
+			rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
+			if (rc)
+				return rc;
+			continue;
+		}
+		gaddr += PAGE_SIZE;
+		len -= PAGE_SIZE;
+	}
+	return 0;
+}
+
+/**
+ * gmap_mprotect_notify - change access rights for a range of ptes and
+ *                        call the notifier if any pte changes again
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @len: size of area
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ *
+ * Returns 0 if for each page in the given range a gmap mapping exists,
+ * the new access rights could be set and the notifier could be armed.
+ * If the gmap mapping is missing for one or more pages -EFAULT is
+ * returned. If no memory could be allocated -ENOMEM is returned.
+ * This function establishes missing page table entries.
+ */
+int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
+			 unsigned long len, int prot)
+{
+	int rc;
 
-	if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
+	if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap))
+		return -EINVAL;
+	if (!MACHINE_HAS_ESOP && prot == PROT_READ)
 		return -EINVAL;
 	down_read(&gmap->mm->mmap_sem);
-	while (len) {
-		unlocked = false;
-		/* Convert gmap address and connect the page tables */
-		addr = __gmap_translate(gmap, gaddr);
-		if (IS_ERR_VALUE(addr)) {
-			rc = addr;
+	rc = gmap_protect_range(gmap, gaddr, len, prot, PGSTE_IN_BIT);
+	up_read(&gmap->mm->mmap_sem);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
+
+/**
+ * gmap_read_table - get an unsigned long value from a guest page table using
+ *                   absolute addressing, without marking the page referenced.
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @val: pointer to the unsigned long value to return
+ *
+ * Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT
+ * if reading using the virtual address failed.
+ *
+ * Called with gmap->mm->mmap_sem in read.
+ */
+int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
+{
+	unsigned long address, vmaddr;
+	spinlock_t *ptl;
+	pte_t *ptep, pte;
+	int rc;
+
+	while (1) {
+		rc = -EAGAIN;
+		ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
+		if (ptep) {
+			pte = *ptep;
+			if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
+				address = pte_val(pte) & PAGE_MASK;
+				address += gaddr & ~PAGE_MASK;
+				*val = *(unsigned long *) address;
+				pte_val(*ptep) |= _PAGE_YOUNG;
+				/* Do *NOT* clear the _PAGE_INVALID bit! */
+				rc = 0;
+			}
+			gmap_pte_op_end(ptl);
+		}
+		if (!rc)
+			break;
+		vmaddr = __gmap_translate(gmap, gaddr);
+		if (IS_ERR_VALUE(vmaddr)) {
+			rc = vmaddr;
 			break;
 		}
-		/* Get the page mapped */
-		if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE,
-				     &unlocked)) {
-			rc = -EFAULT;
+		rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, PROT_READ);
+		if (rc)
 			break;
+	}
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_read_table);
+
+/**
+ * gmap_insert_rmap - add a rmap to the host_to_rmap radix tree
+ * @sg: pointer to the shadow guest address space structure
+ * @vmaddr: vm address associated with the rmap
+ * @rmap: pointer to the rmap structure
+ *
+ * Called with the sg->guest_table_lock
+ */
+static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
+				    struct gmap_rmap *rmap)
+{
+	void **slot;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
+	if (slot) {
+		rmap->next = radix_tree_deref_slot_protected(slot,
+							&sg->guest_table_lock);
+		radix_tree_replace_slot(slot, rmap);
+	} else {
+		rmap->next = NULL;
+		radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
+				  rmap);
+	}
+}
+
+/**
+ * gmap_protect_rmap - modify access rights to memory and create an rmap
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow gmap
+ * @paddr: address in the parent guest address space
+ * @len: length of the memory area to protect
+ * @prot: indicates access rights: none, read-only or read-write
+ *
+ * Returns 0 if successfully protected and the rmap was created, -ENOMEM
+ * if out of memory and -EFAULT if paddr is invalid.
+ */
+static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
+			     unsigned long paddr, unsigned long len, int prot)
+{
+	struct gmap *parent;
+	struct gmap_rmap *rmap;
+	unsigned long vmaddr;
+	spinlock_t *ptl;
+	pte_t *ptep;
+	int rc;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	parent = sg->parent;
+	while (len) {
+		vmaddr = __gmap_translate(parent, paddr);
+		if (IS_ERR_VALUE(vmaddr))
+			return vmaddr;
+		rmap = kzalloc(sizeof(*rmap), GFP_KERNEL);
+		if (!rmap)
+			return -ENOMEM;
+		rmap->raddr = raddr;
+		rc = radix_tree_preload(GFP_KERNEL);
+		if (rc) {
+			kfree(rmap);
+			return rc;
+		}
+		rc = -EAGAIN;
+		ptep = gmap_pte_op_walk(parent, paddr, &ptl);
+		if (ptep) {
+			spin_lock(&sg->guest_table_lock);
+			rc = ptep_force_prot(parent->mm, paddr, ptep, prot,
+					     PGSTE_VSIE_BIT);
+			if (!rc)
+				gmap_insert_rmap(sg, vmaddr, rmap);
+			spin_unlock(&sg->guest_table_lock);
+			gmap_pte_op_end(ptl);
 		}
-		/* While trying to map mmap_sem got unlocked. Let us retry */
-		if (unlocked)
+		radix_tree_preload_end();
+		if (rc) {
+			kfree(rmap);
+			rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
+			if (rc)
+				return rc;
 			continue;
-		rc = __gmap_link(gmap, gaddr, addr);
+		}
+		paddr += PAGE_SIZE;
+		len -= PAGE_SIZE;
+	}
+	return 0;
+}
+
+#define _SHADOW_RMAP_MASK	0x7
+#define _SHADOW_RMAP_REGION1	0x5
+#define _SHADOW_RMAP_REGION2	0x4
+#define _SHADOW_RMAP_REGION3	0x3
+#define _SHADOW_RMAP_SEGMENT	0x2
+#define _SHADOW_RMAP_PGTABLE	0x1
+
+/**
+ * gmap_idte_one - invalidate a single region or segment table entry
+ * @asce: region or segment table *origin* + table-type bits
+ * @vaddr: virtual address to identify the table entry to flush
+ *
+ * The invalid bit of a single region or segment table entry is set
+ * and the associated TLB entries depending on the entry are flushed.
+ * The table-type of the @asce identifies the portion of the @vaddr
+ * that is used as the invalidation index.
+ */
+static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr)
+{
+	asm volatile(
+		"	.insn	rrf,0xb98e0000,%0,%1,0,0"
+		: : "a" (asce), "a" (vaddr) : "cc", "memory");
+}
+
+/**
+ * gmap_unshadow_page - remove a page from a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
+{
+	unsigned long *table;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
+	if (!table || *table & _PAGE_INVALID)
+		return;
+	gmap_call_notifier(sg, raddr, raddr + (1UL << 12) - 1);
+	ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
+}
+
+/**
+ * __gmap_unshadow_pgt - remove all entries from a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ * @pgt: pointer to the start of a shadow page table
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
+				unsigned long *pgt)
+{
+	int i;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	for (i = 0; i < 256; i++, raddr += 1UL << 12)
+		pgt[i] = _PAGE_INVALID;
+}
+
+/**
+ * gmap_unshadow_pgt - remove a shadow page table from a segment entry
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: address in the shadow guest address space
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
+{
+	unsigned long sto, *ste, *pgt;
+	struct page *page;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
+	if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
+		return;
+	gmap_call_notifier(sg, raddr, raddr + (1UL << 20) - 1);
+	sto = (unsigned long) (ste - ((raddr >> 20) & 0x7ff));
+	gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
+	pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN);
+	*ste = _SEGMENT_ENTRY_EMPTY;
+	__gmap_unshadow_pgt(sg, raddr, pgt);
+	/* Free page table */
+	page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT);
+	list_del(&page->lru);
+	page_table_free_pgste(page);
+}
+
+/**
+ * __gmap_unshadow_sgt - remove all entries from a shadow segment table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ * @sgt: pointer to the start of a shadow segment table
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
+				unsigned long *sgt)
+{
+	unsigned long asce, *pgt;
+	struct page *page;
+	int i;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	asce = (unsigned long) sgt | _ASCE_TYPE_SEGMENT;
+	for (i = 0; i < 2048; i++, raddr += 1UL << 20) {
+		if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
+			continue;
+		pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN);
+		sgt[i] = _SEGMENT_ENTRY_EMPTY;
+		__gmap_unshadow_pgt(sg, raddr, pgt);
+		/* Free page table */
+		page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT);
+		list_del(&page->lru);
+		page_table_free_pgste(page);
+	}
+}
+
+/**
+ * gmap_unshadow_sgt - remove a shadow segment table from a region-3 entry
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ *
+ * Called with the shadow->guest_table_lock
+ */
+static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
+{
+	unsigned long r3o, *r3e, *sgt;
+	struct page *page;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */
+	if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))
+		return;
+	gmap_call_notifier(sg, raddr, raddr + (1UL << 31) - 1);
+	r3o = (unsigned long) (r3e - ((raddr >> 31) & 0x7ff));
+	gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr);
+	sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN);
+	*r3e = _REGION3_ENTRY_EMPTY;
+	__gmap_unshadow_sgt(sg, raddr, sgt);
+	/* Free segment table */
+	page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
+	list_del(&page->lru);
+	__free_pages(page, 2);
+}
+
+/**
+ * __gmap_unshadow_r3t - remove all entries from a shadow region-3 table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: address in the shadow guest address space
+ * @r3t: pointer to the start of a shadow region-3 table
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
+				unsigned long *r3t)
+{
+	unsigned long asce, *sgt;
+	struct page *page;
+	int i;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	asce = (unsigned long) r3t | _ASCE_TYPE_REGION3;
+	for (i = 0; i < 2048; i++, raddr += 1UL << 31) {
+		if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
+			continue;
+		sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN);
+		r3t[i] = _REGION3_ENTRY_EMPTY;
+		__gmap_unshadow_sgt(sg, raddr, sgt);
+		/* Free segment table */
+		page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
+		list_del(&page->lru);
+		__free_pages(page, 2);
+	}
+}
+
+/**
+ * gmap_unshadow_r3t - remove a shadow region-3 table from a region-2 entry
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
+{
+	unsigned long r2o, *r2e, *r3t;
+	struct page *page;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */
+	if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))
+		return;
+	gmap_call_notifier(sg, raddr, raddr + (1UL << 42) - 1);
+	r2o = (unsigned long) (r2e - ((raddr >> 42) & 0x7ff));
+	gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr);
+	r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN);
+	*r2e = _REGION2_ENTRY_EMPTY;
+	__gmap_unshadow_r3t(sg, raddr, r3t);
+	/* Free region 3 table */
+	page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
+	list_del(&page->lru);
+	__free_pages(page, 2);
+}
+
+/**
+ * __gmap_unshadow_r2t - remove all entries from a shadow region-2 table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ * @r2t: pointer to the start of a shadow region-2 table
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
+				unsigned long *r2t)
+{
+	unsigned long asce, *r3t;
+	struct page *page;
+	int i;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	asce = (unsigned long) r2t | _ASCE_TYPE_REGION2;
+	for (i = 0; i < 2048; i++, raddr += 1UL << 42) {
+		if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
+			continue;
+		r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN);
+		r2t[i] = _REGION2_ENTRY_EMPTY;
+		__gmap_unshadow_r3t(sg, raddr, r3t);
+		/* Free region 3 table */
+		page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
+		list_del(&page->lru);
+		__free_pages(page, 2);
+	}
+}
+
+/**
+ * gmap_unshadow_r2t - remove a shadow region-2 table from a region-1 entry
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
+{
+	unsigned long r1o, *r1e, *r2t;
+	struct page *page;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
+	if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))
+		return;
+	gmap_call_notifier(sg, raddr, raddr + (1UL << 53) - 1);
+	r1o = (unsigned long) (r1e - ((raddr >> 53) & 0x7ff));
+	gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr);
+	r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN);
+	*r1e = _REGION1_ENTRY_EMPTY;
+	__gmap_unshadow_r2t(sg, raddr, r2t);
+	/* Free region 2 table */
+	page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
+	list_del(&page->lru);
+	__free_pages(page, 2);
+}
+
+/**
+ * __gmap_unshadow_r1t - remove all entries from a shadow region-1 table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ * @r1t: pointer to the start of a shadow region-1 table
+ *
+ * Called with the shadow->guest_table_lock
+ */
+static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
+				unsigned long *r1t)
+{
+	unsigned long asce, *r2t;
+	struct page *page;
+	int i;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	asce = (unsigned long) r1t | _ASCE_TYPE_REGION1;
+	for (i = 0; i < 2048; i++, raddr += 1UL << 53) {
+		if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
+			continue;
+		r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN);
+		__gmap_unshadow_r2t(sg, raddr, r2t);
+		/* Clear entry and flush translation r1t -> r2t */
+		gmap_idte_one(asce, raddr);
+		r1t[i] = _REGION1_ENTRY_EMPTY;
+		/* Free region 2 table */
+		page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
+		list_del(&page->lru);
+		__free_pages(page, 2);
+	}
+}
+
+/**
+ * gmap_unshadow - remove a shadow page table completely
+ * @sg: pointer to the shadow guest address space structure
+ *
+ * Called with sg->guest_table_lock
+ */
+static void gmap_unshadow(struct gmap *sg)
+{
+	unsigned long *table;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	if (sg->removed)
+		return;
+	sg->removed = 1;
+	gmap_call_notifier(sg, 0, -1UL);
+	gmap_flush_tlb(sg);
+	table = (unsigned long *)(sg->asce & _ASCE_ORIGIN);
+	switch (sg->asce & _ASCE_TYPE_MASK) {
+	case _ASCE_TYPE_REGION1:
+		__gmap_unshadow_r1t(sg, 0, table);
+		break;
+	case _ASCE_TYPE_REGION2:
+		__gmap_unshadow_r2t(sg, 0, table);
+		break;
+	case _ASCE_TYPE_REGION3:
+		__gmap_unshadow_r3t(sg, 0, table);
+		break;
+	case _ASCE_TYPE_SEGMENT:
+		__gmap_unshadow_sgt(sg, 0, table);
+		break;
+	}
+}
+
+/**
+ * gmap_find_shadow - find a specific asce in the list of shadow tables
+ * @parent: pointer to the parent gmap
+ * @asce: ASCE for which the shadow table is created
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns the pointer to a gmap if a shadow table with the given asce is
+ * already available, ERR_PTR(-EAGAIN) if another one is just being created,
+ * otherwise NULL
+ */
+static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce,
+				     int edat_level)
+{
+	struct gmap *sg;
+
+	list_for_each_entry(sg, &parent->children, list) {
+		if (sg->orig_asce != asce || sg->edat_level != edat_level ||
+		    sg->removed)
+			continue;
+		if (!sg->initialized)
+			return ERR_PTR(-EAGAIN);
+		atomic_inc(&sg->ref_count);
+		return sg;
+	}
+	return NULL;
+}
+
+/**
+ * gmap_shadow_valid - check if a shadow guest address space matches the
+ *                     given properties and is still valid
+ * @sg: pointer to the shadow guest address space structure
+ * @asce: ASCE for which the shadow table is requested
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns 1 if the gmap shadow is still valid and matches the given
+ * properties, the caller can continue using it. Returns 0 otherwise, the
+ * caller has to request a new shadow gmap in this case.
+ *
+ */
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
+{
+	if (sg->removed)
+		return 0;
+	return sg->orig_asce == asce && sg->edat_level == edat_level;
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_valid);
+
+/**
+ * gmap_shadow - create/find a shadow guest address space
+ * @parent: pointer to the parent gmap
+ * @asce: ASCE for which the shadow table is created
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * The pages of the top level page table referred by the asce parameter
+ * will be set to read-only and marked in the PGSTEs of the kvm process.
+ * The shadow table will be removed automatically on any change to the
+ * PTE mapping for the source table.
+ *
+ * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
+ * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
+ * parent gmap table could not be protected.
+ */
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
+			 int edat_level)
+{
+	struct gmap *sg, *new;
+	unsigned long limit;
+	int rc;
+
+	BUG_ON(gmap_is_shadow(parent));
+	spin_lock(&parent->shadow_lock);
+	sg = gmap_find_shadow(parent, asce, edat_level);
+	spin_unlock(&parent->shadow_lock);
+	if (sg)
+		return sg;
+	/* Create a new shadow gmap */
+	limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
+	if (asce & _ASCE_REAL_SPACE)
+		limit = -1UL;
+	new = gmap_alloc(limit);
+	if (!new)
+		return ERR_PTR(-ENOMEM);
+	new->mm = parent->mm;
+	new->parent = gmap_get(parent);
+	new->orig_asce = asce;
+	new->edat_level = edat_level;
+	new->initialized = false;
+	spin_lock(&parent->shadow_lock);
+	/* Recheck if another CPU created the same shadow */
+	sg = gmap_find_shadow(parent, asce, edat_level);
+	if (sg) {
+		spin_unlock(&parent->shadow_lock);
+		gmap_free(new);
+		return sg;
+	}
+	if (asce & _ASCE_REAL_SPACE) {
+		/* only allow one real-space gmap shadow */
+		list_for_each_entry(sg, &parent->children, list) {
+			if (sg->orig_asce & _ASCE_REAL_SPACE) {
+				spin_lock(&sg->guest_table_lock);
+				gmap_unshadow(sg);
+				spin_unlock(&sg->guest_table_lock);
+				list_del(&sg->list);
+				gmap_put(sg);
+				break;
+			}
+		}
+	}
+	atomic_set(&new->ref_count, 2);
+	list_add(&new->list, &parent->children);
+	if (asce & _ASCE_REAL_SPACE) {
+		/* nothing to protect, return right away */
+		new->initialized = true;
+		spin_unlock(&parent->shadow_lock);
+		return new;
+	}
+	spin_unlock(&parent->shadow_lock);
+	/* protect after insertion, so it will get properly invalidated */
+	down_read(&parent->mm->mmap_sem);
+	rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
+				((asce & _ASCE_TABLE_LENGTH) + 1) * 4096,
+				PROT_READ, PGSTE_VSIE_BIT);
+	up_read(&parent->mm->mmap_sem);
+	spin_lock(&parent->shadow_lock);
+	new->initialized = true;
+	if (rc) {
+		list_del(&new->list);
+		gmap_free(new);
+		new = ERR_PTR(rc);
+	}
+	spin_unlock(&parent->shadow_lock);
+	return new;
+}
+EXPORT_SYMBOL_GPL(gmap_shadow);
+
+/**
+ * gmap_shadow_r2t - create an empty shadow region 2 table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @r2t: parent gmap address of the region 2 table to get shadowed
+ * @fake: r2t references contiguous guest memory block, not a r2t
+ *
+ * The r2t parameter specifies the address of the source table. The
+ * four pages of the source table are made read-only in the parent gmap
+ * address space. A write to the source table area @r2t will automatically
+ * remove the shadow r2 table and all of its decendents.
+ *
+ * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
+ * shadow table structure is incomplete, -ENOMEM if out of memory and
+ * -EFAULT if an address in the parent gmap could not be resolved.
+ *
+ * Called with sg->mm->mmap_sem in read.
+ */
+int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
+		    int fake)
+{
+	unsigned long raddr, origin, offset, len;
+	unsigned long *s_r2t, *table;
+	struct page *page;
+	int rc;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	/* Allocate a shadow region second table */
+	page = alloc_pages(GFP_KERNEL, 2);
+	if (!page)
+		return -ENOMEM;
+	page->index = r2t & _REGION_ENTRY_ORIGIN;
+	if (fake)
+		page->index |= GMAP_SHADOW_FAKE_TABLE;
+	s_r2t = (unsigned long *) page_to_phys(page);
+	/* Install shadow region second table */
+	spin_lock(&sg->guest_table_lock);
+	table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */
+	if (!table) {
+		rc = -EAGAIN;		/* Race with unshadow */
+		goto out_free;
+	}
+	if (!(*table & _REGION_ENTRY_INVALID)) {
+		rc = 0;			/* Already established */
+		goto out_free;
+	} else if (*table & _REGION_ENTRY_ORIGIN) {
+		rc = -EAGAIN;		/* Race with shadow */
+		goto out_free;
+	}
+	crst_table_init(s_r2t, _REGION2_ENTRY_EMPTY);
+	/* mark as invalid as long as the parent table is not protected */
+	*table = (unsigned long) s_r2t | _REGION_ENTRY_LENGTH |
+		 _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
+	if (sg->edat_level >= 1)
+		*table |= (r2t & _REGION_ENTRY_PROTECT);
+	list_add(&page->lru, &sg->crst_list);
+	if (fake) {
+		/* nothing to protect for fake tables */
+		*table &= ~_REGION_ENTRY_INVALID;
+		spin_unlock(&sg->guest_table_lock);
+		return 0;
+	}
+	spin_unlock(&sg->guest_table_lock);
+	/* Make r2t read-only in parent gmap page table */
+	raddr = (saddr & 0xffe0000000000000UL) | _SHADOW_RMAP_REGION1;
+	origin = r2t & _REGION_ENTRY_ORIGIN;
+	offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * 4096;
+	len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset;
+	rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
+	spin_lock(&sg->guest_table_lock);
+	if (!rc) {
+		table = gmap_table_walk(sg, saddr, 4);
+		if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
+			      (unsigned long) s_r2t)
+			rc = -EAGAIN;		/* Race with unshadow */
+		else
+			*table &= ~_REGION_ENTRY_INVALID;
+	} else {
+		gmap_unshadow_r2t(sg, raddr);
+	}
+	spin_unlock(&sg->guest_table_lock);
+	return rc;
+out_free:
+	spin_unlock(&sg->guest_table_lock);
+	__free_pages(page, 2);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_r2t);
+
+/**
+ * gmap_shadow_r3t - create a shadow region 3 table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @r3t: parent gmap address of the region 3 table to get shadowed
+ * @fake: r3t references contiguous guest memory block, not a r3t
+ *
+ * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
+ * shadow table structure is incomplete, -ENOMEM if out of memory and
+ * -EFAULT if an address in the parent gmap could not be resolved.
+ *
+ * Called with sg->mm->mmap_sem in read.
+ */
+int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
+		    int fake)
+{
+	unsigned long raddr, origin, offset, len;
+	unsigned long *s_r3t, *table;
+	struct page *page;
+	int rc;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	/* Allocate a shadow region second table */
+	page = alloc_pages(GFP_KERNEL, 2);
+	if (!page)
+		return -ENOMEM;
+	page->index = r3t & _REGION_ENTRY_ORIGIN;
+	if (fake)
+		page->index |= GMAP_SHADOW_FAKE_TABLE;
+	s_r3t = (unsigned long *) page_to_phys(page);
+	/* Install shadow region second table */
+	spin_lock(&sg->guest_table_lock);
+	table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */
+	if (!table) {
+		rc = -EAGAIN;		/* Race with unshadow */
+		goto out_free;
+	}
+	if (!(*table & _REGION_ENTRY_INVALID)) {
+		rc = 0;			/* Already established */
+		goto out_free;
+	} else if (*table & _REGION_ENTRY_ORIGIN) {
+		rc = -EAGAIN;		/* Race with shadow */
+	}
+	crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY);
+	/* mark as invalid as long as the parent table is not protected */
+	*table = (unsigned long) s_r3t | _REGION_ENTRY_LENGTH |
+		 _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
+	if (sg->edat_level >= 1)
+		*table |= (r3t & _REGION_ENTRY_PROTECT);
+	list_add(&page->lru, &sg->crst_list);
+	if (fake) {
+		/* nothing to protect for fake tables */
+		*table &= ~_REGION_ENTRY_INVALID;
+		spin_unlock(&sg->guest_table_lock);
+		return 0;
+	}
+	spin_unlock(&sg->guest_table_lock);
+	/* Make r3t read-only in parent gmap page table */
+	raddr = (saddr & 0xfffffc0000000000UL) | _SHADOW_RMAP_REGION2;
+	origin = r3t & _REGION_ENTRY_ORIGIN;
+	offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * 4096;
+	len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset;
+	rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
+	spin_lock(&sg->guest_table_lock);
+	if (!rc) {
+		table = gmap_table_walk(sg, saddr, 3);
+		if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
+			      (unsigned long) s_r3t)
+			rc = -EAGAIN;		/* Race with unshadow */
+		else
+			*table &= ~_REGION_ENTRY_INVALID;
+	} else {
+		gmap_unshadow_r3t(sg, raddr);
+	}
+	spin_unlock(&sg->guest_table_lock);
+	return rc;
+out_free:
+	spin_unlock(&sg->guest_table_lock);
+	__free_pages(page, 2);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_r3t);
+
+/**
+ * gmap_shadow_sgt - create a shadow segment table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @sgt: parent gmap address of the segment table to get shadowed
+ * @fake: sgt references contiguous guest memory block, not a sgt
+ *
+ * Returns: 0 if successfully shadowed or already shadowed, -EAGAIN if the
+ * shadow table structure is incomplete, -ENOMEM if out of memory and
+ * -EFAULT if an address in the parent gmap could not be resolved.
+ *
+ * Called with sg->mm->mmap_sem in read.
+ */
+int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
+		    int fake)
+{
+	unsigned long raddr, origin, offset, len;
+	unsigned long *s_sgt, *table;
+	struct page *page;
+	int rc;
+
+	BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
+	/* Allocate a shadow segment table */
+	page = alloc_pages(GFP_KERNEL, 2);
+	if (!page)
+		return -ENOMEM;
+	page->index = sgt & _REGION_ENTRY_ORIGIN;
+	if (fake)
+		page->index |= GMAP_SHADOW_FAKE_TABLE;
+	s_sgt = (unsigned long *) page_to_phys(page);
+	/* Install shadow region second table */
+	spin_lock(&sg->guest_table_lock);
+	table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */
+	if (!table) {
+		rc = -EAGAIN;		/* Race with unshadow */
+		goto out_free;
+	}
+	if (!(*table & _REGION_ENTRY_INVALID)) {
+		rc = 0;			/* Already established */
+		goto out_free;
+	} else if (*table & _REGION_ENTRY_ORIGIN) {
+		rc = -EAGAIN;		/* Race with shadow */
+		goto out_free;
+	}
+	crst_table_init(s_sgt, _SEGMENT_ENTRY_EMPTY);
+	/* mark as invalid as long as the parent table is not protected */
+	*table = (unsigned long) s_sgt | _REGION_ENTRY_LENGTH |
+		 _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
+	if (sg->edat_level >= 1)
+		*table |= sgt & _REGION_ENTRY_PROTECT;
+	list_add(&page->lru, &sg->crst_list);
+	if (fake) {
+		/* nothing to protect for fake tables */
+		*table &= ~_REGION_ENTRY_INVALID;
+		spin_unlock(&sg->guest_table_lock);
+		return 0;
+	}
+	spin_unlock(&sg->guest_table_lock);
+	/* Make sgt read-only in parent gmap page table */
+	raddr = (saddr & 0xffffffff80000000UL) | _SHADOW_RMAP_REGION3;
+	origin = sgt & _REGION_ENTRY_ORIGIN;
+	offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * 4096;
+	len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset;
+	rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
+	spin_lock(&sg->guest_table_lock);
+	if (!rc) {
+		table = gmap_table_walk(sg, saddr, 2);
+		if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
+			      (unsigned long) s_sgt)
+			rc = -EAGAIN;		/* Race with unshadow */
+		else
+			*table &= ~_REGION_ENTRY_INVALID;
+	} else {
+		gmap_unshadow_sgt(sg, raddr);
+	}
+	spin_unlock(&sg->guest_table_lock);
+	return rc;
+out_free:
+	spin_unlock(&sg->guest_table_lock);
+	__free_pages(page, 2);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
+
+/**
+ * gmap_shadow_lookup_pgtable - find a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: the address in the shadow aguest address space
+ * @pgt: parent gmap address of the page table to get shadowed
+ * @dat_protection: if the pgtable is marked as protected by dat
+ * @fake: pgt references contiguous guest memory block, not a pgtable
+ *
+ * Returns 0 if the shadow page table was found and -EAGAIN if the page
+ * table was not found.
+ *
+ * Called with sg->mm->mmap_sem in read.
+ */
+int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
+			   unsigned long *pgt, int *dat_protection,
+			   int *fake)
+{
+	unsigned long *table;
+	struct page *page;
+	int rc;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	spin_lock(&sg->guest_table_lock);
+	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
+	if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
+		/* Shadow page tables are full pages (pte+pgste) */
+		page = pfn_to_page(*table >> PAGE_SHIFT);
+		*pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE;
+		*dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
+		*fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE);
+		rc = 0;
+	} else  {
+		rc = -EAGAIN;
+	}
+	spin_unlock(&sg->guest_table_lock);
+	return rc;
+
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup);
+
+/**
+ * gmap_shadow_pgt - instantiate a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @pgt: parent gmap address of the page table to get shadowed
+ * @fake: pgt references contiguous guest memory block, not a pgtable
+ *
+ * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
+ * shadow table structure is incomplete, -ENOMEM if out of memory,
+ * -EFAULT if an address in the parent gmap could not be resolved and
+ *
+ * Called with gmap->mm->mmap_sem in read
+ */
+int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
+		    int fake)
+{
+	unsigned long raddr, origin;
+	unsigned long *s_pgt, *table;
+	struct page *page;
+	int rc;
+
+	BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));
+	/* Allocate a shadow page table */
+	page = page_table_alloc_pgste(sg->mm);
+	if (!page)
+		return -ENOMEM;
+	page->index = pgt & _SEGMENT_ENTRY_ORIGIN;
+	if (fake)
+		page->index |= GMAP_SHADOW_FAKE_TABLE;
+	s_pgt = (unsigned long *) page_to_phys(page);
+	/* Install shadow page table */
+	spin_lock(&sg->guest_table_lock);
+	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
+	if (!table) {
+		rc = -EAGAIN;		/* Race with unshadow */
+		goto out_free;
+	}
+	if (!(*table & _SEGMENT_ENTRY_INVALID)) {
+		rc = 0;			/* Already established */
+		goto out_free;
+	} else if (*table & _SEGMENT_ENTRY_ORIGIN) {
+		rc = -EAGAIN;		/* Race with shadow */
+		goto out_free;
+	}
+	/* mark as invalid as long as the parent table is not protected */
+	*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
+		 (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
+	list_add(&page->lru, &sg->pt_list);
+	if (fake) {
+		/* nothing to protect for fake tables */
+		*table &= ~_SEGMENT_ENTRY_INVALID;
+		spin_unlock(&sg->guest_table_lock);
+		return 0;
+	}
+	spin_unlock(&sg->guest_table_lock);
+	/* Make pgt read-only in parent gmap page table (not the pgste) */
+	raddr = (saddr & 0xfffffffffff00000UL) | _SHADOW_RMAP_SEGMENT;
+	origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
+	rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE, PROT_READ);
+	spin_lock(&sg->guest_table_lock);
+	if (!rc) {
+		table = gmap_table_walk(sg, saddr, 1);
+		if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) !=
+			      (unsigned long) s_pgt)
+			rc = -EAGAIN;		/* Race with unshadow */
+		else
+			*table &= ~_SEGMENT_ENTRY_INVALID;
+	} else {
+		gmap_unshadow_pgt(sg, raddr);
+	}
+	spin_unlock(&sg->guest_table_lock);
+	return rc;
+out_free:
+	spin_unlock(&sg->guest_table_lock);
+	page_table_free_pgste(page);
+	return rc;
+
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_pgt);
+
+/**
+ * gmap_shadow_page - create a shadow page mapping
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @pte: pte in parent gmap address space to get shadowed
+ *
+ * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
+ * shadow table structure is incomplete, -ENOMEM if out of memory and
+ * -EFAULT if an address in the parent gmap could not be resolved.
+ *
+ * Called with sg->mm->mmap_sem in read.
+ */
+int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
+{
+	struct gmap *parent;
+	struct gmap_rmap *rmap;
+	unsigned long vmaddr, paddr;
+	spinlock_t *ptl;
+	pte_t *sptep, *tptep;
+	int prot;
+	int rc;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	parent = sg->parent;
+	prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE;
+
+	rmap = kzalloc(sizeof(*rmap), GFP_KERNEL);
+	if (!rmap)
+		return -ENOMEM;
+	rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE;
+
+	while (1) {
+		paddr = pte_val(pte) & PAGE_MASK;
+		vmaddr = __gmap_translate(parent, paddr);
+		if (IS_ERR_VALUE(vmaddr)) {
+			rc = vmaddr;
+			break;
+		}
+		rc = radix_tree_preload(GFP_KERNEL);
 		if (rc)
 			break;
-		/* Walk the process page table, lock and get pte pointer */
-		ptep = get_locked_pte(gmap->mm, addr, &ptl);
-		VM_BUG_ON(!ptep);
-		/* Set notification bit in the pgste of the pte */
-		if ((pte_val(*ptep) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
-			ptep_set_notify(gmap->mm, addr, ptep);
-			gaddr += PAGE_SIZE;
-			len -= PAGE_SIZE;
+		rc = -EAGAIN;
+		sptep = gmap_pte_op_walk(parent, paddr, &ptl);
+		if (sptep) {
+			spin_lock(&sg->guest_table_lock);
+			/* Get page table pointer */
+			tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
+			if (!tptep) {
+				spin_unlock(&sg->guest_table_lock);
+				gmap_pte_op_end(ptl);
+				radix_tree_preload_end();
+				break;
+			}
+			rc = ptep_shadow_pte(sg->mm, saddr, sptep, tptep, pte);
+			if (rc > 0) {
+				/* Success and a new mapping */
+				gmap_insert_rmap(sg, vmaddr, rmap);
+				rmap = NULL;
+				rc = 0;
+			}
+			gmap_pte_op_end(ptl);
+			spin_unlock(&sg->guest_table_lock);
 		}
-		pte_unmap_unlock(ptep, ptl);
+		radix_tree_preload_end();
+		if (!rc)
+			break;
+		rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
+		if (rc)
+			break;
 	}
-	up_read(&gmap->mm->mmap_sem);
+	kfree(rmap);
 	return rc;
 }
-EXPORT_SYMBOL_GPL(gmap_ipte_notify);
+EXPORT_SYMBOL_GPL(gmap_shadow_page);
+
+/**
+ * gmap_shadow_notify - handle notifications for shadow gmap
+ *
+ * Called with sg->parent->shadow_lock.
+ */
+static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
+			       unsigned long offset, pte_t *pte)
+{
+	struct gmap_rmap *rmap, *rnext, *head;
+	unsigned long gaddr, start, end, bits, raddr;
+	unsigned long *table;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	spin_lock(&sg->parent->guest_table_lock);
+	table = radix_tree_lookup(&sg->parent->host_to_guest,
+				  vmaddr >> PMD_SHIFT);
+	gaddr = table ? __gmap_segment_gaddr(table) + offset : 0;
+	spin_unlock(&sg->parent->guest_table_lock);
+	if (!table)
+		return;
+
+	spin_lock(&sg->guest_table_lock);
+	if (sg->removed) {
+		spin_unlock(&sg->guest_table_lock);
+		return;
+	}
+	/* Check for top level table */
+	start = sg->orig_asce & _ASCE_ORIGIN;
+	end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * 4096;
+	if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
+	    gaddr < end) {
+		/* The complete shadow table has to go */
+		gmap_unshadow(sg);
+		spin_unlock(&sg->guest_table_lock);
+		list_del(&sg->list);
+		gmap_put(sg);
+		return;
+	}
+	/* Remove the page table tree from on specific entry */
+	head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> 12);
+	gmap_for_each_rmap_safe(rmap, rnext, head) {
+		bits = rmap->raddr & _SHADOW_RMAP_MASK;
+		raddr = rmap->raddr ^ bits;
+		switch (bits) {
+		case _SHADOW_RMAP_REGION1:
+			gmap_unshadow_r2t(sg, raddr);
+			break;
+		case _SHADOW_RMAP_REGION2:
+			gmap_unshadow_r3t(sg, raddr);
+			break;
+		case _SHADOW_RMAP_REGION3:
+			gmap_unshadow_sgt(sg, raddr);
+			break;
+		case _SHADOW_RMAP_SEGMENT:
+			gmap_unshadow_pgt(sg, raddr);
+			break;
+		case _SHADOW_RMAP_PGTABLE:
+			gmap_unshadow_page(sg, raddr);
+			break;
+		}
+		kfree(rmap);
+	}
+	spin_unlock(&sg->guest_table_lock);
+}
 
 /**
  * ptep_notify - call all invalidation callbacks for a specific pte.
  * @mm: pointer to the process mm_struct
  * @addr: virtual address in the process address space
  * @pte: pointer to the page table entry
+ * @bits: bits from the pgste that caused the notify call
  *
  * This function is assumed to be called with the page table lock held
  * for the pte to notify.
  */
-void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
+void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
+		 pte_t *pte, unsigned long bits)
 {
 	unsigned long offset, gaddr;
 	unsigned long *table;
-	struct gmap_notifier *nb;
-	struct gmap *gmap;
+	struct gmap *gmap, *sg, *next;
 
 	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
 	offset = offset * (4096 / sizeof(pte_t));
-	spin_lock(&gmap_notifier_lock);
-	list_for_each_entry(gmap, &mm->context.gmap_list, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+		if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
+			spin_lock(&gmap->shadow_lock);
+			list_for_each_entry_safe(sg, next,
+						 &gmap->children, list)
+				gmap_shadow_notify(sg, vmaddr, offset, pte);
+			spin_unlock(&gmap->shadow_lock);
+		}
+		if (!(bits & PGSTE_IN_BIT))
+			continue;
+		spin_lock(&gmap->guest_table_lock);
 		table = radix_tree_lookup(&gmap->host_to_guest,
 					  vmaddr >> PMD_SHIFT);
-		if (!table)
-			continue;
-		gaddr = __gmap_segment_gaddr(table) + offset;
-		list_for_each_entry(nb, &gmap_notifier_list, list)
-			nb->notifier_call(gmap, gaddr);
+		if (table)
+			gaddr = __gmap_segment_gaddr(table) + offset;
+		spin_unlock(&gmap->guest_table_lock);
+		if (table)
+			gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
 	}
-	spin_unlock(&gmap_notifier_lock);
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(ptep_notify);
 
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index e2565d2d0c32..995f78532cc2 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -137,6 +137,29 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
 	return new;
 }
 
+#ifdef CONFIG_PGSTE
+
+struct page *page_table_alloc_pgste(struct mm_struct *mm)
+{
+	struct page *page;
+	unsigned long *table;
+
+	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+	if (page) {
+		table = (unsigned long *) page_to_phys(page);
+		clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+		clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+	}
+	return page;
+}
+
+void page_table_free_pgste(struct page *page)
+{
+	__free_page(page);
+}
+
+#endif /* CONFIG_PGSTE */
+
 /*
  * page table entry allocation/free routines.
  */
@@ -149,7 +172,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	/* Try to get a fragment of a 4K page as a 2K page table */
 	if (!mm_alloc_pgste(mm)) {
 		table = NULL;
-		spin_lock_bh(&mm->context.list_lock);
+		spin_lock_bh(&mm->context.pgtable_lock);
 		if (!list_empty(&mm->context.pgtable_list)) {
 			page = list_first_entry(&mm->context.pgtable_list,
 						struct page, lru);
@@ -164,7 +187,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 				list_del(&page->lru);
 			}
 		}
-		spin_unlock_bh(&mm->context.list_lock);
+		spin_unlock_bh(&mm->context.pgtable_lock);
 		if (table)
 			return table;
 	}
@@ -187,9 +210,9 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 		/* Return the first 2K fragment of the page */
 		atomic_set(&page->_mapcount, 1);
 		clear_table(table, _PAGE_INVALID, PAGE_SIZE);
-		spin_lock_bh(&mm->context.list_lock);
+		spin_lock_bh(&mm->context.pgtable_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
-		spin_unlock_bh(&mm->context.list_lock);
+		spin_unlock_bh(&mm->context.pgtable_lock);
 	}
 	return table;
 }
@@ -203,13 +226,13 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	if (!mm_alloc_pgste(mm)) {
 		/* Free 2K page table fragment of a 4K page */
 		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
-		spin_lock_bh(&mm->context.list_lock);
+		spin_lock_bh(&mm->context.pgtable_lock);
 		mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
 		if (mask & 3)
 			list_add(&page->lru, &mm->context.pgtable_list);
 		else
 			list_del(&page->lru);
-		spin_unlock_bh(&mm->context.list_lock);
+		spin_unlock_bh(&mm->context.pgtable_lock);
 		if (mask != 0)
 			return;
 	}
@@ -235,13 +258,13 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
 		return;
 	}
 	bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
-	spin_lock_bh(&mm->context.list_lock);
+	spin_lock_bh(&mm->context.pgtable_lock);
 	mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
 	if (mask & 3)
 		list_add_tail(&page->lru, &mm->context.pgtable_list);
 	else
 		list_del(&page->lru);
-	spin_unlock_bh(&mm->context.list_lock);
+	spin_unlock_bh(&mm->context.pgtable_lock);
 	table = (unsigned long *) (__pa(table) | (1U << bit));
 	tlb_remove_table(tlb, table);
 }
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index b98d1a152d46..5f092015aaa7 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -174,14 +174,17 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
 	return pgste;
 }
 
-static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
-					unsigned long addr,
-					pte_t *ptep, pgste_t pgste)
+static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
+				       unsigned long addr,
+				       pte_t *ptep, pgste_t pgste)
 {
 #ifdef CONFIG_PGSTE
-	if (pgste_val(pgste) & PGSTE_IN_BIT) {
-		pgste_val(pgste) &= ~PGSTE_IN_BIT;
-		ptep_notify(mm, addr, ptep);
+	unsigned long bits;
+
+	bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
+	if (bits) {
+		pgste_val(pgste) ^= bits;
+		ptep_notify(mm, addr, ptep, bits);
 	}
 #endif
 	return pgste;
@@ -194,7 +197,7 @@ static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
 
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
-		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
+		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
 	}
 	return pgste;
 }
@@ -459,6 +462,90 @@ void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 	preempt_enable();
 }
 
+/**
+ * ptep_force_prot - change access rights of a locked pte
+ * @mm: pointer to the process mm_struct
+ * @addr: virtual address in the guest address space
+ * @ptep: pointer to the page table entry
+ * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bit: pgste bit to set (e.g. for notification)
+ *
+ * Returns 0 if the access rights were changed and -EAGAIN if the current
+ * and requested access rights are incompatible.
+ */
+int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
+		    pte_t *ptep, int prot, unsigned long bit)
+{
+	pte_t entry;
+	pgste_t pgste;
+	int pte_i, pte_p;
+
+	pgste = pgste_get_lock(ptep);
+	entry = *ptep;
+	/* Check pte entry after all locks have been acquired */
+	pte_i = pte_val(entry) & _PAGE_INVALID;
+	pte_p = pte_val(entry) & _PAGE_PROTECT;
+	if ((pte_i && (prot != PROT_NONE)) ||
+	    (pte_p && (prot & PROT_WRITE))) {
+		pgste_set_unlock(ptep, pgste);
+		return -EAGAIN;
+	}
+	/* Change access rights and set pgste bit */
+	if (prot == PROT_NONE && !pte_i) {
+		ptep_flush_direct(mm, addr, ptep);
+		pgste = pgste_update_all(entry, pgste, mm);
+		pte_val(entry) |= _PAGE_INVALID;
+	}
+	if (prot == PROT_READ && !pte_p) {
+		ptep_flush_direct(mm, addr, ptep);
+		pte_val(entry) &= ~_PAGE_INVALID;
+		pte_val(entry) |= _PAGE_PROTECT;
+	}
+	pgste_val(pgste) |= bit;
+	pgste = pgste_set_pte(ptep, pgste, entry);
+	pgste_set_unlock(ptep, pgste);
+	return 0;
+}
+
+int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
+		    pte_t *sptep, pte_t *tptep, pte_t pte)
+{
+	pgste_t spgste, tpgste;
+	pte_t spte, tpte;
+	int rc = -EAGAIN;
+
+	if (!(pte_val(*tptep) & _PAGE_INVALID))
+		return 0;	/* already shadowed */
+	spgste = pgste_get_lock(sptep);
+	spte = *sptep;
+	if (!(pte_val(spte) & _PAGE_INVALID) &&
+	    !((pte_val(spte) & _PAGE_PROTECT) &&
+	      !(pte_val(pte) & _PAGE_PROTECT))) {
+		pgste_val(spgste) |= PGSTE_VSIE_BIT;
+		tpgste = pgste_get_lock(tptep);
+		pte_val(tpte) = (pte_val(spte) & PAGE_MASK) |
+				(pte_val(pte) & _PAGE_PROTECT);
+		/* don't touch the storage key - it belongs to parent pgste */
+		tpgste = pgste_set_pte(tptep, tpgste, tpte);
+		pgste_set_unlock(tptep, tpgste);
+		rc = 1;
+	}
+	pgste_set_unlock(sptep, spgste);
+	return rc;
+}
+
+void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
+{
+	pgste_t pgste;
+
+	pgste = pgste_get_lock(ptep);
+	/* notifier is called by the caller */
+	ptep_flush_direct(mm, saddr, ptep);
+	/* don't touch the storage key - it belongs to parent pgste */
+	pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
+	pgste_set_unlock(ptep, pgste);
+}
+
 static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
 {
 	if (!non_swap_entry(entry))
@@ -532,7 +619,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
 	pgste_val(pgste) &= ~PGSTE_UC_BIT;
 	pte = *ptep;
 	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
-		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
+		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
 		__ptep_ipte(addr, ptep);
 		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
 			pte_val(pte) |= _PAGE_PROTECT;
@@ -555,12 +642,9 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 	pgste_t old, new;
 	pte_t *ptep;
 
-	down_read(&mm->mmap_sem);
 	ptep = get_locked_pte(mm, addr, &ptl);
-	if (unlikely(!ptep)) {
-		up_read(&mm->mmap_sem);
+	if (unlikely(!ptep))
 		return -EFAULT;
-	}
 
 	new = old = pgste_get_lock(ptep);
 	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
@@ -587,45 +671,100 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 
 	pgste_set_unlock(ptep, new);
 	pte_unmap_unlock(ptep, ptl);
-	up_read(&mm->mmap_sem);
 	return 0;
 }
 EXPORT_SYMBOL(set_guest_storage_key);
 
-unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
+/**
+ * Conditionally set a guest storage key (handling csske).
+ * oldkey will be updated when either mr or mc is set and a pointer is given.
+ *
+ * Returns 0 if a guests storage key update wasn't necessary, 1 if the guest
+ * storage key was updated and -EFAULT on access errors.
+ */
+int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+			       unsigned char key, unsigned char *oldkey,
+			       bool nq, bool mr, bool mc)
+{
+	unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT;
+	int rc;
+
+	/* we can drop the pgste lock between getting and setting the key */
+	if (mr | mc) {
+		rc = get_guest_storage_key(current->mm, addr, &tmp);
+		if (rc)
+			return rc;
+		if (oldkey)
+			*oldkey = tmp;
+		if (!mr)
+			mask |= _PAGE_REFERENCED;
+		if (!mc)
+			mask |= _PAGE_CHANGED;
+		if (!((tmp ^ key) & mask))
+			return 0;
+	}
+	rc = set_guest_storage_key(current->mm, addr, key, nq);
+	return rc < 0 ? rc : 1;
+}
+EXPORT_SYMBOL(cond_set_guest_storage_key);
+
+/**
+ * Reset a guest reference bit (rrbe), returning the reference and changed bit.
+ *
+ * Returns < 0 in case of error, otherwise the cc to be reported to the guest.
+ */
+int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
 {
-	unsigned char key;
 	spinlock_t *ptl;
-	pgste_t pgste;
+	pgste_t old, new;
 	pte_t *ptep;
+	int cc = 0;
 
-	down_read(&mm->mmap_sem);
 	ptep = get_locked_pte(mm, addr, &ptl);
-	if (unlikely(!ptep)) {
-		up_read(&mm->mmap_sem);
+	if (unlikely(!ptep))
 		return -EFAULT;
-	}
-	pgste = pgste_get_lock(ptep);
 
-	if (pte_val(*ptep) & _PAGE_INVALID) {
-		key  = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
-		key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
-		key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
-		key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
-	} else {
-		key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
+	new = old = pgste_get_lock(ptep);
+	/* Reset guest reference bit only */
+	pgste_val(new) &= ~PGSTE_GR_BIT;
 
-		/* Reflect guest's logical view, not physical */
-		if (pgste_val(pgste) & PGSTE_GR_BIT)
-			key |= _PAGE_REFERENCED;
-		if (pgste_val(pgste) & PGSTE_GC_BIT)
-			key |= _PAGE_CHANGED;
+	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+		cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
+		/* Merge real referenced bit into host-set */
+		pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
 	}
+	/* Reflect guest's logical view, not physical */
+	cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
+	/* Changing the guest storage key is considered a change of the page */
+	if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
+		pgste_val(new) |= PGSTE_UC_BIT;
+
+	pgste_set_unlock(ptep, new);
+	pte_unmap_unlock(ptep, ptl);
+	return 0;
+}
+EXPORT_SYMBOL(reset_guest_reference_bit);
+
+int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+			  unsigned char *key)
+{
+	spinlock_t *ptl;
+	pgste_t pgste;
+	pte_t *ptep;
 
+	ptep = get_locked_pte(mm, addr, &ptl);
+	if (unlikely(!ptep))
+		return -EFAULT;
+
+	pgste = pgste_get_lock(ptep);
+	*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+	if (!(pte_val(*ptep) & _PAGE_INVALID))
+		*key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
+	/* Reflect guest's logical view, not physical */
+	*key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
 	pgste_set_unlock(ptep, pgste);
 	pte_unmap_unlock(ptep, ptl);
-	up_read(&mm->mmap_sem);
-	return key;
+	return 0;
 }
 EXPORT_SYMBOL(get_guest_storage_key);
 #endif
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 070f1ae5cfad..7297fce9bf80 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -286,7 +286,7 @@ static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
 static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 				     unsigned long offset, size_t size,
 				     enum dma_data_direction direction,
-				     struct dma_attrs *attrs)
+				     unsigned long attrs)
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	unsigned long nr_pages, iommu_page_index;
@@ -332,7 +332,7 @@ out_err:
 
 static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
 				 size_t size, enum dma_data_direction direction,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	unsigned long iommu_page_index;
@@ -355,7 +355,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
 
 static void *s390_dma_alloc(struct device *dev, size_t size,
 			    dma_addr_t *dma_handle, gfp_t flag,
-			    struct dma_attrs *attrs)
+			    unsigned long attrs)
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	struct page *page;
@@ -370,7 +370,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
 	pa = page_to_phys(page);
 	memset((void *) pa, 0, size);
 
-	map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, NULL);
+	map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0);
 	if (dma_mapping_error(dev, map)) {
 		free_pages(pa, get_order(size));
 		return NULL;
@@ -384,19 +384,19 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
 
 static void s390_dma_free(struct device *dev, size_t size,
 			  void *pa, dma_addr_t dma_handle,
-			  struct dma_attrs *attrs)
+			  unsigned long attrs)
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 
 	size = PAGE_ALIGN(size);
 	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
-	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
+	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0);
 	free_pages((unsigned long) pa, get_order(size));
 }
 
 static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
 			   int nr_elements, enum dma_data_direction dir,
-			   struct dma_attrs *attrs)
+			   unsigned long attrs)
 {
 	int mapped_elements = 0;
 	struct scatterlist *s;
@@ -405,7 +405,7 @@ static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
 	for_each_sg(sg, s, nr_elements, i) {
 		struct page *page = sg_page(s);
 		s->dma_address = s390_dma_map_pages(dev, page, s->offset,
-						    s->length, dir, NULL);
+						    s->length, dir, 0);
 		if (!dma_mapping_error(dev, s->dma_address)) {
 			s->dma_length = s->length;
 			mapped_elements++;
@@ -419,7 +419,7 @@ unmap:
 	for_each_sg(sg, s, mapped_elements, i) {
 		if (s->dma_address)
 			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
-					     dir, NULL);
+					     dir, 0);
 		s->dma_address = 0;
 		s->dma_length = 0;
 	}
@@ -429,13 +429,14 @@ unmap:
 
 static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
 			      int nr_elements, enum dma_data_direction dir,
-			      struct dma_attrs *attrs)
+			      unsigned long attrs)
 {
 	struct scatterlist *s;
 	int i;
 
 	for_each_sg(sg, s, nr_elements, i) {
-		s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, dir, NULL);
+		s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, dir,
+				     0);
 		s->dma_address = 0;
 		s->dma_length = 0;
 	}
diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c
index 9fbce49ad3bd..444c26c0f750 100644
--- a/arch/score/mm/init.c
+++ b/arch/score/mm/init.c
@@ -91,7 +91,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 }
 #endif
 
-void __init_refok free_initmem(void)
+void __ref free_initmem(void)
 {
 	free_initmem_default(POISON_FREE_INITMEM);
 }
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 0d5f3a9bb315..ee086958b2b2 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -38,6 +38,7 @@ config SUPERH
 	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CMOS_UPDATE if SH_SH03 || SH_DREAMCAST
+	select GENERIC_SCHED_CLOCK
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select HAVE_MOD_ARCH_SPECIFIC if DWARF_UNWINDER
@@ -45,6 +46,7 @@ config SUPERH
 	select OLD_SIGSUSPEND
 	select OLD_SIGACTION
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select HAVE_NMI
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
@@ -184,6 +186,12 @@ config CPU_SH2A
 	select CPU_SH2
 	select UNCACHED_MAPPING
 
+config CPU_J2
+	bool
+	select CPU_SH2
+	select OF
+	select OF_EARLY_FLATTREE
+
 config CPU_SH3
 	bool
 	select CPU_HAS_INTEVT
@@ -250,6 +258,12 @@ config CPU_SUBTYPE_SH7619
 	select CPU_SH2
 	select SYS_SUPPORTS_SH_CMT
 
+config CPU_SUBTYPE_J2
+	bool "Support J2 processor"
+	select CPU_J2
+	select SYS_SUPPORTS_SMP
+	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+
 # SH-2A Processor Support
 
 config CPU_SUBTYPE_SH7201
@@ -739,6 +753,26 @@ endmenu
 
 menu "Boot options"
 
+config USE_BUILTIN_DTB
+	bool "Use builtin DTB"
+	default n
+	depends on SH_DEVICE_TREE
+	help
+	  Link a device tree blob for particular hardware into the kernel,
+	  suppressing use of the DTB pointer provided by the bootloader.
+	  This option should only be used with legacy bootloaders that are
+	  not capable of providing a DTB to the kernel, or for experimental
+	  hardware without stable device tree bindings.
+
+config BUILTIN_DTB_SOURCE
+	string "Source file for builtin DTB"
+	default ""
+	depends on USE_BUILTIN_DTB
+	help
+	  Base name (without suffix, relative to arch/sh/boot/dts) for the
+	  a DTS file that will be used to produce the DTB linked into the
+	  kernel.
+
 config ZERO_PAGE_OFFSET
 	hex
 	default "0x00010000" if PAGE_SIZE_64KB || SH_RTS7751R2D || \
diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index bf5b3f5f4962..00476662ac2c 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -31,6 +31,7 @@ isa-y					:= $(isa-y)-up
 endif
 
 cflags-$(CONFIG_CPU_SH2)		:= $(call cc-option,-m2,)
+cflags-$(CONFIG_CPU_J2)			:= $(call cc-option,-mj2,)
 cflags-$(CONFIG_CPU_SH2A)		+= $(call cc-option,-m2a,) \
 					   $(call cc-option,-m2a-nofpu,) \
 					   $(call cc-option,-m4-nofpu,)
@@ -130,6 +131,8 @@ head-y	:= arch/sh/kernel/head_$(BITS).o
 core-y				+= arch/sh/kernel/ arch/sh/mm/ arch/sh/boards/
 core-$(CONFIG_SH_FPU_EMU)	+= arch/sh/math-emu/
 
+core-$(CONFIG_USE_BUILTIN_DTB)	+= arch/sh/boot/dts/
+
 # Mach groups
 machdir-$(CONFIG_SOLUTION_ENGINE)		+= mach-se
 machdir-$(CONFIG_SH_HP6XX)			+= mach-hp6xx
diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index e0db04664e2e..e9c2c42031fe 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -11,6 +11,7 @@ config SH_DEVICE_TREE
 	select OF
 	select OF_EARLY_FLATTREE
 	select CLKSRC_OF
+	select COMMON_CLK
 	select GENERIC_CALIBRATE_DELAY
 	help
 	  Select Board Described by Device Tree to build a kernel that
diff --git a/arch/sh/boards/board-secureedge5410.c b/arch/sh/boards/board-secureedge5410.c
index 98b36205aa7b..97ec67ffec2b 100644
--- a/arch/sh/boards/board-secureedge5410.c
+++ b/arch/sh/boards/board-secureedge5410.c
@@ -14,7 +14,6 @@
 #include <linux/interrupt.h>
 #include <linux/timer.h>
 #include <linux/delay.h>
-#include <linux/module.h>
 #include <linux/sched.h>
 #include <asm/machvec.h>
 #include <mach/secureedge5410.h>
@@ -49,7 +48,7 @@ static int __init eraseconfig_init(void)
 				irq);
 	return 0;
 }
-module_init(eraseconfig_init);
+device_initcall(eraseconfig_init);
 
 /*
  * Initialize IRQ setting
diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c
index 911ffb9f115b..1fb6d5714bae 100644
--- a/arch/sh/boards/of-generic.c
+++ b/arch/sh/boards/of-generic.c
@@ -124,13 +124,22 @@ static void __init sh_of_time_init(void)
 
 static void __init sh_of_setup(char **cmdline_p)
 {
+	struct device_node *root;
+
+#ifdef CONFIG_USE_BUILTIN_DTB
+	unflatten_and_copy_device_tree();
+#else
 	unflatten_device_tree();
+#endif
 
 	board_time_init = sh_of_time_init;
 
-	sh_mv.mv_name = of_flat_dt_get_machine_name();
-	if (!sh_mv.mv_name)
-		sh_mv.mv_name = "Unknown SH model";
+	sh_mv.mv_name = "Unknown SH model";
+	root = of_find_node_by_path("/");
+	if (root) {
+		of_property_read_string(root, "model", &sh_mv.mv_name);
+		of_node_put(root);
+	}
 
 	sh_of_smp_probe();
 }
diff --git a/arch/sh/boot/dts/Makefile b/arch/sh/boot/dts/Makefile
new file mode 100644
index 000000000000..e5ce3a0de7f4
--- /dev/null
+++ b/arch/sh/boot/dts/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_USE_BUILTIN_DTB) += $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o
+
+clean-files := *.dtb.S
diff --git a/arch/sh/boot/dts/j2_mimas_v2.dts b/arch/sh/boot/dts/j2_mimas_v2.dts
new file mode 100755
index 000000000000..880de75360b3
--- /dev/null
+++ b/arch/sh/boot/dts/j2_mimas_v2.dts
@@ -0,0 +1,96 @@
+/dts-v1/;
+
+/ {
+	compatible = "jcore,j2-soc";
+	model = "J2 FPGA SoC on Mimas v2 board";
+
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	interrupt-parent = <&aic>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "jcore,j2";
+			reg = <0>;
+			clock-frequency = <50000000>;
+			d-cache-size = <8192>;
+			i-cache-size = <8192>;
+			d-cache-block-size = <16>;
+			i-cache-block-size = <16>;
+		};
+	};
+
+	memory@10000000 {
+		device_type = "memory";
+		reg = <0x10000000 0x4000000>;
+	};
+
+	aliases {
+		serial0 = &uart0;
+		spi0 = &spi0;
+	};
+
+	chosen {
+		stdout-path = "serial0";
+	};
+
+	soc@abcd0000 {
+		compatible = "simple-bus";
+		ranges = <0 0xabcd0000 0x100000>;
+
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		aic: interrupt-controller@200 {
+			compatible = "jcore,aic1";
+			reg = <0x200 0x10>;
+			interrupt-controller;
+			#interrupt-cells = <1>;
+		};
+
+		cache-controller@c0 {
+			compatible = "jcore,cache";
+			reg = <0xc0 4>;
+		};
+
+		timer@200 {
+			compatible = "jcore,pit";
+			reg = <0x200 0x30>;
+			interrupts = <0x48>;
+		};
+
+		spi0: spi@40 {
+			compatible = "jcore,spi2";
+
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			spi-max-frequency = <25000000>;
+
+			reg = <0x40 0x8>;
+
+			sdcard@0 {
+				compatible = "mmc-spi-slot";
+				reg = <0>;
+				spi-max-frequency = <25000000>;
+				voltage-ranges = <3200 3400>;
+				mode = <0>;
+			};
+		};
+
+		uart0: serial@100 {
+			clock-frequency = <125000000>;
+			compatible = "xlnx,xps-uartlite-1.00.a";
+			current-speed = <19200>;
+			device_type = "serial";
+			interrupts = <0x12>;
+			port-number = <0>;
+			reg = <0x100 0x10>;
+		};
+	};
+};
diff --git a/arch/sh/configs/j2_defconfig b/arch/sh/configs/j2_defconfig
new file mode 100644
index 000000000000..94d1eca52f72
--- /dev/null
+++ b/arch/sh/configs/j2_defconfig
@@ -0,0 +1,40 @@
+CONFIG_SMP=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_CPU_SUBTYPE_J2=y
+CONFIG_MEMORY_START=0x10000000
+CONFIG_MEMORY_SIZE=0x04000000
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_SH_DEVICE_TREE=y
+CONFIG_HZ_100=y
+CONFIG_CMDLINE_OVERWRITE=y
+CONFIG_CMDLINE="console=ttyUL0 earlycon"
+CONFIG_BINFMT_ELF_FDPIC=y
+CONFIG_BINFMT_FLAT=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_NETDEVICES=y
+CONFIG_SERIAL_UARTLITE=y
+CONFIG_SERIAL_UARTLITE_CONSOLE=y
+CONFIG_I2C=y
+CONFIG_SPI=y
+CONFIG_SPI_JCORE=y
+CONFIG_WATCHDOG=y
+CONFIG_MMC=y
+CONFIG_MMC_SPI=y
+CONFIG_CLKSRC_JCORE_PIT=y
+CONFIG_JCORE_AIC=y
+CONFIG_EXT4_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
+CONFIG_FAT_DEFAULT_UTF8=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_UTF8=y
diff --git a/arch/sh/drivers/heartbeat.c b/arch/sh/drivers/heartbeat.c
index 7efc9c354fc7..49bace446a1a 100644
--- a/arch/sh/drivers/heartbeat.c
+++ b/arch/sh/drivers/heartbeat.c
@@ -19,7 +19,6 @@
  * for more details.
  */
 #include <linux/init.h>
-#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/sched.h>
 #include <linux/timer.h>
@@ -139,26 +138,11 @@ static int heartbeat_drv_probe(struct platform_device *pdev)
 	return mod_timer(&hd->timer, jiffies + 1);
 }
 
-static int heartbeat_drv_remove(struct platform_device *pdev)
-{
-	struct heartbeat_data *hd = platform_get_drvdata(pdev);
-
-	del_timer_sync(&hd->timer);
-	iounmap(hd->base);
-
-	platform_set_drvdata(pdev, NULL);
-
-	if (!pdev->dev.platform_data)
-		kfree(hd);
-
-	return 0;
-}
-
 static struct platform_driver heartbeat_driver = {
 	.probe		= heartbeat_drv_probe,
-	.remove		= heartbeat_drv_remove,
 	.driver		= {
-		.name	= DRV_NAME,
+		.name			= DRV_NAME,
+		.suppress_bind_attrs	= true,
 	},
 };
 
@@ -167,14 +151,4 @@ static int __init heartbeat_init(void)
 	printk(KERN_NOTICE DRV_NAME ": version %s loaded\n", DRV_VERSION);
 	return platform_driver_register(&heartbeat_driver);
 }
-
-static void __exit heartbeat_exit(void)
-{
-	platform_driver_unregister(&heartbeat_driver);
-}
-module_init(heartbeat_init);
-module_exit(heartbeat_exit);
-
-MODULE_VERSION(DRV_VERSION);
-MODULE_AUTHOR("Paul Mundt");
-MODULE_LICENSE("GPL v2");
+device_initcall(heartbeat_init);
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index d5462b7bc514..84563e39a5b8 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -221,7 +221,7 @@ pcibios_bus_report_status_early(struct pci_channel *hose,
  * We can't use pci_find_device() here since we are
  * called from interrupt context.
  */
-static void __init_refok
+static void __ref
 pcibios_bus_report_status(struct pci_bus *bus, unsigned int status_mask,
 			  int warn)
 {
@@ -256,7 +256,7 @@ pcibios_bus_report_status(struct pci_bus *bus, unsigned int status_mask,
 			pcibios_bus_report_status(dev->subordinate, status_mask, warn);
 }
 
-void __init_refok pcibios_report_status(unsigned int status_mask, int warn)
+void __ref pcibios_report_status(unsigned int status_mask, int warn)
 {
 	struct pci_channel *hose;
 
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index c399e1c55685..8a7bd80c8b33 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -1,6 +1,12 @@
 #ifndef __ASM_SH_ATOMIC_H
 #define __ASM_SH_ATOMIC_H
 
+#if defined(CONFIG_CPU_J2)
+
+#include <asm-generic/atomic.h>
+
+#else
+
 /*
  * Atomic operations that C can't guarantee us.  Useful for
  * resource counting etc..
@@ -63,4 +69,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 	return c;
 }
 
+#endif /* CONFIG_CPU_J2 */
+
 #endif /* __ASM_SH_ATOMIC_H */
diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h
index 8a84e05adb2e..3c30b6e166b6 100644
--- a/arch/sh/include/asm/barrier.h
+++ b/arch/sh/include/asm/barrier.h
@@ -29,6 +29,11 @@
 #define wmb()		mb()
 #define ctrl_barrier()	__icbi(PAGE_OFFSET)
 #else
+#if defined(CONFIG_CPU_J2) && defined(CONFIG_SMP)
+#define __smp_mb()	do { int tmp = 0; __asm__ __volatile__ ("cas.l %0,%0,@%1" : "+r"(tmp) : "z"(&tmp) : "memory", "t"); } while(0)
+#define __smp_rmb()	__smp_mb()
+#define __smp_wmb()	__smp_mb()
+#endif
 #define ctrl_barrier()	__asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop")
 #endif
 
diff --git a/arch/sh/include/asm/bitops-cas.h b/arch/sh/include/asm/bitops-cas.h
new file mode 100644
index 000000000000..88f793c04d3c
--- /dev/null
+++ b/arch/sh/include/asm/bitops-cas.h
@@ -0,0 +1,93 @@
+#ifndef __ASM_SH_BITOPS_CAS_H
+#define __ASM_SH_BITOPS_CAS_H
+
+static inline unsigned __bo_cas(volatile unsigned *p, unsigned old, unsigned new)
+{
+	__asm__ __volatile__("cas.l %1,%0,@r0"
+		: "+r"(new)
+		: "r"(old), "z"(p)
+		: "t", "memory" );
+	return new;
+}
+
+static inline void set_bit(int nr, volatile void *addr)
+{
+	unsigned mask, old;
+	volatile unsigned *a = addr;
+
+	a += nr >> 5;
+	mask = 1U << (nr & 0x1f);
+
+	do old = *a;
+	while (__bo_cas(a, old, old|mask) != old);
+}
+
+static inline void clear_bit(int nr, volatile void *addr)
+{
+	unsigned mask, old;
+	volatile unsigned *a = addr;
+
+	a += nr >> 5;
+	mask = 1U << (nr & 0x1f);
+
+	do old = *a;
+	while (__bo_cas(a, old, old&~mask) != old);
+}
+
+static inline void change_bit(int nr, volatile void *addr)
+{
+	unsigned mask, old;
+	volatile unsigned *a = addr;
+
+	a += nr >> 5;
+	mask = 1U << (nr & 0x1f);
+
+	do old = *a;
+	while (__bo_cas(a, old, old^mask) != old);
+}
+
+static inline int test_and_set_bit(int nr, volatile void *addr)
+{
+	unsigned mask, old;
+	volatile unsigned *a = addr;
+
+	a += nr >> 5;
+	mask = 1U << (nr & 0x1f);
+
+	do old = *a;
+	while (__bo_cas(a, old, old|mask) != old);
+
+	return !!(old & mask);
+}
+
+static inline int test_and_clear_bit(int nr, volatile void *addr)
+{
+	unsigned mask, old;
+	volatile unsigned *a = addr;
+
+	a += nr >> 5;
+	mask = 1U << (nr & 0x1f);
+
+	do old = *a;
+	while (__bo_cas(a, old, old&~mask) != old);
+
+	return !!(old & mask);
+}
+
+static inline int test_and_change_bit(int nr, volatile void *addr)
+{
+	unsigned mask, old;
+	volatile unsigned *a = addr;
+
+	a += nr >> 5;
+	mask = 1U << (nr & 0x1f);
+
+	do old = *a;
+	while (__bo_cas(a, old, old^mask) != old);
+
+	return !!(old & mask);
+}
+
+#include <asm-generic/bitops/non-atomic.h>
+
+#endif /* __ASM_SH_BITOPS_CAS_H */
diff --git a/arch/sh/include/asm/bitops.h b/arch/sh/include/asm/bitops.h
index fc8e652cf173..a8699d60a8c4 100644
--- a/arch/sh/include/asm/bitops.h
+++ b/arch/sh/include/asm/bitops.h
@@ -18,6 +18,8 @@
 #include <asm/bitops-op32.h>
 #elif defined(CONFIG_CPU_SH4A)
 #include <asm/bitops-llsc.h>
+#elif defined(CONFIG_CPU_J2) && defined(CONFIG_SMP)
+#include <asm/bitops-cas.h>
 #else
 #include <asm-generic/bitops/atomic.h>
 #include <asm-generic/bitops/non-atomic.h>
diff --git a/arch/sh/include/asm/cmpxchg-cas.h b/arch/sh/include/asm/cmpxchg-cas.h
new file mode 100644
index 000000000000..d0d86649e8c1
--- /dev/null
+++ b/arch/sh/include/asm/cmpxchg-cas.h
@@ -0,0 +1,24 @@
+#ifndef __ASM_SH_CMPXCHG_CAS_H
+#define __ASM_SH_CMPXCHG_CAS_H
+
+static inline unsigned long
+__cmpxchg_u32(volatile u32 *m, unsigned long old, unsigned long new)
+{
+	__asm__ __volatile__("cas.l %1,%0,@r0"
+		: "+r"(new)
+		: "r"(old), "z"(m)
+		: "t", "memory" );
+	return new;
+}
+
+static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val)
+{
+	unsigned long old;
+	do old = *m;
+	while (__cmpxchg_u32(m, old, val) != old);
+	return old;
+}
+
+#include <asm/cmpxchg-xchg.h>
+
+#endif /* __ASM_SH_CMPXCHG_CAS_H */
diff --git a/arch/sh/include/asm/cmpxchg-xchg.h b/arch/sh/include/asm/cmpxchg-xchg.h
index 7219719c23a3..1e881f5db659 100644
--- a/arch/sh/include/asm/cmpxchg-xchg.h
+++ b/arch/sh/include/asm/cmpxchg-xchg.h
@@ -21,7 +21,7 @@ static inline u32 __xchg_cmpxchg(volatile void *ptr, u32 x, int size)
 	int off = (unsigned long)ptr % sizeof(u32);
 	volatile u32 *p = ptr - off;
 #ifdef __BIG_ENDIAN
-	int bitoff = (sizeof(u32) - 1 - off) * BITS_PER_BYTE;
+	int bitoff = (sizeof(u32) - size - off) * BITS_PER_BYTE;
 #else
 	int bitoff = off * BITS_PER_BYTE;
 #endif
diff --git a/arch/sh/include/asm/cmpxchg.h b/arch/sh/include/asm/cmpxchg.h
index 5225916c1057..3dfe0467a773 100644
--- a/arch/sh/include/asm/cmpxchg.h
+++ b/arch/sh/include/asm/cmpxchg.h
@@ -13,6 +13,8 @@
 #include <asm/cmpxchg-grb.h>
 #elif defined(CONFIG_CPU_SH4A)
 #include <asm/cmpxchg-llsc.h>
+#elif defined(CONFIG_CPU_J2) && defined(CONFIG_SMP)
+#include <asm/cmpxchg-cas.h>
 #else
 #include <asm/cmpxchg-irq.h>
 #endif
diff --git a/arch/sh/include/asm/dma-mapping.h b/arch/sh/include/asm/dma-mapping.h
index e11cf0c8206b..0052ad40e86d 100644
--- a/arch/sh/include/asm/dma-mapping.h
+++ b/arch/sh/include/asm/dma-mapping.h
@@ -17,9 +17,9 @@ void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 /* arch/sh/mm/consistent.c */
 extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 					dma_addr_t *dma_addr, gfp_t flag,
-					struct dma_attrs *attrs);
+					unsigned long attrs);
 extern void dma_generic_free_coherent(struct device *dev, size_t size,
 				      void *vaddr, dma_addr_t dma_handle,
-				      struct dma_attrs *attrs);
+				      unsigned long attrs);
 
 #endif /* __ASM_SH_DMA_MAPPING_H */
diff --git a/arch/sh/include/asm/futex-cas.h b/arch/sh/include/asm/futex-cas.h
new file mode 100644
index 000000000000..267cb7a5f101
--- /dev/null
+++ b/arch/sh/include/asm/futex-cas.h
@@ -0,0 +1,34 @@
+#ifndef __ASM_SH_FUTEX_CAS_H
+#define __ASM_SH_FUTEX_CAS_H
+
+static inline int atomic_futex_op_cmpxchg_inatomic(u32 *uval,
+						   u32 __user *uaddr,
+						   u32 oldval, u32 newval)
+{
+	int err = 0;
+	__asm__ __volatile__(
+		"1:\n\t"
+		"cas.l	%2, %1, @r0\n"
+		"2:\n\t"
+#ifdef CONFIG_MMU
+		".section	.fixup,\"ax\"\n"
+		"3:\n\t"
+		"mov.l	4f, %0\n\t"
+		"jmp	@%0\n\t"
+		" mov	%3, %0\n\t"
+		".balign	4\n"
+		"4:	.long	2b\n\t"
+		".previous\n"
+		".section	__ex_table,\"a\"\n\t"
+		".long	1b, 3b\n\t"
+		".previous"
+#endif
+		:"+r" (err), "+r" (newval)
+		:"r" (oldval), "i" (-EFAULT), "z" (uaddr)
+		:"t", "memory");
+	if (err) return err;
+	*uval = newval;
+	return 0;
+}
+
+#endif /* __ASM_SH_FUTEX_CAS_H */
diff --git a/arch/sh/include/asm/futex-irq.h b/arch/sh/include/asm/futex-irq.h
index 63d33129ea23..ab01dbee0a82 100644
--- a/arch/sh/include/asm/futex-irq.h
+++ b/arch/sh/include/asm/futex-irq.h
@@ -1,92 +1,6 @@
 #ifndef __ASM_SH_FUTEX_IRQ_H
 #define __ASM_SH_FUTEX_IRQ_H
 
-
-static inline int atomic_futex_op_xchg_set(int oparg, u32 __user *uaddr,
-					   int *oldval)
-{
-	unsigned long flags;
-	int ret;
-
-	local_irq_save(flags);
-
-	ret = get_user(*oldval, uaddr);
-	if (!ret)
-		ret = put_user(oparg, uaddr);
-
-	local_irq_restore(flags);
-
-	return ret;
-}
-
-static inline int atomic_futex_op_xchg_add(int oparg, u32 __user *uaddr,
-					   int *oldval)
-{
-	unsigned long flags;
-	int ret;
-
-	local_irq_save(flags);
-
-	ret = get_user(*oldval, uaddr);
-	if (!ret)
-		ret = put_user(*oldval + oparg, uaddr);
-
-	local_irq_restore(flags);
-
-	return ret;
-}
-
-static inline int atomic_futex_op_xchg_or(int oparg, u32 __user *uaddr,
-					  int *oldval)
-{
-	unsigned long flags;
-	int ret;
-
-	local_irq_save(flags);
-
-	ret = get_user(*oldval, uaddr);
-	if (!ret)
-		ret = put_user(*oldval | oparg, uaddr);
-
-	local_irq_restore(flags);
-
-	return ret;
-}
-
-static inline int atomic_futex_op_xchg_and(int oparg, u32 __user *uaddr,
-					   int *oldval)
-{
-	unsigned long flags;
-	int ret;
-
-	local_irq_save(flags);
-
-	ret = get_user(*oldval, uaddr);
-	if (!ret)
-		ret = put_user(*oldval & oparg, uaddr);
-
-	local_irq_restore(flags);
-
-	return ret;
-}
-
-static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr,
-					   int *oldval)
-{
-	unsigned long flags;
-	int ret;
-
-	local_irq_save(flags);
-
-	ret = get_user(*oldval, uaddr);
-	if (!ret)
-		ret = put_user(*oldval ^ oparg, uaddr);
-
-	local_irq_restore(flags);
-
-	return ret;
-}
-
 static inline int atomic_futex_op_cmpxchg_inatomic(u32 *uval,
 						   u32 __user *uaddr,
 						   u32 oldval, u32 newval)
diff --git a/arch/sh/include/asm/futex-llsc.h b/arch/sh/include/asm/futex-llsc.h
new file mode 100644
index 000000000000..23591703bec0
--- /dev/null
+++ b/arch/sh/include/asm/futex-llsc.h
@@ -0,0 +1,41 @@
+#ifndef __ASM_SH_FUTEX_LLSC_H
+#define __ASM_SH_FUTEX_LLSC_H
+
+static inline int atomic_futex_op_cmpxchg_inatomic(u32 *uval,
+						   u32 __user *uaddr,
+						   u32 oldval, u32 newval)
+{
+	int err = 0;
+	__asm__ __volatile__(
+		"synco\n"
+		"1:\n\t"
+		"movli.l	@%2, r0\n\t"
+		"mov	r0, %1\n\t"
+		"cmp/eq	%1, %4\n\t"
+		"bf	2f\n\t"
+		"mov	%5, r0\n\t"
+		"movco.l	r0, @%2\n\t"
+		"bf	1b\n"
+		"2:\n\t"
+		"synco\n\t"
+#ifdef CONFIG_MMU
+		".section	.fixup,\"ax\"\n"
+		"3:\n\t"
+		"mov.l	4f, %0\n\t"
+		"jmp	@%0\n\t"
+		" mov	%3, %0\n\t"
+		".balign	4\n"
+		"4:	.long	2b\n\t"
+		".previous\n"
+		".section	__ex_table,\"a\"\n\t"
+		".long	1b, 3b\n\t"
+		".previous"
+#endif
+		:"+r" (err), "=&r" (*uval)
+		:"r" (uaddr), "i" (-EFAULT), "r" (oldval), "r" (newval)
+		:"t", "memory", "r0");
+	if (err) return err;
+	return 0;
+}
+
+#endif /* __ASM_SH_FUTEX_LLSC_H */
diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h
index 7be39a646fbd..d0078747d308 100644
--- a/arch/sh/include/asm/futex.h
+++ b/arch/sh/include/asm/futex.h
@@ -7,16 +7,34 @@
 #include <linux/uaccess.h>
 #include <asm/errno.h>
 
-/* XXX: UP variants, fix for SH-4A and SMP.. */
+#if !defined(CONFIG_SMP)
 #include <asm/futex-irq.h>
+#elif defined(CONFIG_CPU_J2)
+#include <asm/futex-cas.h>
+#elif defined(CONFIG_CPU_SH4A)
+#include <asm/futex-llsc.h>
+#else
+#error SMP not supported on this configuration.
+#endif
+
+static inline int
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+			      u32 oldval, u32 newval)
+{
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+		return -EFAULT;
+
+	return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval);
+}
 
 static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 {
 	int op = (encoded_op >> 28) & 7;
 	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret;
+	u32 oparg = (encoded_op << 8) >> 20;
+	u32 cmparg = (encoded_op << 20) >> 20;
+	u32 oldval, newval, prev;
+	int ret;
 
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
@@ -26,26 +44,39 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 
 	pagefault_disable();
 
-	switch (op) {
-	case FUTEX_OP_SET:
-		ret = atomic_futex_op_xchg_set(oparg, uaddr, &oldval);
-		break;
-	case FUTEX_OP_ADD:
-		ret = atomic_futex_op_xchg_add(oparg, uaddr, &oldval);
-		break;
-	case FUTEX_OP_OR:
-		ret = atomic_futex_op_xchg_or(oparg, uaddr, &oldval);
-		break;
-	case FUTEX_OP_ANDN:
-		ret = atomic_futex_op_xchg_and(~oparg, uaddr, &oldval);
-		break;
-	case FUTEX_OP_XOR:
-		ret = atomic_futex_op_xchg_xor(oparg, uaddr, &oldval);
-		break;
-	default:
-		ret = -ENOSYS;
-		break;
-	}
+	do {
+		if (op == FUTEX_OP_SET)
+			ret = oldval = 0;
+		else
+			ret = get_user(oldval, uaddr);
+
+		if (ret) break;
+
+		switch (op) {
+		case FUTEX_OP_SET:
+			newval = oparg;
+			break;
+		case FUTEX_OP_ADD:
+			newval = oldval + oparg;
+			break;
+		case FUTEX_OP_OR:
+			newval = oldval | oparg;
+			break;
+		case FUTEX_OP_ANDN:
+			newval = oldval & ~oparg;
+			break;
+		case FUTEX_OP_XOR:
+			newval = oldval ^ oparg;
+			break;
+		default:
+			ret = -ENOSYS;
+			break;
+		}
+
+		if (ret) break;
+
+		ret = futex_atomic_cmpxchg_inatomic(&prev, uaddr, oldval, newval);
+	} while (!ret && prev != oldval);
 
 	pagefault_enable();
 
@@ -53,10 +84,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 		switch (cmp) {
 		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
 		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
+		case FUTEX_OP_CMP_LT: ret = ((int)oldval < (int)cmparg); break;
+		case FUTEX_OP_CMP_GE: ret = ((int)oldval >= (int)cmparg); break;
+		case FUTEX_OP_CMP_LE: ret = ((int)oldval <= (int)cmparg); break;
+		case FUTEX_OP_CMP_GT: ret = ((int)oldval > (int)cmparg); break;
 		default: ret = -ENOSYS;
 		}
 	}
@@ -64,15 +95,5 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 	return ret;
 }
 
-static inline int
-futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
-			      u32 oldval, u32 newval)
-{
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
-
-	return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval);
-}
-
 #endif /* __KERNEL__ */
 #endif /* __ASM_SH_FUTEX_H */
diff --git a/arch/sh/include/asm/mc146818rtc.h b/arch/sh/include/asm/mc146818rtc.h
deleted file mode 100644
index 0aee96a97330..000000000000
--- a/arch/sh/include/asm/mc146818rtc.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/*
- * Machine dependent access functions for RTC registers.
- */
-#ifndef _ASM_MC146818RTC_H
-#define _ASM_MC146818RTC_H
-
-#endif /* _ASM_MC146818RTC_H */
diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h
index 1506897648aa..f9a09942a32d 100644
--- a/arch/sh/include/asm/processor.h
+++ b/arch/sh/include/asm/processor.h
@@ -15,7 +15,7 @@
  */
 enum cpu_type {
 	/* SH-2 types */
-	CPU_SH7619,
+	CPU_SH7619, CPU_J2,
 
 	/* SH-2A types */
 	CPU_SH7201, CPU_SH7203, CPU_SH7206, CPU_SH7263, CPU_SH7264, CPU_SH7269,
diff --git a/arch/sh/include/asm/rtc.h b/arch/sh/include/asm/rtc.h
index 52b0c2dba979..f7b010d48af7 100644
--- a/arch/sh/include/asm/rtc.h
+++ b/arch/sh/include/asm/rtc.h
@@ -6,17 +6,6 @@ extern void (*board_time_init)(void);
 extern void (*rtc_sh_get_time)(struct timespec *);
 extern int (*rtc_sh_set_time)(const time_t);
 
-/* some dummy definitions */
-#define RTC_BATT_BAD 0x100	/* battery bad */
-#define RTC_SQWE 0x08		/* enable square-wave output */
-#define RTC_DM_BINARY 0x04	/* all time/date values are BCD if clear */
-#define RTC_24H 0x02		/* 24 hour mode - else hours bit 7 means pm */
-#define RTC_DST_EN 0x01	        /* auto switch DST - works f. USA only */
-
-struct rtc_time;
-unsigned int get_rtc_time(struct rtc_time *);
-int set_rtc_time(struct rtc_time *);
-
 #define RTC_CAP_4_DIGIT_YEAR	(1 << 0)
 
 struct sh_rtc_platform_info {
diff --git a/arch/sh/include/asm/spinlock-cas.h b/arch/sh/include/asm/spinlock-cas.h
new file mode 100644
index 000000000000..c46e8cc7b515
--- /dev/null
+++ b/arch/sh/include/asm/spinlock-cas.h
@@ -0,0 +1,117 @@
+/*
+ * include/asm-sh/spinlock-cas.h
+ *
+ * Copyright (C) 2015 SEI
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#ifndef __ASM_SH_SPINLOCK_CAS_H
+#define __ASM_SH_SPINLOCK_CAS_H
+
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
+static inline unsigned __sl_cas(volatile unsigned *p, unsigned old, unsigned new)
+{
+	__asm__ __volatile__("cas.l %1,%0,@r0"
+		: "+r"(new)
+		: "r"(old), "z"(p)
+		: "t", "memory" );
+	return new;
+}
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+#define arch_spin_is_locked(x)		((x)->lock <= 0)
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	smp_cond_load_acquire(&lock->lock, VAL > 0);
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	while (!__sl_cas(&lock->lock, 1, 0));
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	__sl_cas(&lock->lock, 0, 1);
+}
+
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	return __sl_cas(&lock->lock, 1, 0);
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts but no interrupt
+ * writers. For those circumstances we can "mix" irq-safe locks - any writer
+ * needs to get a irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+
+/**
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define arch_read_can_lock(x)	((x)->lock > 0)
+
+/**
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define arch_write_can_lock(x)	((x)->lock == RW_LOCK_BIAS)
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	unsigned old;
+	do old = rw->lock;
+	while (!old || __sl_cas(&rw->lock, old, old-1) != old);
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	unsigned old;
+	do old = rw->lock;
+	while (__sl_cas(&rw->lock, old, old+1) != old);
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	while (__sl_cas(&rw->lock, RW_LOCK_BIAS, 0) != RW_LOCK_BIAS);
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	__sl_cas(&rw->lock, 0, RW_LOCK_BIAS);
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	unsigned old;
+	do old = rw->lock;
+	while (old && __sl_cas(&rw->lock, old, old-1) != old);
+	return !!old;
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	return __sl_cas(&rw->lock, RW_LOCK_BIAS, 0) == RW_LOCK_BIAS;
+}
+
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
+
+#endif /* __ASM_SH_SPINLOCK_CAS_H */
diff --git a/arch/sh/include/asm/spinlock-llsc.h b/arch/sh/include/asm/spinlock-llsc.h
new file mode 100644
index 000000000000..cec78143fa83
--- /dev/null
+++ b/arch/sh/include/asm/spinlock-llsc.h
@@ -0,0 +1,224 @@
+/*
+ * include/asm-sh/spinlock-llsc.h
+ *
+ * Copyright (C) 2002, 2003 Paul Mundt
+ * Copyright (C) 2006, 2007 Akio Idehara
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#ifndef __ASM_SH_SPINLOCK_LLSC_H
+#define __ASM_SH_SPINLOCK_LLSC_H
+
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+#define arch_spin_is_locked(x)		((x)->lock <= 0)
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	smp_cond_load_acquire(&lock->lock, VAL > 0);
+}
+
+/*
+ * Simple spin lock operations.  There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions.  They have a cost.
+ */
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	unsigned long tmp;
+	unsigned long oldval;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%2, %0	! arch_spin_lock	\n\t"
+		"mov		%0, %1				\n\t"
+		"mov		#0, %0				\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"cmp/pl		%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
+		: "r" (&lock->lock)
+		: "t", "memory"
+	);
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__ (
+		"mov		#1, %0 ! arch_spin_unlock	\n\t"
+		"mov.l		%0, @%1				\n\t"
+		: "=&z" (tmp)
+		: "r" (&lock->lock)
+		: "t", "memory"
+	);
+}
+
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	unsigned long tmp, oldval;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%2, %0	! arch_spin_trylock	\n\t"
+		"mov		%0, %1				\n\t"
+		"mov		#0, %0				\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"synco						\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
+		: "r" (&lock->lock)
+		: "t", "memory"
+	);
+
+	return oldval;
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts but no interrupt
+ * writers. For those circumstances we can "mix" irq-safe locks - any writer
+ * needs to get a irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+
+/**
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define arch_read_can_lock(x)	((x)->lock > 0)
+
+/**
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define arch_write_can_lock(x)	((x)->lock == RW_LOCK_BIAS)
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! arch_read_lock	\n\t"
+		"cmp/pl		%0				\n\t"
+		"bf		1b				\n\t"
+		"add		#-1, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp)
+		: "r" (&rw->lock)
+		: "t", "memory"
+	);
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! arch_read_unlock	\n\t"
+		"add		#1, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp)
+		: "r" (&rw->lock)
+		: "t", "memory"
+	);
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! arch_write_lock	\n\t"
+		"cmp/hs		%2, %0				\n\t"
+		"bf		1b				\n\t"
+		"sub		%2, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp)
+		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
+		: "t", "memory"
+	);
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	__asm__ __volatile__ (
+		"mov.l		%1, @%0 ! arch_write_unlock	\n\t"
+		:
+		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
+		: "t", "memory"
+	);
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	unsigned long tmp, oldval;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%2, %0	! arch_read_trylock	\n\t"
+		"mov		%0, %1				\n\t"
+		"cmp/pl		%0				\n\t"
+		"bf		2f				\n\t"
+		"add		#-1, %0				\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"2:						\n\t"
+		"synco						\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
+		: "r" (&rw->lock)
+		: "t", "memory"
+	);
+
+	return (oldval > 0);
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	unsigned long tmp, oldval;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%2, %0	! arch_write_trylock	\n\t"
+		"mov		%0, %1				\n\t"
+		"cmp/hs		%3, %0				\n\t"
+		"bf		2f				\n\t"
+		"sub		%3, %0				\n\t"
+		"2:						\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"synco						\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
+		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
+		: "t", "memory"
+	);
+
+	return (oldval > (RW_LOCK_BIAS - 1));
+}
+
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
+
+#endif /* __ASM_SH_SPINLOCK_LLSC_H */
diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h
index 416834b60ad0..c2c61ea6a8e2 100644
--- a/arch/sh/include/asm/spinlock.h
+++ b/arch/sh/include/asm/spinlock.h
@@ -11,222 +11,12 @@
 #ifndef __ASM_SH_SPINLOCK_H
 #define __ASM_SH_SPINLOCK_H
 
-/*
- * The only locking implemented here uses SH-4A opcodes. For others,
- * split this out as per atomic-*.h.
- */
-#ifndef CONFIG_CPU_SH4A
-#error "Need movli.l/movco.l for spinlocks"
+#if defined(CONFIG_CPU_SH4A)
+#include <asm/spinlock-llsc.h>
+#elif defined(CONFIG_CPU_J2)
+#include <asm/spinlock-cas.h>
+#else
+#error "The configured cpu type does not support spinlocks"
 #endif
 
-#include <asm/barrier.h>
-#include <asm/processor.h>
-
-/*
- * Your basic SMP spinlocks, allowing only a single CPU anywhere
- */
-
-#define arch_spin_is_locked(x)		((x)->lock <= 0)
-#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->lock, VAL > 0);
-}
-
-/*
- * Simple spin lock operations.  There are two variants, one clears IRQ's
- * on the local processor, one does not.
- *
- * We make no fairness assumptions.  They have a cost.
- */
-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-	unsigned long tmp;
-	unsigned long oldval;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%2, %0	! arch_spin_lock	\n\t"
-		"mov		%0, %1				\n\t"
-		"mov		#0, %0				\n\t"
-		"movco.l	%0, @%2				\n\t"
-		"bf		1b				\n\t"
-		"cmp/pl		%1				\n\t"
-		"bf		1b				\n\t"
-		: "=&z" (tmp), "=&r" (oldval)
-		: "r" (&lock->lock)
-		: "t", "memory"
-	);
-}
-
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
-	unsigned long tmp;
-
-	__asm__ __volatile__ (
-		"mov		#1, %0 ! arch_spin_unlock	\n\t"
-		"mov.l		%0, @%1				\n\t"
-		: "=&z" (tmp)
-		: "r" (&lock->lock)
-		: "t", "memory"
-	);
-}
-
-static inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
-	unsigned long tmp, oldval;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%2, %0	! arch_spin_trylock	\n\t"
-		"mov		%0, %1				\n\t"
-		"mov		#0, %0				\n\t"
-		"movco.l	%0, @%2				\n\t"
-		"bf		1b				\n\t"
-		"synco						\n\t"
-		: "=&z" (tmp), "=&r" (oldval)
-		: "r" (&lock->lock)
-		: "t", "memory"
-	);
-
-	return oldval;
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts but no interrupt
- * writers. For those circumstances we can "mix" irq-safe locks - any writer
- * needs to get a irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- */
-
-/**
- * read_can_lock - would read_trylock() succeed?
- * @lock: the rwlock in question.
- */
-#define arch_read_can_lock(x)	((x)->lock > 0)
-
-/**
- * write_can_lock - would write_trylock() succeed?
- * @lock: the rwlock in question.
- */
-#define arch_write_can_lock(x)	((x)->lock == RW_LOCK_BIAS)
-
-static inline void arch_read_lock(arch_rwlock_t *rw)
-{
-	unsigned long tmp;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%1, %0	! arch_read_lock	\n\t"
-		"cmp/pl		%0				\n\t"
-		"bf		1b				\n\t"
-		"add		#-1, %0				\n\t"
-		"movco.l	%0, @%1				\n\t"
-		"bf		1b				\n\t"
-		: "=&z" (tmp)
-		: "r" (&rw->lock)
-		: "t", "memory"
-	);
-}
-
-static inline void arch_read_unlock(arch_rwlock_t *rw)
-{
-	unsigned long tmp;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%1, %0	! arch_read_unlock	\n\t"
-		"add		#1, %0				\n\t"
-		"movco.l	%0, @%1				\n\t"
-		"bf		1b				\n\t"
-		: "=&z" (tmp)
-		: "r" (&rw->lock)
-		: "t", "memory"
-	);
-}
-
-static inline void arch_write_lock(arch_rwlock_t *rw)
-{
-	unsigned long tmp;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%1, %0	! arch_write_lock	\n\t"
-		"cmp/hs		%2, %0				\n\t"
-		"bf		1b				\n\t"
-		"sub		%2, %0				\n\t"
-		"movco.l	%0, @%1				\n\t"
-		"bf		1b				\n\t"
-		: "=&z" (tmp)
-		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
-		: "t", "memory"
-	);
-}
-
-static inline void arch_write_unlock(arch_rwlock_t *rw)
-{
-	__asm__ __volatile__ (
-		"mov.l		%1, @%0 ! arch_write_unlock	\n\t"
-		:
-		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
-		: "t", "memory"
-	);
-}
-
-static inline int arch_read_trylock(arch_rwlock_t *rw)
-{
-	unsigned long tmp, oldval;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%2, %0	! arch_read_trylock	\n\t"
-		"mov		%0, %1				\n\t"
-		"cmp/pl		%0				\n\t"
-		"bf		2f				\n\t"
-		"add		#-1, %0				\n\t"
-		"movco.l	%0, @%2				\n\t"
-		"bf		1b				\n\t"
-		"2:						\n\t"
-		"synco						\n\t"
-		: "=&z" (tmp), "=&r" (oldval)
-		: "r" (&rw->lock)
-		: "t", "memory"
-	);
-
-	return (oldval > 0);
-}
-
-static inline int arch_write_trylock(arch_rwlock_t *rw)
-{
-	unsigned long tmp, oldval;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%2, %0	! arch_write_trylock	\n\t"
-		"mov		%0, %1				\n\t"
-		"cmp/hs		%3, %0				\n\t"
-		"bf		2f				\n\t"
-		"sub		%3, %0				\n\t"
-		"2:						\n\t"
-		"movco.l	%0, @%2				\n\t"
-		"bf		1b				\n\t"
-		"synco						\n\t"
-		: "=&z" (tmp), "=&r" (oldval)
-		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
-		: "t", "memory"
-	);
-
-	return (oldval > (RW_LOCK_BIAS - 1));
-}
-
-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* __ASM_SH_SPINLOCK_H */
diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index 2afa321157be..6c65dcd470ab 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -151,19 +151,10 @@ extern void init_thread_xstate(void);
  * ever touches our thread-synchronous status, so we don't
  * have to worry about atomic accesses.
  */
-#define TS_RESTORE_SIGMASK	0x0001	/* restore signal mask in do_signal() */
 #define TS_USEDFPU		0x0002	/* FPU used by this task this quantum */
 
 #ifndef __ASSEMBLY__
 
-#define HAVE_SET_RESTORE_SIGMASK	1
-static inline void set_restore_sigmask(void)
-{
-	struct thread_info *ti = current_thread_info();
-	ti->status |= TS_RESTORE_SIGMASK;
-	WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags));
-}
-
 #define TI_FLAG_FAULT_CODE_SHIFT	24
 
 /*
@@ -182,23 +173,6 @@ static inline unsigned int get_thread_fault_code(void)
 	return ti->flags >> TI_FLAG_FAULT_CODE_SHIFT;
 }
 
-static inline void clear_restore_sigmask(void)
-{
-	current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-}
-static inline bool test_restore_sigmask(void)
-{
-	return current_thread_info()->status & TS_RESTORE_SIGMASK;
-}
-static inline bool test_and_clear_restore_sigmask(void)
-{
-	struct thread_info *ti = current_thread_info();
-	if (!(ti->status & TS_RESTORE_SIGMASK))
-		return false;
-	ti->status &= ~TS_RESTORE_SIGMASK;
-	return true;
-}
-
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/arch/sh/include/uapi/asm/cpu-features.h b/arch/sh/include/uapi/asm/cpu-features.h
index 694abe490edb..2f1bc851042a 100644
--- a/arch/sh/include/uapi/asm/cpu-features.h
+++ b/arch/sh/include/uapi/asm/cpu-features.h
@@ -22,5 +22,6 @@
 #define CPU_HAS_L2_CACHE	0x0080	/* Secondary cache / URAM */
 #define CPU_HAS_OP32		0x0100	/* 32-bit instruction support */
 #define CPU_HAS_PTEAEX		0x0200	/* PTE ASID Extension support */
+#define CPU_HAS_CAS_L		0x0400	/* cas.l atomic compare-and-swap */
 
 #endif /* __ASM_SH_CPU_FEATURES_H */
diff --git a/arch/sh/include/uapi/asm/sigcontext.h b/arch/sh/include/uapi/asm/sigcontext.h
index 8ce1435bc0bf..faa5d0833412 100644
--- a/arch/sh/include/uapi/asm/sigcontext.h
+++ b/arch/sh/include/uapi/asm/sigcontext.h
@@ -25,8 +25,6 @@ struct sigcontext {
 	unsigned long sc_mach;
 	unsigned long sc_macl;
 
-#if defined(__SH4__) || defined(CONFIG_CPU_SH4) || \
-    defined(__SH2A__) || defined(CONFIG_CPU_SH2A)
 	/* FPU registers */
 	unsigned long sc_fpregs[16];
 	unsigned long sc_xfpregs[16];
@@ -34,7 +32,6 @@ struct sigcontext {
 	unsigned int sc_fpul;
 	unsigned int sc_ownedfp;
 #endif
-#endif
 };
 
 #endif /* __ASM_SH_SIGCONTEXT_H */
diff --git a/arch/sh/include/uapi/asm/unistd_32.h b/arch/sh/include/uapi/asm/unistd_32.h
index d13a1d623736..c801bde9e6f5 100644
--- a/arch/sh/include/uapi/asm/unistd_32.h
+++ b/arch/sh/include/uapi/asm/unistd_32.h
@@ -380,7 +380,21 @@
 #define __NR_process_vm_writev	366
 #define __NR_kcmp		367
 #define __NR_finit_module	368
+#define __NR_sched_getattr	369
+#define __NR_sched_setattr	370
+#define __NR_renameat2		371
+#define __NR_seccomp		372
+#define __NR_getrandom		373
+#define __NR_memfd_create	374
+#define __NR_bpf		375
+#define __NR_execveat		376
+#define __NR_userfaultfd	377
+#define __NR_membarrier		378
+#define __NR_mlock2		379
+#define __NR_copy_file_range	380
+#define __NR_preadv2		381
+#define __NR_pwritev2		382
 
-#define NR_syscalls 369
+#define NR_syscalls 383
 
 #endif /* __ASM_SH_UNISTD_32_H */
diff --git a/arch/sh/include/uapi/asm/unistd_64.h b/arch/sh/include/uapi/asm/unistd_64.h
index 47ebd5b5ed55..ce0cb3598b62 100644
--- a/arch/sh/include/uapi/asm/unistd_64.h
+++ b/arch/sh/include/uapi/asm/unistd_64.h
@@ -400,7 +400,21 @@
 #define __NR_process_vm_writev	377
 #define __NR_kcmp		378
 #define __NR_finit_module	379
+#define __NR_sched_getattr	380
+#define __NR_sched_setattr	381
+#define __NR_renameat2		382
+#define __NR_seccomp		383
+#define __NR_getrandom		384
+#define __NR_memfd_create	385
+#define __NR_bpf		386
+#define __NR_execveat		387
+#define __NR_userfaultfd	388
+#define __NR_membarrier		389
+#define __NR_mlock2		390
+#define __NR_copy_file_range	391
+#define __NR_preadv2		392
+#define __NR_pwritev2		393
 
-#define NR_syscalls 380
+#define NR_syscalls 394
 
 #endif /* __ASM_SH_UNISTD_64_H */
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 4187cf4fe185..fca9b1e78a63 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -24,11 +24,13 @@ int __init clk_init(void)
 {
 	int ret;
 
+#ifndef CONFIG_COMMON_CLK
 	ret = arch_clk_init();
 	if (unlikely(ret)) {
 		pr_err("%s: CPU clock registration failed.\n", __func__);
 		return ret;
 	}
+#endif
 
 	if (sh_mv.mv_clk_init) {
 		ret = sh_mv.mv_clk_init();
@@ -39,11 +41,13 @@ int __init clk_init(void)
 		}
 	}
 
+#ifndef CONFIG_COMMON_CLK
 	/* Kick the child clocks.. */
 	recalculate_root_clocks();
 
 	/* Enable the necessary init clocks */
 	clk_enable_init_clocks();
+#endif
 
 	return ret;
 }
diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c
index 0d7360d549c1..c8b3be1b54e6 100644
--- a/arch/sh/kernel/cpu/init.c
+++ b/arch/sh/kernel/cpu/init.c
@@ -106,7 +106,7 @@ void __attribute__ ((weak)) l2_cache_init(void)
 /*
  * Generic first-level cache init
  */
-#ifdef CONFIG_SUPERH32
+#if defined(CONFIG_SUPERH32) && !defined(CONFIG_CPU_J2)
 static void cache_init(void)
 {
 	unsigned long ccr, flags;
@@ -323,9 +323,13 @@ asmlinkage void cpu_init(void)
 	cache_init();
 
 	if (raw_smp_processor_id() == 0) {
+#ifdef CONFIG_MMU
 		shm_align_mask = max_t(unsigned long,
 				       current_cpu_data.dcache.way_size - 1,
 				       PAGE_SIZE - 1);
+#else
+		shm_align_mask = PAGE_SIZE - 1;
+#endif
 
 		/* Boot CPU sets the cache shape */
 		detect_cache_shape();
diff --git a/arch/sh/kernel/cpu/proc.c b/arch/sh/kernel/cpu/proc.c
index 9e6624c9108b..4df4b284f591 100644
--- a/arch/sh/kernel/cpu/proc.c
+++ b/arch/sh/kernel/cpu/proc.c
@@ -27,6 +27,7 @@ static const char *cpu_name[] = {
 	[CPU_MXG]	= "MX-G",	[CPU_SH7723]	= "SH7723",
 	[CPU_SH7366]	= "SH7366",	[CPU_SH7724]	= "SH7724",
 	[CPU_SH7372]	= "SH7372",	[CPU_SH7734]	= "SH7734",
+	[CPU_J2]	= "J2",
 	[CPU_SH_NONE]	= "Unknown"
 };
 
diff --git a/arch/sh/kernel/cpu/sh2/Makefile b/arch/sh/kernel/cpu/sh2/Makefile
index f0f059acfcfb..904c4283d923 100644
--- a/arch/sh/kernel/cpu/sh2/Makefile
+++ b/arch/sh/kernel/cpu/sh2/Makefile
@@ -5,3 +5,7 @@
 obj-y	:= ex.o probe.o entry.o
 
 obj-$(CONFIG_CPU_SUBTYPE_SH7619) += setup-sh7619.o clock-sh7619.o
+
+# SMP setup
+smp-$(CONFIG_CPU_J2)			:= smp-j2.o
+obj-$(CONFIG_SMP)			+= $(smp-y)
diff --git a/arch/sh/kernel/cpu/sh2/entry.S b/arch/sh/kernel/cpu/sh2/entry.S
index a1505956ef28..1ee0a6e774c6 100644
--- a/arch/sh/kernel/cpu/sh2/entry.S
+++ b/arch/sh/kernel/cpu/sh2/entry.S
@@ -47,6 +47,13 @@ ENTRY(exception_handler)
 	mov.l	r3,@-sp
 	cli
 	mov.l	$cpu_mode,r2
+#ifdef CONFIG_SMP
+	mov.l	$cpuid,r3
+	mov.l	@r3,r3
+	mov.l	@r3,r3
+	shll2	r3
+	add	r3,r2
+#endif
 	mov.l	@r2,r0
 	mov.l	@(5*4,r15),r3	! previous SR
 	or	r0,r3		! set MD
@@ -57,6 +64,13 @@ ENTRY(exception_handler)
 	mov.l	__md_bit,r0
 	mov.l	r0,@r2		! enter kernel mode
 	mov.l	$current_thread_info,r2
+#ifdef CONFIG_SMP
+	mov.l	$cpuid,r0
+	mov.l	@r0,r0
+	mov.l	@r0,r0
+	shll2	r0
+	add	r0,r2
+#endif
 	mov.l	@r2,r2
 	mov	#(THREAD_SIZE >> 8),r0
 	shll8	r0
@@ -147,6 +161,11 @@ ENTRY(exception_handler)
 	mov	#31,r8
 	cmp/hs	r8,r9
 	bt	trap_entry	! 64 > vec >= 31  is trap
+#ifdef CONFIG_CPU_J2
+	mov	#16,r8
+	cmp/hs	r8,r9
+	bt	interrupt_entry	! 31 > vec >= 16 is interrupt
+#endif
 
 	mov.l	4f,r8
 	mov	r9,r4
@@ -260,6 +279,13 @@ restore_all:
 	lds.l	@r0+,macl
 	mov	r15,r0
 	mov.l	$cpu_mode,r2
+#ifdef CONFIG_SMP
+	mov.l	$cpuid,r3
+	mov.l	@r3,r3
+	mov.l	@r3,r3
+	shll2	r3
+	add	r3,r2
+#endif
 	mov	#OFF_SR,r3
 	mov.l	@(r0,r3),r1
 	mov.l	__md_bit,r3
@@ -276,6 +302,13 @@ restore_all:
 	mov.l	r1,@r2				! set pc
 	get_current_thread_info r0, r1
 	mov.l	$current_thread_info,r1
+#ifdef CONFIG_SMP
+	mov.l	$cpuid,r3
+	mov.l	@r3,r3
+	mov.l	@r3,r3
+	shll2	r3
+	add	r3,r1
+#endif
 	mov.l	r0,@r1
 	mov.l	@r15+,r0
 	mov.l	@r15+,r1
@@ -303,19 +336,41 @@ $current_thread_info:
 	.long	__current_thread_info
 $cpu_mode:	
 	.long	__cpu_mode
+#ifdef CONFIG_SMP
+$cpuid:
+	.long sh2_cpuid_addr
+#endif
 		
 ! common exception handler
 #include "../../entry-common.S"
+
+#ifdef CONFIG_NR_CPUS
+#define NR_CPUS CONFIG_NR_CPUS
+#else
+#define NR_CPUS 1
+#endif
 	
 	.data
 ! cpu operation mode 
 ! bit30 = MD (compatible SH3/4)
 __cpu_mode:
+	.rept	NR_CPUS
 	.long	0x40000000
+	.endr
+
+#ifdef CONFIG_SMP
+.global sh2_cpuid_addr
+sh2_cpuid_addr:
+	.long	dummy_cpuid
+dummy_cpuid:
+	.long	0
+#endif
 		
 	.section	.bss
 __current_thread_info:
+	.rept	NR_CPUS
 	.long	0
+	.endr
 
 ENTRY(exception_handling_table)
 	.space	4*32
diff --git a/arch/sh/kernel/cpu/sh2/probe.c b/arch/sh/kernel/cpu/sh2/probe.c
index 6c687ae812ef..4205f6d42b69 100644
--- a/arch/sh/kernel/cpu/sh2/probe.c
+++ b/arch/sh/kernel/cpu/sh2/probe.c
@@ -10,10 +10,27 @@
  * for more details.
  */
 #include <linux/init.h>
+#include <linux/of_fdt.h>
+#include <linux/smp.h>
+#include <linux/io.h>
 #include <asm/processor.h>
 #include <asm/cache.h>
 
-void cpu_probe(void)
+#if defined(CONFIG_CPU_J2)
+extern u32 __iomem *j2_ccr_base;
+static int __init scan_cache(unsigned long node, const char *uname,
+			     int depth, void *data)
+{
+	if (!of_flat_dt_is_compatible(node, "jcore,cache"))
+		return 0;
+
+	j2_ccr_base = (u32 __iomem *)of_flat_dt_translate_address(node);
+
+	return 1;
+}
+#endif
+
+void __ref cpu_probe(void)
 {
 #if defined(CONFIG_CPU_SUBTYPE_SH7619)
 	boot_cpu_data.type			= CPU_SH7619;
@@ -24,10 +41,30 @@ void cpu_probe(void)
 	boot_cpu_data.dcache.linesz		= L1_CACHE_BYTES;
 	boot_cpu_data.dcache.flags		= 0;
 #endif
+
+#if defined(CONFIG_CPU_J2)
+	unsigned cpu = hard_smp_processor_id();
+	if (cpu == 0) of_scan_flat_dt(scan_cache, NULL);
+	if (j2_ccr_base) __raw_writel(0x80000303, j2_ccr_base + 4*cpu);
+	if (cpu != 0) return;
+	boot_cpu_data.type			= CPU_J2;
+
+	/* These defaults are appropriate for the original/current
+	 * J2 cache. Once there is a proper framework for getting cache
+	 * info from device tree, we should switch to that. */
+	boot_cpu_data.dcache.ways		= 1;
+	boot_cpu_data.dcache.sets		= 256;
+	boot_cpu_data.dcache.entry_shift	= 5;
+	boot_cpu_data.dcache.linesz		= 32;
+	boot_cpu_data.dcache.flags		= 0;
+
+	boot_cpu_data.flags |= CPU_HAS_CAS_L;
+#else
 	/*
 	 * SH-2 doesn't have separate caches
 	 */
 	boot_cpu_data.dcache.flags |= SH_CACHE_COMBINED;
+#endif
 	boot_cpu_data.icache = boot_cpu_data.dcache;
 	boot_cpu_data.family = CPU_FAMILY_SH2;
 }
diff --git a/arch/sh/kernel/cpu/sh2/smp-j2.c b/arch/sh/kernel/cpu/sh2/smp-j2.c
new file mode 100644
index 000000000000..6ccd7e4dc008
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh2/smp-j2.c
@@ -0,0 +1,139 @@
+/*
+ * SMP support for J2 processor
+ *
+ * Copyright (C) 2015-2016 Smart Energy Instruments, Inc.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <asm/cmpxchg.h>
+
+DEFINE_PER_CPU(unsigned, j2_ipi_messages);
+
+extern u32 *sh2_cpuid_addr;
+static u32 *j2_ipi_trigger;
+static int j2_ipi_irq;
+
+static irqreturn_t j2_ipi_interrupt_handler(int irq, void *arg)
+{
+	unsigned cpu = hard_smp_processor_id();
+	volatile unsigned *pmsg = &per_cpu(j2_ipi_messages, cpu);
+	unsigned messages, i;
+
+	do messages = *pmsg;
+	while (cmpxchg(pmsg, messages, 0) != messages);
+
+	if (!messages) return IRQ_NONE;
+
+	for (i=0; i<SMP_MSG_NR; i++)
+		if (messages & (1U<<i))
+			smp_message_recv(i);
+
+	return IRQ_HANDLED;
+}
+
+static void j2_smp_setup(void)
+{
+}
+
+static void j2_prepare_cpus(unsigned int max_cpus)
+{
+	struct device_node *np;
+	unsigned i, max = 1;
+
+	np = of_find_compatible_node(NULL, NULL, "jcore,ipi-controller");
+	if (!np)
+		goto out;
+
+	j2_ipi_irq = irq_of_parse_and_map(np, 0);
+	j2_ipi_trigger = of_iomap(np, 0);
+	if (!j2_ipi_irq || !j2_ipi_trigger)
+		goto out;
+
+	np = of_find_compatible_node(NULL, NULL, "jcore,cpuid-mmio");
+	if (!np)
+		goto out;
+
+	sh2_cpuid_addr = of_iomap(np, 0);
+	if (!sh2_cpuid_addr)
+		goto out;
+
+	if (request_irq(j2_ipi_irq, j2_ipi_interrupt_handler, IRQF_PERCPU,
+			"ipi", (void *)j2_ipi_interrupt_handler) != 0)
+		goto out;
+
+	max = max_cpus;
+out:
+	/* Disable any cpus past max_cpus, or all secondaries if we didn't
+	 * get the necessary resources to support SMP. */
+	for (i=max; i<NR_CPUS; i++) {
+		set_cpu_possible(i, false);
+		set_cpu_present(i, false);
+	}
+}
+
+static void j2_start_cpu(unsigned int cpu, unsigned long entry_point)
+{
+	struct device_node *np;
+	u32 regs[2];
+	void __iomem *release, *initpc;
+
+	if (!cpu) return;
+
+	np = of_get_cpu_node(cpu, NULL);
+	if (!np) return;
+
+	if (of_property_read_u32_array(np, "cpu-release-addr", regs, 2)) return;
+	release = ioremap_nocache(regs[0], sizeof(u32));
+	initpc = ioremap_nocache(regs[1], sizeof(u32));
+
+	__raw_writel(entry_point, initpc);
+	__raw_writel(1, release);
+
+	iounmap(initpc);
+	iounmap(release);
+
+	pr_info("J2 SMP: requested start of cpu %u\n", cpu);
+}
+
+static unsigned int j2_smp_processor_id(void)
+{
+	return __raw_readl(sh2_cpuid_addr);
+}
+
+static void j2_send_ipi(unsigned int cpu, unsigned int message)
+{
+	volatile unsigned *pmsg;
+	unsigned old;
+	unsigned long val;
+
+	/* There is only one IPI interrupt shared by all messages, so
+	 * we keep a separate interrupt flag per message type in sw. */
+	pmsg = &per_cpu(j2_ipi_messages, cpu);
+	do old = *pmsg;
+	while (cmpxchg(pmsg, old, old|(1U<<message)) != old);
+
+	/* Generate the actual interrupt by writing to CCRn bit 28. */
+	val = __raw_readl(j2_ipi_trigger + cpu);
+	__raw_writel(val | (1U<<28), j2_ipi_trigger + cpu);
+}
+
+static struct plat_smp_ops j2_smp_ops = {
+	.smp_setup		= j2_smp_setup,
+	.prepare_cpus		= j2_prepare_cpus,
+	.start_cpu		= j2_start_cpu,
+	.smp_processor_id	= j2_smp_processor_id,
+	.send_ipi		= j2_send_ipi,
+	.cpu_die		= native_cpu_die,
+	.cpu_disable		= native_cpu_disable,
+	.play_dead		= native_play_dead,
+};
+
+CPU_METHOD_OF_DECLARE(j2_cpu_method, "jcore,spin-table", &j2_smp_ops);
diff --git a/arch/sh/kernel/dma-nommu.c b/arch/sh/kernel/dma-nommu.c
index 5b0bfcda6d0b..eadb669a7329 100644
--- a/arch/sh/kernel/dma-nommu.c
+++ b/arch/sh/kernel/dma-nommu.c
@@ -13,7 +13,7 @@
 static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
 				 unsigned long offset, size_t size,
 				 enum dma_data_direction dir,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
 	dma_addr_t addr = page_to_phys(page) + offset;
 
@@ -25,7 +25,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
 
 static int nommu_map_sg(struct device *dev, struct scatterlist *sg,
 			int nents, enum dma_data_direction dir,
-			struct dma_attrs *attrs)
+			unsigned long attrs)
 {
 	struct scatterlist *s;
 	int i;
diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c
index 9d209a07235e..e1d751ae2498 100644
--- a/arch/sh/kernel/dwarf.c
+++ b/arch/sh/kernel/dwarf.c
@@ -1009,10 +1009,8 @@ static void __init dwarf_unwinder_cleanup(void)
 	rbtree_postorder_for_each_entry_safe(cie, next_cie, &cie_root, node)
 		kfree(cie);
 
-	if (dwarf_reg_pool)
-		mempool_destroy(dwarf_reg_pool);
-	if (dwarf_frame_pool)
-		mempool_destroy(dwarf_frame_pool);
+	mempool_destroy(dwarf_reg_pool);
+	mempool_destroy(dwarf_frame_pool);
 	kmem_cache_destroy(dwarf_reg_cachep);
 	kmem_cache_destroy(dwarf_frame_cachep);
 }
diff --git a/arch/sh/kernel/head_32.S b/arch/sh/kernel/head_32.S
index 974bc152cc84..4e352c3f79e6 100644
--- a/arch/sh/kernel/head_32.S
+++ b/arch/sh/kernel/head_32.S
@@ -67,7 +67,7 @@ ENTRY(_stext)
 	ldc	r0, r6_bank
 #endif
 
-#ifdef CONFIG_OF
+#ifdef CONFIG_OF_FLATTREE
 	mov	r4, r12		! Store device tree blob pointer in r12
 #endif
 	
@@ -318,7 +318,7 @@ ENTRY(_stext)
 10:		
 #endif
 
-#ifdef CONFIG_OF
+#ifdef CONFIG_OF_FLATTREE
 	mov.l	8f, r0		! Make flat device tree available early.
 	jsr	@r0
 	 mov	r12, r4
@@ -349,7 +349,7 @@ ENTRY(stack_start)
 5:	.long	start_kernel
 6:	.long	cpu_init
 7:	.long	init_thread_union
-#if defined(CONFIG_OF)
+#if defined(CONFIG_OF_FLATTREE)
 8:	.long	sh_fdt_init
 #endif
 
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 5d34605b58b5..e7b49d81053e 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -242,7 +242,7 @@ void __init __weak plat_early_device_setup(void)
 {
 }
 
-#ifdef CONFIG_OF
+#ifdef CONFIG_OF_FLATTREE
 void __ref sh_fdt_init(phys_addr_t dt_phys)
 {
 	static int done = 0;
@@ -251,7 +251,11 @@ void __ref sh_fdt_init(phys_addr_t dt_phys)
 	/* Avoid calling an __init function on secondary cpus. */
 	if (done) return;
 
+#ifdef CONFIG_USE_BUILTIN_DTB
+	dt_virt = __dtb_start;
+#else
 	dt_virt = phys_to_virt(dt_phys);
+#endif
 
 	if (!dt_virt || !early_init_dt_scan(dt_virt)) {
 		pr_crit("Error: invalid device tree blob"
diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S
index 734234be2f01..254bc22ee57d 100644
--- a/arch/sh/kernel/syscalls_32.S
+++ b/arch/sh/kernel/syscalls_32.S
@@ -386,3 +386,17 @@ ENTRY(sys_call_table)
 	.long sys_process_vm_writev
 	.long sys_kcmp
 	.long sys_finit_module
+	.long sys_sched_getattr
+	.long sys_sched_setattr		/* 370 */
+	.long sys_renameat2
+	.long sys_seccomp
+	.long sys_getrandom
+	.long sys_memfd_create
+	.long sys_bpf			/* 375 */
+	.long sys_execveat
+	.long sys_userfaultfd
+	.long sys_membarrier
+	.long sys_mlock2
+	.long sys_copy_file_range	/* 380 */
+	.long sys_preadv2
+	.long sys_pwritev2
diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S
index 579fcb9a896b..d6a27f7a4c54 100644
--- a/arch/sh/kernel/syscalls_64.S
+++ b/arch/sh/kernel/syscalls_64.S
@@ -406,3 +406,17 @@ sys_call_table:
 	.long sys_process_vm_writev
 	.long sys_kcmp
 	.long sys_finit_module
+	.long sys_sched_getattr		/* 380 */
+	.long sys_sched_setattr
+	.long sys_renameat2
+	.long sys_seccomp
+	.long sys_getrandom
+	.long sys_memfd_create		/* 385 */
+	.long sys_bpf
+	.long sys_execveat
+	.long sys_userfaultfd
+	.long sys_membarrier
+	.long sys_mlock2		/* 390 */
+	.long sys_copy_file_range
+	.long sys_preadv2
+	.long sys_pwritev2
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index d6d0a986c6e9..fcd5e41977d1 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -11,7 +11,6 @@
  * for more details.
  */
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/init.h>
 #include <linux/profile.h>
 #include <linux/timex.h>
@@ -50,27 +49,31 @@ int update_persistent_clock(struct timespec now)
 }
 #endif
 
-unsigned int get_rtc_time(struct rtc_time *tm)
+static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
 {
-	if (rtc_sh_get_time != null_rtc_get_time) {
-		struct timespec tv;
+	struct timespec tv;
 
-		rtc_sh_get_time(&tv);
-		rtc_time_to_tm(tv.tv_sec, tm);
-	}
-
-	return RTC_24H;
+	rtc_sh_get_time(&tv);
+	rtc_time_to_tm(tv.tv_sec, tm);
+	return 0;
 }
-EXPORT_SYMBOL(get_rtc_time);
 
-int set_rtc_time(struct rtc_time *tm)
+static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
 {
 	unsigned long secs;
 
 	rtc_tm_to_time(tm, &secs);
-	return rtc_sh_set_time(secs);
+	if ((rtc_sh_set_time == null_rtc_set_time) ||
+	    (rtc_sh_set_time(secs) < 0))
+		return -EOPNOTSUPP;
+
+	return 0;
 }
-EXPORT_SYMBOL(set_rtc_time);
+
+static const struct rtc_class_ops rtc_generic_ops = {
+	.read_time = rtc_generic_get_time,
+	.set_time = rtc_generic_set_time,
+};
 
 static int __init rtc_generic_init(void)
 {
@@ -79,11 +82,14 @@ static int __init rtc_generic_init(void)
 	if (rtc_sh_get_time == null_rtc_get_time)
 		return -ENODEV;
 
-	pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
+	pdev = platform_device_register_data(NULL, "rtc-generic", -1,
+					     &rtc_generic_ops,
+					     sizeof(rtc_generic_ops));
+
 
 	return PTR_ERR_OR_ZERO(pdev);
 }
-module_init(rtc_generic_init);
+device_initcall(rtc_generic_init);
 
 void (*board_time_init)(void);
 
diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile
index cee6b9999d86..92c3bd96aee5 100644
--- a/arch/sh/mm/Makefile
+++ b/arch/sh/mm/Makefile
@@ -4,7 +4,8 @@
 
 obj-y			:= alignment.o cache.o init.o consistent.o mmap.o
 
-cacheops-$(CONFIG_CPU_SH2)		:= cache-sh2.o
+cacheops-$(CONFIG_CPU_J2)		:= cache-j2.o
+cacheops-$(CONFIG_CPU_SUBTYPE_SH7619)	:= cache-sh2.o
 cacheops-$(CONFIG_CPU_SH2A)		:= cache-sh2a.o
 cacheops-$(CONFIG_CPU_SH3)		:= cache-sh3.o
 cacheops-$(CONFIG_CPU_SH4)		:= cache-sh4.o flush-sh4.o
diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c
index ecfc6b0c1da1..bf95fdaedd0c 100644
--- a/arch/sh/mm/asids-debugfs.c
+++ b/arch/sh/mm/asids-debugfs.c
@@ -17,7 +17,6 @@
  * for more details.
  */
 #include <linux/init.h>
-#include <linux/module.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/spinlock.h>
@@ -70,6 +69,4 @@ static int __init asids_debugfs_init(void)
 
 	return PTR_ERR_OR_ZERO(asids_dentry);
 }
-module_init(asids_debugfs_init);
-
-MODULE_LICENSE("GPL v2");
+device_initcall(asids_debugfs_init);
diff --git a/arch/sh/mm/cache-j2.c b/arch/sh/mm/cache-j2.c
new file mode 100644
index 000000000000..391698bcac5b
--- /dev/null
+++ b/arch/sh/mm/cache-j2.c
@@ -0,0 +1,65 @@
+/*
+ * arch/sh/mm/cache-j2.c
+ *
+ * Copyright (C) 2015-2016 Smart Energy Instruments, Inc.
+ *
+ * Released under the terms of the GNU GPL v2.0.
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/cpumask.h>
+
+#include <asm/cache.h>
+#include <asm/addrspace.h>
+#include <asm/processor.h>
+#include <asm/cacheflush.h>
+#include <asm/io.h>
+
+#define ICACHE_ENABLE	0x1
+#define DCACHE_ENABLE	0x2
+#define CACHE_ENABLE	(ICACHE_ENABLE | DCACHE_ENABLE)
+#define ICACHE_FLUSH	0x100
+#define DCACHE_FLUSH	0x200
+#define CACHE_FLUSH	(ICACHE_FLUSH | DCACHE_FLUSH)
+
+u32 __iomem *j2_ccr_base;
+
+static void j2_flush_icache(void *args)
+{
+	unsigned cpu;
+	for_each_possible_cpu(cpu)
+		__raw_writel(CACHE_ENABLE | ICACHE_FLUSH, j2_ccr_base + cpu);
+}
+
+static void j2_flush_dcache(void *args)
+{
+	unsigned cpu;
+	for_each_possible_cpu(cpu)
+		__raw_writel(CACHE_ENABLE | DCACHE_FLUSH, j2_ccr_base + cpu);
+}
+
+static void j2_flush_both(void *args)
+{
+	unsigned cpu;
+	for_each_possible_cpu(cpu)
+		__raw_writel(CACHE_ENABLE | CACHE_FLUSH, j2_ccr_base + cpu);
+}
+
+void __init j2_cache_init(void)
+{
+	if (!j2_ccr_base)
+		return;
+
+	local_flush_cache_all = j2_flush_both;
+	local_flush_cache_mm = j2_flush_both;
+	local_flush_cache_dup_mm = j2_flush_both;
+	local_flush_cache_page = j2_flush_both;
+	local_flush_cache_range = j2_flush_both;
+	local_flush_dcache_page = j2_flush_dcache;
+	local_flush_icache_range = j2_flush_icache;
+	local_flush_icache_page = j2_flush_icache;
+	local_flush_cache_sigtramp = j2_flush_icache;
+
+	pr_info("Initial J2 CCR is %.8x\n", __raw_readl(j2_ccr_base));
+}
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
index e58cfbf45150..36554a9ea99b 100644
--- a/arch/sh/mm/cache.c
+++ b/arch/sh/mm/cache.c
@@ -42,6 +42,8 @@ static inline void cacheop_on_each_cpu(void (*func) (void *info), void *info,
 {
 	preempt_disable();
 
+	/* Needing IPI for cross-core flush is SHX3-specific. */
+#ifdef CONFIG_CPU_SHX3
 	/*
 	 * It's possible that this gets called early on when IRQs are
 	 * still disabled due to ioremapping by the boot CPU, so don't
@@ -49,6 +51,7 @@ static inline void cacheop_on_each_cpu(void (*func) (void *info), void *info,
 	 */
 	if (num_online_cpus() > 1)
 		smp_call_function(func, info, wait);
+#endif
 
 	func(info);
 
@@ -244,7 +247,11 @@ void flush_cache_sigtramp(unsigned long address)
 
 static void compute_alias(struct cache_info *c)
 {
+#ifdef CONFIG_MMU
 	c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1);
+#else
+	c->alias_mask = 0;
+#endif
 	c->n_aliases = c->alias_mask ? (c->alias_mask >> PAGE_SHIFT) + 1 : 0;
 }
 
@@ -305,7 +312,11 @@ void __init cpu_cache_init(void)
 	if (unlikely(cache_disabled))
 		goto skip;
 
-	if (boot_cpu_data.family == CPU_FAMILY_SH2) {
+	if (boot_cpu_data.type == CPU_J2) {
+		extern void __weak j2_cache_init(void);
+
+		j2_cache_init();
+	} else if (boot_cpu_data.family == CPU_FAMILY_SH2) {
 		extern void __weak sh2_cache_init(void);
 
 		sh2_cache_init();
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index b81d9dbf9fef..92b6976fde59 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -34,7 +34,7 @@ fs_initcall(dma_init);
 
 void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 				 dma_addr_t *dma_handle, gfp_t gfp,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
 	void *ret, *ret_nocache;
 	int order = get_order(size);
@@ -66,7 +66,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 
 void dma_generic_free_coherent(struct device *dev, size_t size,
 			       void *vaddr, dma_addr_t dma_handle,
-			       struct dma_attrs *attrs)
+			       unsigned long attrs)
 {
 	int order = get_order(size);
 	unsigned long pfn = dma_handle >> PAGE_SHIFT;
diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c
index 0c99ec2e7ed8..d09ddfe58fd8 100644
--- a/arch/sh/mm/ioremap.c
+++ b/arch/sh/mm/ioremap.c
@@ -34,7 +34,7 @@
  * have to convert them into an offset in a page-aligned mapping, but the
  * caller shouldn't need to know that small detail.
  */
-void __iomem * __init_refok
+void __iomem * __ref
 __ioremap_caller(phys_addr_t phys_addr, unsigned long size,
 		 pgprot_t pgprot, void *caller)
 {
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 546293d9e6c5..59b09600dd32 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -43,6 +43,7 @@ config SPARC
 	select OLD_SIGSUSPEND
 	select ARCH_HAS_SG_CHAIN
 	select CPU_NO_EFFICIENT_FFS
+	select HAVE_ARCH_HARDENED_USERCOPY
 
 config SPARC32
 	def_bool !64BIT
diff --git a/arch/sparc/include/asm/io_32.h b/arch/sparc/include/asm/io_32.h
index 57f26c398dc9..4dd268a3a8b0 100644
--- a/arch/sparc/include/asm/io_32.h
+++ b/arch/sparc/include/asm/io_32.h
@@ -140,16 +140,6 @@ void ioport_unmap(void __iomem *);
 struct pci_dev;
 void pci_iounmap(struct pci_dev *dev, void __iomem *);
 
-
-
-/*
- * At the moment, we do not use CMOS_READ anywhere outside of rtc.c,
- * so rtc_port is static in it. This should not change unless a new
- * hardware pops up.
- */
-#define RTC_PORT(x)   (rtc_port + (x))
-#define RTC_ALWAYS_BCD  0
-
 static inline int sbus_can_dma_64bit(void)
 {
 	return 0; /* actually, sparc_cpu_model==sun4d */
diff --git a/arch/sparc/include/asm/pci_64.h b/arch/sparc/include/asm/pci_64.h
index 022d16008a00..2303635158f5 100644
--- a/arch/sparc/include/asm/pci_64.h
+++ b/arch/sparc/include/asm/pci_64.h
@@ -55,9 +55,6 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
 }
 
 #define HAVE_ARCH_PCI_RESOURCE_TO_USER
-void pci_resource_to_user(const struct pci_dev *dev, int bar,
-			  const struct resource *rsrc,
-			  resource_size_t *start, resource_size_t *end);
 #endif /* __KERNEL__ */
 
 #endif /* __SPARC64_PCI_H */
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index bde59825d06c..3d7b925f6516 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -222,32 +222,8 @@ register struct thread_info *current_thread_info_reg asm("g6");
  *
  * Note that there are only 8 bits available.
  */
-#define TS_RESTORE_SIGMASK	0x0001	/* restore signal mask in do_signal() */
 
 #ifndef __ASSEMBLY__
-#define HAVE_SET_RESTORE_SIGMASK	1
-static inline void set_restore_sigmask(void)
-{
-	struct thread_info *ti = current_thread_info();
-	ti->status |= TS_RESTORE_SIGMASK;
-	WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags));
-}
-static inline void clear_restore_sigmask(void)
-{
-	current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-}
-static inline bool test_restore_sigmask(void)
-{
-	return current_thread_info()->status & TS_RESTORE_SIGMASK;
-}
-static inline bool test_and_clear_restore_sigmask(void)
-{
-	struct thread_info *ti = current_thread_info();
-	if (!(ti->status & TS_RESTORE_SIGMASK))
-		return false;
-	ti->status &= ~TS_RESTORE_SIGMASK;
-	return true;
-}
 
 #define thread32_stack_is_64bit(__SP) (((__SP) & 0x1) != 0)
 #define test_thread_64bit_stack(__SP) \
diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h
index 57aca2792d29..341a5a133f48 100644
--- a/arch/sparc/include/asm/uaccess_32.h
+++ b/arch/sparc/include/asm/uaccess_32.h
@@ -248,22 +248,28 @@ unsigned long __copy_user(void __user *to, const void __user *from, unsigned lon
 
 static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	if (n && __access_ok((unsigned long) to, n))
+	if (n && __access_ok((unsigned long) to, n)) {
+		if (!__builtin_constant_p(n))
+			check_object_size(from, n, true);
 		return __copy_user(to, (__force void __user *) from, n);
-	else
+	} else
 		return n;
 }
 
 static inline unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
+	if (!__builtin_constant_p(n))
+		check_object_size(from, n, true);
 	return __copy_user(to, (__force void __user *) from, n);
 }
 
 static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	if (n && __access_ok((unsigned long) from, n))
+	if (n && __access_ok((unsigned long) from, n)) {
+		if (!__builtin_constant_p(n))
+			check_object_size(to, n, false);
 		return __copy_user((__force void __user *) to, from, n);
-	else
+	} else
 		return n;
 }
 
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index e9a51d64974d..8bda94fab8e8 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -210,8 +210,12 @@ unsigned long copy_from_user_fixup(void *to, const void __user *from,
 static inline unsigned long __must_check
 copy_from_user(void *to, const void __user *from, unsigned long size)
 {
-	unsigned long ret = ___copy_from_user(to, from, size);
+	unsigned long ret;
 
+	if (!__builtin_constant_p(size))
+		check_object_size(to, size, false);
+
+	ret = ___copy_from_user(to, from, size);
 	if (unlikely(ret))
 		ret = copy_from_user_fixup(to, from, size);
 
@@ -227,8 +231,11 @@ unsigned long copy_to_user_fixup(void __user *to, const void *from,
 static inline unsigned long __must_check
 copy_to_user(void __user *to, const void *from, unsigned long size)
 {
-	unsigned long ret = ___copy_to_user(to, from, size);
+	unsigned long ret;
 
+	if (!__builtin_constant_p(size))
+		check_object_size(from, size, true);
+	ret = ___copy_to_user(to, from, size);
 	if (unlikely(ret))
 		ret = copy_to_user_fixup(to, from, size);
 	return ret;
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 37686828c3d9..5c615abff030 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -196,7 +196,7 @@ static inline void iommu_free_ctx(struct iommu *iommu, int ctx)
 
 static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
 				   dma_addr_t *dma_addrp, gfp_t gfp,
-				   struct dma_attrs *attrs)
+				   unsigned long attrs)
 {
 	unsigned long order, first_page;
 	struct iommu *iommu;
@@ -245,7 +245,7 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
 
 static void dma_4u_free_coherent(struct device *dev, size_t size,
 				 void *cpu, dma_addr_t dvma,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
 	struct iommu *iommu;
 	unsigned long order, npages;
@@ -263,7 +263,7 @@ static void dma_4u_free_coherent(struct device *dev, size_t size,
 static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
 				  unsigned long offset, size_t sz,
 				  enum dma_data_direction direction,
-				  struct dma_attrs *attrs)
+				  unsigned long attrs)
 {
 	struct iommu *iommu;
 	struct strbuf *strbuf;
@@ -385,7 +385,7 @@ do_flush_sync:
 
 static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
 			      size_t sz, enum dma_data_direction direction,
-			      struct dma_attrs *attrs)
+			      unsigned long attrs)
 {
 	struct iommu *iommu;
 	struct strbuf *strbuf;
@@ -431,7 +431,7 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
 
 static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 			 int nelems, enum dma_data_direction direction,
-			 struct dma_attrs *attrs)
+			 unsigned long attrs)
 {
 	struct scatterlist *s, *outs, *segstart;
 	unsigned long flags, handle, prot, ctx;
@@ -607,7 +607,7 @@ static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg)
 
 static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
 			    int nelems, enum dma_data_direction direction,
-			    struct dma_attrs *attrs)
+			    unsigned long attrs)
 {
 	unsigned long flags, ctx;
 	struct scatterlist *sg;
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index ffd5ff4678cf..2344103414d1 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -260,7 +260,7 @@ EXPORT_SYMBOL(sbus_set_sbus64);
  */
 static void *sbus_alloc_coherent(struct device *dev, size_t len,
 				 dma_addr_t *dma_addrp, gfp_t gfp,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
 	struct platform_device *op = to_platform_device(dev);
 	unsigned long len_total = PAGE_ALIGN(len);
@@ -315,7 +315,7 @@ err_nopages:
 }
 
 static void sbus_free_coherent(struct device *dev, size_t n, void *p,
-			       dma_addr_t ba, struct dma_attrs *attrs)
+			       dma_addr_t ba, unsigned long attrs)
 {
 	struct resource *res;
 	struct page *pgv;
@@ -355,7 +355,7 @@ static void sbus_free_coherent(struct device *dev, size_t n, void *p,
 static dma_addr_t sbus_map_page(struct device *dev, struct page *page,
 				unsigned long offset, size_t len,
 				enum dma_data_direction dir,
-				struct dma_attrs *attrs)
+				unsigned long attrs)
 {
 	void *va = page_address(page) + offset;
 
@@ -371,20 +371,20 @@ static dma_addr_t sbus_map_page(struct device *dev, struct page *page,
 }
 
 static void sbus_unmap_page(struct device *dev, dma_addr_t ba, size_t n,
-			    enum dma_data_direction dir, struct dma_attrs *attrs)
+			    enum dma_data_direction dir, unsigned long attrs)
 {
 	mmu_release_scsi_one(dev, ba, n);
 }
 
 static int sbus_map_sg(struct device *dev, struct scatterlist *sg, int n,
-		       enum dma_data_direction dir, struct dma_attrs *attrs)
+		       enum dma_data_direction dir, unsigned long attrs)
 {
 	mmu_get_scsi_sgl(dev, sg, n);
 	return n;
 }
 
 static void sbus_unmap_sg(struct device *dev, struct scatterlist *sg, int n,
-			  enum dma_data_direction dir, struct dma_attrs *attrs)
+			  enum dma_data_direction dir, unsigned long attrs)
 {
 	mmu_release_scsi_sgl(dev, sg, n);
 }
@@ -429,7 +429,7 @@ arch_initcall(sparc_register_ioport);
  */
 static void *pci32_alloc_coherent(struct device *dev, size_t len,
 				  dma_addr_t *pba, gfp_t gfp,
-				  struct dma_attrs *attrs)
+				  unsigned long attrs)
 {
 	unsigned long len_total = PAGE_ALIGN(len);
 	void *va;
@@ -482,7 +482,7 @@ err_nopages:
  * past this call are illegal.
  */
 static void pci32_free_coherent(struct device *dev, size_t n, void *p,
-				dma_addr_t ba, struct dma_attrs *attrs)
+				dma_addr_t ba, unsigned long attrs)
 {
 	struct resource *res;
 
@@ -518,14 +518,14 @@ static void pci32_free_coherent(struct device *dev, size_t n, void *p,
 static dma_addr_t pci32_map_page(struct device *dev, struct page *page,
 				 unsigned long offset, size_t size,
 				 enum dma_data_direction dir,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
 	/* IIep is write-through, not flushing. */
 	return page_to_phys(page) + offset;
 }
 
 static void pci32_unmap_page(struct device *dev, dma_addr_t ba, size_t size,
-			     enum dma_data_direction dir, struct dma_attrs *attrs)
+			     enum dma_data_direction dir, unsigned long attrs)
 {
 	if (dir != PCI_DMA_TODEVICE)
 		dma_make_coherent(ba, PAGE_ALIGN(size));
@@ -548,7 +548,7 @@ static void pci32_unmap_page(struct device *dev, dma_addr_t ba, size_t size,
  */
 static int pci32_map_sg(struct device *device, struct scatterlist *sgl,
 			int nents, enum dma_data_direction dir,
-			struct dma_attrs *attrs)
+			unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int n;
@@ -567,7 +567,7 @@ static int pci32_map_sg(struct device *device, struct scatterlist *sgl,
  */
 static void pci32_unmap_sg(struct device *dev, struct scatterlist *sgl,
 			   int nents, enum dma_data_direction dir,
-			   struct dma_attrs *attrs)
+			   unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int n;
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index c2b202d763a1..9c1878f4fa9f 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -986,16 +986,18 @@ void pci_resource_to_user(const struct pci_dev *pdev, int bar,
 			  const struct resource *rp, resource_size_t *start,
 			  resource_size_t *end)
 {
-	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
-	unsigned long offset;
-
-	if (rp->flags & IORESOURCE_IO)
-		offset = pbm->io_space.start;
-	else
-		offset = pbm->mem_space.start;
+	struct pci_bus_region region;
 
-	*start = rp->start - offset;
-	*end = rp->end - offset;
+	/*
+	 * "User" addresses are shown in /sys/devices/pci.../.../resource
+	 * and /proc/bus/pci/devices and used as mmap offsets for
+	 * /proc/bus/pci/BB/DD.F files (see proc_bus_pci_mmap()).
+	 *
+	 * On sparc, these are PCI bus addresses, i.e., raw BAR values.
+	 */
+	pcibios_resource_to_bus(pdev->bus, &region, (struct resource *) rp);
+	*start = region.start;
+	*end = region.end;
 }
 
 void pcibios_set_master(struct pci_dev *dev)
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index 836e8cef47e2..61c6f935accc 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -130,7 +130,7 @@ static inline long iommu_batch_end(void)
 
 static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
 				   dma_addr_t *dma_addrp, gfp_t gfp,
-				   struct dma_attrs *attrs)
+				   unsigned long attrs)
 {
 	unsigned long flags, order, first_page, npages, n;
 	struct iommu *iommu;
@@ -213,7 +213,7 @@ static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry,
 }
 
 static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
-				 dma_addr_t dvma, struct dma_attrs *attrs)
+				 dma_addr_t dvma, unsigned long attrs)
 {
 	struct pci_pbm_info *pbm;
 	struct iommu *iommu;
@@ -235,7 +235,7 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
 static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
 				  unsigned long offset, size_t sz,
 				  enum dma_data_direction direction,
-				  struct dma_attrs *attrs)
+				  unsigned long attrs)
 {
 	struct iommu *iommu;
 	unsigned long flags, npages, oaddr;
@@ -294,7 +294,7 @@ iommu_map_fail:
 
 static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
 			      size_t sz, enum dma_data_direction direction,
-			      struct dma_attrs *attrs)
+			      unsigned long attrs)
 {
 	struct pci_pbm_info *pbm;
 	struct iommu *iommu;
@@ -322,7 +322,7 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
 
 static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
 			 int nelems, enum dma_data_direction direction,
-			 struct dma_attrs *attrs)
+			 unsigned long attrs)
 {
 	struct scatterlist *s, *outs, *segstart;
 	unsigned long flags, handle, prot;
@@ -466,7 +466,7 @@ iommu_map_failed:
 
 static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
 			    int nelems, enum dma_data_direction direction,
-			    struct dma_attrs *attrs)
+			    unsigned long attrs)
 {
 	struct pci_pbm_info *pbm;
 	struct scatterlist *sg;
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 7d02b1fef025..d79b3b734245 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -150,6 +150,13 @@ SECTIONS
 	}
 	PERCPU_SECTION(SMP_CACHE_BYTES)
 
+#ifdef CONFIG_JUMP_LABEL
+	. = ALIGN(PAGE_SIZE);
+	.exit.text : {
+		EXIT_TEXT
+	}
+#endif
+
 	. = ALIGN(PAGE_SIZE);
 	__init_end = .;
 	BSS_SECTION(0, 0, 0)
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index c1467ac59ce6..b7659b8f1117 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -166,32 +166,5 @@ extern void _cpu_idle(void);
 #ifdef __tilegx__
 #define TS_COMPAT		0x0001	/* 32-bit compatibility mode */
 #endif
-#define TS_RESTORE_SIGMASK	0x0008	/* restore signal mask in do_signal */
-
-#ifndef __ASSEMBLY__
-#define HAVE_SET_RESTORE_SIGMASK	1
-static inline void set_restore_sigmask(void)
-{
-	struct thread_info *ti = current_thread_info();
-	ti->status |= TS_RESTORE_SIGMASK;
-	WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags));
-}
-static inline void clear_restore_sigmask(void)
-{
-	current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-}
-static inline bool test_restore_sigmask(void)
-{
-	return current_thread_info()->status & TS_RESTORE_SIGMASK;
-}
-static inline bool test_and_clear_restore_sigmask(void)
-{
-	struct thread_info *ti = current_thread_info();
-	if (!(ti->status & TS_RESTORE_SIGMASK))
-		return false;
-	ti->status &= ~TS_RESTORE_SIGMASK;
-	return true;
-}
-#endif	/* !__ASSEMBLY__ */
 
 #endif /* _ASM_TILE_THREAD_INFO_H */
diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c
index b6bc0547a4f6..09bb774b39cd 100644
--- a/arch/tile/kernel/pci-dma.c
+++ b/arch/tile/kernel/pci-dma.c
@@ -34,7 +34,7 @@
 
 static void *tile_dma_alloc_coherent(struct device *dev, size_t size,
 				     dma_addr_t *dma_handle, gfp_t gfp,
-				     struct dma_attrs *attrs)
+				     unsigned long attrs)
 {
 	u64 dma_mask = (dev && dev->coherent_dma_mask) ?
 		dev->coherent_dma_mask : DMA_BIT_MASK(32);
@@ -78,7 +78,7 @@ static void *tile_dma_alloc_coherent(struct device *dev, size_t size,
  */
 static void tile_dma_free_coherent(struct device *dev, size_t size,
 				   void *vaddr, dma_addr_t dma_handle,
-				   struct dma_attrs *attrs)
+				   unsigned long attrs)
 {
 	homecache_free_pages((unsigned long)vaddr, get_order(size));
 }
@@ -202,7 +202,7 @@ static void __dma_complete_pa_range(dma_addr_t dma_addr, size_t size,
 
 static int tile_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 			   int nents, enum dma_data_direction direction,
-			   struct dma_attrs *attrs)
+			   unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -224,7 +224,7 @@ static int tile_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 
 static void tile_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 			      int nents, enum dma_data_direction direction,
-			      struct dma_attrs *attrs)
+			      unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -240,7 +240,7 @@ static void tile_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 static dma_addr_t tile_dma_map_page(struct device *dev, struct page *page,
 				    unsigned long offset, size_t size,
 				    enum dma_data_direction direction,
-				    struct dma_attrs *attrs)
+				    unsigned long attrs)
 {
 	BUG_ON(!valid_dma_direction(direction));
 
@@ -252,7 +252,7 @@ static dma_addr_t tile_dma_map_page(struct device *dev, struct page *page,
 
 static void tile_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
 				size_t size, enum dma_data_direction direction,
-				struct dma_attrs *attrs)
+				unsigned long attrs)
 {
 	BUG_ON(!valid_dma_direction(direction));
 
@@ -343,7 +343,7 @@ EXPORT_SYMBOL(tile_dma_map_ops);
 
 static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size,
 					 dma_addr_t *dma_handle, gfp_t gfp,
-					 struct dma_attrs *attrs)
+					 unsigned long attrs)
 {
 	int node = dev_to_node(dev);
 	int order = get_order(size);
@@ -368,14 +368,14 @@ static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size,
  */
 static void tile_pci_dma_free_coherent(struct device *dev, size_t size,
 				       void *vaddr, dma_addr_t dma_handle,
-				       struct dma_attrs *attrs)
+				       unsigned long attrs)
 {
 	homecache_free_pages((unsigned long)vaddr, get_order(size));
 }
 
 static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 			       int nents, enum dma_data_direction direction,
-			       struct dma_attrs *attrs)
+			       unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -400,7 +400,7 @@ static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 static void tile_pci_dma_unmap_sg(struct device *dev,
 				  struct scatterlist *sglist, int nents,
 				  enum dma_data_direction direction,
-				  struct dma_attrs *attrs)
+				  unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -416,7 +416,7 @@ static void tile_pci_dma_unmap_sg(struct device *dev,
 static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page,
 					unsigned long offset, size_t size,
 					enum dma_data_direction direction,
-					struct dma_attrs *attrs)
+					unsigned long attrs)
 {
 	BUG_ON(!valid_dma_direction(direction));
 
@@ -429,7 +429,7 @@ static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page,
 static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
 				    size_t size,
 				    enum dma_data_direction direction,
-				    struct dma_attrs *attrs)
+				    unsigned long attrs)
 {
 	BUG_ON(!valid_dma_direction(direction));
 
@@ -531,7 +531,7 @@ EXPORT_SYMBOL(gx_pci_dma_map_ops);
 #ifdef CONFIG_SWIOTLB
 static void *tile_swiotlb_alloc_coherent(struct device *dev, size_t size,
 					 dma_addr_t *dma_handle, gfp_t gfp,
-					 struct dma_attrs *attrs)
+					 unsigned long attrs)
 {
 	gfp |= GFP_DMA;
 	return swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
@@ -539,7 +539,7 @@ static void *tile_swiotlb_alloc_coherent(struct device *dev, size_t size,
 
 static void tile_swiotlb_free_coherent(struct device *dev, size_t size,
 				       void *vaddr, dma_addr_t dma_addr,
-				       struct dma_attrs *attrs)
+				       unsigned long attrs)
 {
 	swiotlb_free_coherent(dev, size, vaddr, dma_addr);
 }
diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S
index 378f5d8d1ec8..9d449caf8910 100644
--- a/arch/tile/kernel/vmlinux.lds.S
+++ b/arch/tile/kernel/vmlinux.lds.S
@@ -60,6 +60,18 @@ SECTIONS
   /* "Init" is divided into two areas with very different virtual addresses. */
   INIT_TEXT_SECTION(PAGE_SIZE)
 
+  /*
+   * Some things, like the __jump_table, may contain symbol references
+   * to __exit text, so include such text in the final image if so.
+   * In that case we also override the _einittext from INIT_TEXT_SECTION.
+   */
+#ifdef CONFIG_JUMP_LABEL
+  .exit.text : {
+    EXIT_TEXT
+    _einittext = .;
+  }
+#endif
+
   /* Now we skip back to PAGE_OFFSET for the data. */
   . = (. - TEXT_OFFSET + PAGE_OFFSET);
   #undef LOAD_OFFSET
diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index cc0013475444..fd443852103c 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -1,14 +1,17 @@
 config UML
 	bool
 	default y
+	select ARCH_HAS_KCOV
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_UID16
 	select HAVE_FUTEX_CMPXCHG if FUTEX
+	select HAVE_DEBUG_KMEMLEAK
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
 	select GENERIC_IO
 	select GENERIC_CLOCKEVENTS
+	select HAVE_GCC_PLUGINS
 	select TTY # Needed for line.c
 
 config MMU
@@ -30,10 +33,9 @@ config PCI
 config PCMCIA
 	bool
 
-# Yet to do!
 config TRACE_IRQFLAGS_SUPPORT
 	bool
-	default n
+	default y
 
 config LOCKDEP_SUPPORT
 	bool
diff --git a/arch/um/Makefile b/arch/um/Makefile
index e3abe6f3156d..0ca46ededfc7 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -78,8 +78,8 @@ include $(ARCH_DIR)/Makefile-os-$(OS)
 
 KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/include \
 		   -I$(srctree)/$(HOST_DIR)/include/uapi \
-		   -I$(HOST_DIR)/include/generated \
-		   -I$(HOST_DIR)/include/generated/uapi
+		   -I$(objtree)/$(HOST_DIR)/include/generated \
+		   -I$(objtree)/$(HOST_DIR)/include/generated/uapi
 
 # -Derrno=kernel_errno - This turns all kernel references to errno into
 # kernel_errno to separate them from the libc errno.  This allows -fno-common
diff --git a/arch/um/include/asm/irqflags.h b/arch/um/include/asm/irqflags.h
index c780d8a16773..3bb221e1d5a4 100644
--- a/arch/um/include/asm/irqflags.h
+++ b/arch/um/include/asm/irqflags.h
@@ -6,37 +6,33 @@ extern int set_signals(int enable);
 extern void block_signals(void);
 extern void unblock_signals(void);
 
+#define arch_local_save_flags arch_local_save_flags
 static inline unsigned long arch_local_save_flags(void)
 {
 	return get_signals();
 }
 
+#define arch_local_irq_restore arch_local_irq_restore
 static inline void arch_local_irq_restore(unsigned long flags)
 {
 	set_signals(flags);
 }
 
+#define arch_local_irq_enable arch_local_irq_enable
 static inline void arch_local_irq_enable(void)
 {
 	unblock_signals();
 }
 
+#define arch_local_irq_disable arch_local_irq_disable
 static inline void arch_local_irq_disable(void)
 {
 	block_signals();
 }
 
-static inline unsigned long arch_local_irq_save(void)
-{
-	unsigned long flags;
-	flags = arch_local_save_flags();
-	arch_local_irq_disable();
-	return flags;
-}
+#define ARCH_IRQ_DISABLED	0
+#define ARCh_IRQ_ENABLED	(SIGIO|SIGVTALRM)
 
-static inline bool arch_irqs_disabled(void)
-{
-	return arch_local_save_flags() == 0;
-}
+#include <asm-generic/irqflags.h>
 
 #endif
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index a6a5e42caaef..2f36d515762e 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -3,6 +3,11 @@
 # Licensed under the GPL
 #
 
+# Don't instrument UML-specific code; without this, we may crash when
+# accessing the instrumentation buffer for the first time from the
+# kernel.
+KCOV_INSTRUMENT                := n
+
 CPPFLAGS_vmlinux.lds := -DSTART=$(LDS_START)		\
                         -DELF_ARCH=$(LDS_ELF_ARCH)	\
                         -DELF_FORMAT=$(LDS_ELF_FORMAT)	\
diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c
index 55cead809b18..48bae81f8dca 100644
--- a/arch/um/kernel/initrd.c
+++ b/arch/um/kernel/initrd.c
@@ -37,8 +37,6 @@ static int __init read_initrd(void)
 	}
 
 	area = alloc_bootmem(size);
-	if (area == NULL)
-		return 0;
 
 	if (load_initrd(initrd, area, size) == -1)
 		return 0;
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 16630e75f056..e8175a8aa22c 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -319,9 +319,6 @@ int __init linux_main(int argc, char **argv)
 
 	start_vm = VMALLOC_START;
 
-	setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem);
-	mem_total_pages(physmem_size, iomem_size, highmem);
-
 	virtmem_size = physmem_size;
 	stack = (unsigned long) argv;
 	stack &= ~(1024 * 1024 - 1);
@@ -334,7 +331,6 @@ int __init linux_main(int argc, char **argv)
 		printf("Kernel virtual memory size shrunk to %lu bytes\n",
 		       virtmem_size);
 
-	stack_protections((unsigned long) &init_thread_info);
 	os_flush_stdout();
 
 	return start_uml();
@@ -342,6 +338,10 @@ int __init linux_main(int argc, char **argv)
 
 void __init setup_arch(char **cmdline_p)
 {
+	stack_protections((unsigned long) &init_thread_info);
+	setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem);
+	mem_total_pages(physmem_size, iomem_size, highmem);
+
 	paging_init();
 	strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index 08ff5094fcdd..ada473bf6f46 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -3,6 +3,9 @@
 # Licensed under the GPL
 #
 
+# Don't instrument UML-specific code
+KCOV_INSTRUMENT                := n
+
 obj-y = aio.o execvp.o file.o helper.o irq.o main.o mem.o process.o \
 	registers.o sigio.o signal.o start_up.o time.o tty.o \
 	umid.o user_syms.o util.o drivers/ skas/
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 8acaf4e384c0..a86d7cc2c2d8 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -15,6 +15,7 @@
 #include <kern_util.h>
 #include <os.h>
 #include <sysdep/mcontext.h>
+#include <um_malloc.h>
 
 void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
 	[SIGTRAP]	= relay_signal,
@@ -32,7 +33,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
 	struct uml_pt_regs *r;
 	int save_errno = errno;
 
-	r = malloc(sizeof(struct uml_pt_regs));
+	r = uml_kmalloc(sizeof(struct uml_pt_regs), UM_GFP_ATOMIC);
 	if (!r)
 		panic("out of memory");
 
@@ -91,7 +92,7 @@ static void timer_real_alarm_handler(mcontext_t *mc)
 {
 	struct uml_pt_regs *regs;
 
-	regs = malloc(sizeof(struct uml_pt_regs));
+	regs = uml_kmalloc(sizeof(struct uml_pt_regs), UM_GFP_ATOMIC);
 	if (!regs)
 		panic("out of memory");
 
diff --git a/arch/unicore32/kernel/pci.c b/arch/unicore32/kernel/pci.c
index d45fa5f3e9c4..62137d13c6f9 100644
--- a/arch/unicore32/kernel/pci.c
+++ b/arch/unicore32/kernel/pci.c
@@ -265,10 +265,8 @@ static int __init pci_common_init(void)
 
 	pci_fixup_irqs(pci_common_swizzle, pci_puv3_map_irq);
 
-	if (!pci_has_flag(PCI_PROBE_ONLY)) {
-		pci_bus_size_bridges(puv3_bus);
-		pci_bus_assign_resources(puv3_bus);
-	}
+	pci_bus_size_bridges(puv3_bus);
+	pci_bus_assign_resources(puv3_bus);
 	pci_bus_add_devices(puv3_bus);
 	return 0;
 }
@@ -279,9 +277,6 @@ char * __init pcibios_setup(char *str)
 	if (!strcmp(str, "debug")) {
 		debug_pci = 1;
 		return NULL;
-	} else if (!strcmp(str, "firmware")) {
-		pci_add_flags(PCI_PROBE_ONLY);
-		return NULL;
 	}
 	return str;
 }
diff --git a/arch/unicore32/mm/dma-swiotlb.c b/arch/unicore32/mm/dma-swiotlb.c
index 16c08b2143a7..3e9f6489ba38 100644
--- a/arch/unicore32/mm/dma-swiotlb.c
+++ b/arch/unicore32/mm/dma-swiotlb.c
@@ -19,14 +19,14 @@
 
 static void *unicore_swiotlb_alloc_coherent(struct device *dev, size_t size,
 					    dma_addr_t *dma_handle, gfp_t flags,
-					    struct dma_attrs *attrs)
+					    unsigned long attrs)
 {
 	return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
 }
 
 static void unicore_swiotlb_free_coherent(struct device *dev, size_t size,
 					  void *vaddr, dma_addr_t dma_addr,
-					  struct dma_attrs *attrs)
+					  unsigned long attrs)
 {
 	swiotlb_free_coherent(dev, size, vaddr, dma_addr);
 }
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2fa55851d2a9..c580d8c33562 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -80,6 +80,7 @@ config X86
 	select HAVE_ALIGNED_STRUCT_PAGE		if SLUB
 	select HAVE_AOUT			if X86_32
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_HARDENED_USERCOPY
 	select HAVE_ARCH_HUGE_VMAP		if X86_64 || X86_PAE
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_KASAN			if X86_64 && SPARSEMEM_VMEMMAP
@@ -91,6 +92,7 @@ config X86
 	select HAVE_ARCH_SOFT_DIRTY		if X86_64
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	select HAVE_ARCH_WITHIN_STACK_FRAMES
 	select HAVE_EBPF_JIT			if X86_64
 	select HAVE_CC_STACKPROTECTOR
 	select HAVE_CMPXCHG_DOUBLE
@@ -111,6 +113,7 @@ config X86
 	select HAVE_FUNCTION_GRAPH_FP_TEST
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
+	select HAVE_GCC_PLUGINS
 	select HAVE_GENERIC_DMA_COHERENT	if X86_32
 	select HAVE_HW_BREAKPOINT
 	select HAVE_IDE
@@ -151,6 +154,7 @@ config X86
 	select OLD_SIGSUSPEND3			if X86_32 || IA32_EMULATION
 	select PERF_EVENTS
 	select RTC_LIB
+	select RTC_MC146818_LIB
 	select SPARSE_IRQ
 	select SRCU
 	select SYSCTL_EXCEPTION_TRACE
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index be8e688fa0d4..12ea8f8384f4 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -96,7 +96,7 @@ $(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE
 	$(call if_changed,zoffset)
 
 
-AFLAGS_header.o += -I$(obj)
+AFLAGS_header.o += -I$(objtree)/$(obj)
 $(obj)/header.o: $(obj)/zoffset.h
 
 LDFLAGS_setup.elf	:= -T
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index a1e71d431fed..1433f6b4607d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -204,8 +204,12 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 	 * handling, because syscall restart has a fixup for compat
 	 * syscalls.  The fixup is exercised by the ptrace_syscall_32
 	 * selftest.
+	 *
+	 * We also need to clear TS_REGS_POKED_I386: the 32-bit tracer
+	 * special case only applies after poking regs and before the
+	 * very next return to user mode.
 	 */
-	ti->status &= ~TS_COMPAT;
+	ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
 #endif
 
 	user_enter_irqoff();
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 4cddd17153fb..f848572169ea 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -294,7 +294,7 @@
 # 285 sys_setaltroot
 286	i386	add_key			sys_add_key
 287	i386	request_key		sys_request_key
-288	i386	keyctl			sys_keyctl
+288	i386	keyctl			sys_keyctl			compat_sys_keyctl
 289	i386	ioprio_set		sys_ioprio_set
 290	i386	ioprio_get		sys_ioprio_get
 291	i386	inotify_init		sys_inotify_init
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 6ba89a1ab0e5..d5409660f5de 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -75,7 +75,7 @@ CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
        -fno-omit-frame-pointer -foptimize-sibling-calls \
        -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
 
-$(vobjs): KBUILD_CFLAGS += $(CFL)
+$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
 
 #
 # vDSO code runs in userspace and -pg doesn't help with profiling anyway.
@@ -145,6 +145,7 @@ KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
 KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
 KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
 KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 2f02d23a05ef..94d54d0defa7 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -96,9 +96,8 @@ static notrace cycle_t vread_pvclock(int *mode)
 {
 	const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
 	cycle_t ret;
-	u64 tsc, pvti_tsc;
-	u64 last, delta, pvti_system_time;
-	u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;
+	u64 last;
+	u32 version;
 
 	/*
 	 * Note: The kernel and hypervisor must guarantee that cpu ID
@@ -123,29 +122,15 @@ static notrace cycle_t vread_pvclock(int *mode)
 	 */
 
 	do {
-		version = pvti->version;
-
-		smp_rmb();
+		version = pvclock_read_begin(pvti);
 
 		if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
 			*mode = VCLOCK_NONE;
 			return 0;
 		}
 
-		tsc = rdtsc_ordered();
-		pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
-		pvti_tsc_shift = pvti->tsc_shift;
-		pvti_system_time = pvti->system_time;
-		pvti_tsc = pvti->tsc_timestamp;
-
-		/* Make sure that the version double-check is last. */
-		smp_rmb();
-	} while (unlikely((version & 1) || version != pvti->version));
-
-	delta = tsc - pvti_tsc;
-	ret = pvti_system_time +
-		pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
-				    pvti_tsc_shift);
+		ret = __pvclock_read_cycles(pvti);
+	} while (pvclock_read_retry(pvti, version));
 
 	/* refer to vread_tsc() comment for rationale */
 	last = gtod->cycle_last;
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 63a03bb91497..4f741192846d 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -22,6 +22,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
 
 	ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff));
 
+	if (hdr->e_type != ET_DYN)
+		fail("input is not a shared object\n");
+
 	/* Walk the segment table. */
 	for (i = 0; i < GET_LE(&hdr->e_phnum); i++) {
 		if (GET_LE(&pt[i].p_type) == PT_LOAD) {
@@ -49,6 +52,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
 	if (stripped_len < load_size)
 		fail("stripped input is too short\n");
 
+	if (!dyn)
+		fail("input has no PT_DYNAMIC section -- your toolchain is buggy\n");
+
 	/* Walk the dynamic table */
 	for (i = 0; dyn + i < dyn_end &&
 		     GET_LE(&dyn[i].d_tag) != DT_NULL; i++) {
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index fad97886d8b1..d0efb5cb1b00 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -263,10 +263,13 @@ static bool check_hw_exists(void)
 	return true;
 
 msr_fail:
-	pr_cont("Broken PMU hardware detected, using software events only.\n");
-	printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
-		boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
-		reg, val_new);
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+		pr_cont("PMU not available due to virtualization, using software events only.\n");
+	} else {
+		pr_cont("Broken PMU hardware detected, using software events only.\n");
+		pr_err("Failed to access perfctr msr (MSR %x is %Lx)\n",
+		       reg, val_new);
+	}
 
 	return false;
 }
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 3a27b93e6261..44461626830e 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -9,7 +9,6 @@
 #include <linux/kmemcheck.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-debug.h>
-#include <linux/dma-attrs.h>
 #include <asm/io.h>
 #include <asm/swiotlb.h>
 #include <linux/dma-contiguous.h>
@@ -48,11 +47,11 @@ extern int dma_supported(struct device *hwdev, u64 mask);
 
 extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 					dma_addr_t *dma_addr, gfp_t flag,
-					struct dma_attrs *attrs);
+					unsigned long attrs);
 
 extern void dma_generic_free_coherent(struct device *dev, size_t size,
 				      void *vaddr, dma_addr_t dma_addr,
-				      struct dma_attrs *attrs);
+				      unsigned long attrs);
 
 #ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */
 extern bool dma_capable(struct device *dev, dma_addr_t addr, size_t size);
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index fea7724141a0..e7f155c3045e 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -344,8 +344,8 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
  */
 static inline int mmap_is_ia32(void)
 {
-	return config_enabled(CONFIG_X86_32) ||
-	       (config_enabled(CONFIG_COMPAT) &&
+	return IS_ENABLED(CONFIG_X86_32) ||
+	       (IS_ENABLED(CONFIG_COMPAT) &&
 		test_thread_flag(TIF_ADDR32));
 }
 
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 116b58347501..2737366ea583 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -137,9 +137,9 @@ static inline int copy_fregs_to_user(struct fregs_state __user *fx)
 
 static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
 {
-	if (config_enabled(CONFIG_X86_32))
+	if (IS_ENABLED(CONFIG_X86_32))
 		return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
-	else if (config_enabled(CONFIG_AS_FXSAVEQ))
+	else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
 		return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
 
 	/* See comment in copy_fxregs_to_kernel() below. */
@@ -150,10 +150,10 @@ static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
 {
 	int err;
 
-	if (config_enabled(CONFIG_X86_32)) {
+	if (IS_ENABLED(CONFIG_X86_32)) {
 		err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
 	} else {
-		if (config_enabled(CONFIG_AS_FXSAVEQ)) {
+		if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
 			err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
 		} else {
 			/* See comment in copy_fxregs_to_kernel() below. */
@@ -166,9 +166,9 @@ static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
 
 static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
 {
-	if (config_enabled(CONFIG_X86_32))
+	if (IS_ENABLED(CONFIG_X86_32))
 		return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-	else if (config_enabled(CONFIG_AS_FXSAVEQ))
+	else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
 		return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
 
 	/* See comment in copy_fxregs_to_kernel() below. */
@@ -190,9 +190,9 @@ static inline int copy_user_to_fregs(struct fregs_state __user *fx)
 
 static inline void copy_fxregs_to_kernel(struct fpu *fpu)
 {
-	if (config_enabled(CONFIG_X86_32))
+	if (IS_ENABLED(CONFIG_X86_32))
 		asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
-	else if (config_enabled(CONFIG_AS_FXSAVEQ))
+	else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
 		asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
 	else {
 		/* Using "rex64; fxsave %0" is broken because, if the memory
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 69e62862b622..33ae3a4d0159 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -35,8 +35,9 @@
 #include <asm/asm.h>
 #include <asm/kvm_page_track.h>
 
-#define KVM_MAX_VCPUS 255
-#define KVM_SOFT_MAX_VCPUS 160
+#define KVM_MAX_VCPUS 288
+#define KVM_SOFT_MAX_VCPUS 240
+#define KVM_MAX_VCPU_ID 1023
 #define KVM_USER_MEM_SLOTS 509
 /* memory slots that are not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 3
@@ -599,6 +600,7 @@ struct kvm_vcpu_arch {
 	u64 mcg_cap;
 	u64 mcg_status;
 	u64 mcg_ctl;
+	u64 mcg_ext_ctl;
 	u64 *mce_banks;
 
 	/* Cache MMIO info */
@@ -682,9 +684,12 @@ struct kvm_arch_memory_slot {
 struct kvm_apic_map {
 	struct rcu_head rcu;
 	u8 mode;
-	struct kvm_lapic *phys_map[256];
-	/* first index is cluster id second is cpu id in a cluster */
-	struct kvm_lapic *logical_map[16][16];
+	u32 max_apic_id;
+	union {
+		struct kvm_lapic *xapic_flat_map[8];
+		struct kvm_lapic *xapic_cluster_map[16][4];
+	};
+	struct kvm_lapic *phys_map[];
 };
 
 /* Hyper-V emulation context */
@@ -779,6 +784,9 @@ struct kvm_arch {
 	u32 ldr_mode;
 	struct page *avic_logical_id_table_page;
 	struct page *avic_physical_id_table_page;
+
+	bool x2apic_format;
+	bool x2apic_broadcast_quirk_disabled;
 };
 
 struct kvm_vm_stat {
@@ -1006,6 +1014,11 @@ struct kvm_x86_ops {
 	int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
 			      uint32_t guest_irq, bool set);
 	void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
+
+	int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc);
+	void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
+
+	void (*setup_mce)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
@@ -1026,7 +1039,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_init_vm(struct kvm *kvm);
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-		u64 dirty_mask, u64 nx_mask, u64 x_mask);
+		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask);
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
@@ -1077,6 +1090,10 @@ extern u32  kvm_max_guest_tsc_khz;
 extern u8   kvm_tsc_scaling_ratio_frac_bits;
 /* maximum allowed value of TSC scaling ratio */
 extern u64  kvm_max_tsc_scaling_ratio;
+/* 1ull << kvm_tsc_scaling_ratio_frac_bits */
+extern u64  kvm_default_tsc_scaling_ratio;
+
+extern u64 kvm_mce_cap_supported;
 
 enum emulation_result {
 	EMULATE_DONE,         /* no further processing */
@@ -1352,7 +1369,7 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
 bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
 			     struct kvm_vcpu **dest_vcpu);
 
-void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
 		     struct kvm_lapic_irq *irq);
 
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h
index 0f555cc31984..24acd9ba7837 100644
--- a/arch/x86/include/asm/mc146818rtc.h
+++ b/arch/x86/include/asm/mc146818rtc.h
@@ -6,7 +6,6 @@
 
 #include <asm/io.h>
 #include <asm/processor.h>
-#include <linux/mc146818rtc.h>
 
 #ifndef RTC_PORT
 #define RTC_PORT(x)	(0x70 + (x))
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 396348196aa7..d8abfcf524d1 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -155,7 +155,7 @@ static inline void arch_exit_mmap(struct mm_struct *mm)
 #ifdef CONFIG_X86_64
 static inline bool is_64bit_mm(struct mm_struct *mm)
 {
-	return	!config_enabled(CONFIG_IA32_EMULATION) ||
+	return	!IS_ENABLED(CONFIG_IA32_EMULATION) ||
 		!(mm->context.ia32_compat == TIF_IA32);
 }
 #else
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 7c1c89598688..d019f0cc80ec 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -25,6 +25,24 @@ void pvclock_resume(void);
 
 void pvclock_touch_watchdogs(void);
 
+static __always_inline
+unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src)
+{
+	unsigned version = src->version & ~1;
+	/* Make sure that the version is read before the data. */
+	virt_rmb();
+	return version;
+}
+
+static __always_inline
+bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src,
+			unsigned version)
+{
+	/* Make sure that the version is re-read after the data. */
+	virt_rmb();
+	return unlikely(version != src->version);
+}
+
 /*
  * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
  * yielding a 64-bit result.
@@ -69,23 +87,12 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
 }
 
 static __always_inline
-unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
-			       cycle_t *cycles, u8 *flags)
+cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src)
 {
-	unsigned version;
-	cycle_t offset;
-	u64 delta;
-
-	version = src->version;
-	/* Make the latest version visible */
-	smp_rmb();
-
-	delta = rdtsc_ordered() - src->tsc_timestamp;
-	offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
-				   src->tsc_shift);
-	*cycles = src->system_time + offset;
-	*flags = src->flags;
-	return version;
+	u64 delta = rdtsc_ordered() - src->tsc_timestamp;
+	cycle_t offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
+					     src->tsc_shift);
+	return src->system_time + offset;
 }
 
 struct pvclock_vsyscall_time_info {
diff --git a/arch/x86/include/asm/rtc.h b/arch/x86/include/asm/rtc.h
deleted file mode 100644
index f71c3b0ed360..000000000000
--- a/arch/x86/include/asm/rtc.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/rtc.h>
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index d0fe23ec7e98..14824fc78f7e 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -193,7 +193,6 @@ struct __attribute__ ((__packed__)) vmcb {
 	struct vmcb_save_area save;
 };
 
-#define SVM_CPUID_FEATURE_SHIFT 2
 #define SVM_CPUID_FUNC 0x8000000a
 
 #define SVM_VM_CR_SVM_DISABLE 4
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index ab05d73e2bb7..d2f69b9ff732 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -31,9 +31,9 @@ static inline void dma_mark_clean(void *addr, size_t size) {}
 
 extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 					dma_addr_t *dma_handle, gfp_t flags,
-					struct dma_attrs *attrs);
+					unsigned long attrs);
 extern void x86_swiotlb_free_coherent(struct device *dev, size_t size,
 					void *vaddr, dma_addr_t dma_addr,
-					struct dma_attrs *attrs);
+					unsigned long attrs);
 
 #endif /* _ASM_X86_SWIOTLB_H */
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 999b7cd2e78c..4e23dd15c661 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task,
 	 * TS_COMPAT is set for 32-bit syscall entries and then
 	 * remains set until we return to user mode.
 	 */
-	if (task_thread_info(task)->status & TS_COMPAT)
+	if (task_thread_info(task)->status & (TS_COMPAT|TS_I386_REGS_POKED))
 		/*
 		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
 		 * and will match correctly in comparisons.
@@ -239,9 +239,6 @@ static inline int syscall_get_arch(void)
 	 * TS_COMPAT is set for 32-bit syscall entry and then
 	 * remains set until we return to user mode.
 	 *
-	 * TIF_IA32 tasks should always have TS_COMPAT set at
-	 * system call time.
-	 *
 	 * x32 tasks should be considered AUDIT_ARCH_X86_64.
 	 */
 	if (task_thread_info(current)->status & TS_COMPAT)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 89bff044a6f5..8b7c8d8e0852 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -176,6 +176,50 @@ static inline unsigned long current_stack_pointer(void)
 	return sp;
 }
 
+/*
+ * Walks up the stack frames to make sure that the specified object is
+ * entirely contained by a single stack frame.
+ *
+ * Returns:
+ *		 1 if within a frame
+ *		-1 if placed across a frame boundary (or outside stack)
+ *		 0 unable to determine (no frame pointers, etc)
+ */
+static inline int arch_within_stack_frames(const void * const stack,
+					   const void * const stackend,
+					   const void *obj, unsigned long len)
+{
+#if defined(CONFIG_FRAME_POINTER)
+	const void *frame = NULL;
+	const void *oldframe;
+
+	oldframe = __builtin_frame_address(1);
+	if (oldframe)
+		frame = __builtin_frame_address(2);
+	/*
+	 * low ----------------------------------------------> high
+	 * [saved bp][saved ip][args][local vars][saved bp][saved ip]
+	 *                     ^----------------^
+	 *               allow copies only within here
+	 */
+	while (stack <= frame && frame < stackend) {
+		/*
+		 * If obj + len extends past the last frame, this
+		 * check won't pass and the next frame will be 0,
+		 * causing us to bail out and correctly report
+		 * the copy as invalid.
+		 */
+		if (obj + len <= frame)
+			return obj >= oldframe + 2 * sizeof(void *) ? 1 : -1;
+		oldframe = frame;
+		frame = *(const void * const *)frame;
+	}
+	return -1;
+#else
+	return 0;
+#endif
+}
+
 #else /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_64
@@ -219,32 +263,11 @@ static inline unsigned long current_stack_pointer(void)
  * have to worry about atomic accesses.
  */
 #define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
-#define TS_RESTORE_SIGMASK	0x0008	/* restore signal mask in do_signal() */
+#ifdef CONFIG_COMPAT
+#define TS_I386_REGS_POKED	0x0004	/* regs poked by 32-bit ptracer */
+#endif
 
 #ifndef __ASSEMBLY__
-#define HAVE_SET_RESTORE_SIGMASK	1
-static inline void set_restore_sigmask(void)
-{
-	struct thread_info *ti = current_thread_info();
-	ti->status |= TS_RESTORE_SIGMASK;
-	WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags));
-}
-static inline void clear_restore_sigmask(void)
-{
-	current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-}
-static inline bool test_restore_sigmask(void)
-{
-	return current_thread_info()->status & TS_RESTORE_SIGMASK;
-}
-static inline bool test_and_clear_restore_sigmask(void)
-{
-	struct thread_info *ti = current_thread_info();
-	if (!(ti->status & TS_RESTORE_SIGMASK))
-		return false;
-	ti->status &= ~TS_RESTORE_SIGMASK;
-	return true;
-}
 
 static inline bool in_ia32_syscall(void)
 {
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index c03bfb68c503..a0ae610b9280 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -761,9 +761,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
 	 * case, and do only runtime checking for non-constant sizes.
 	 */
 
-	if (likely(sz < 0 || sz >= n))
+	if (likely(sz < 0 || sz >= n)) {
+		check_object_size(to, n, false);
 		n = _copy_from_user(to, from, n);
-	else if(__builtin_constant_p(n))
+	} else if (__builtin_constant_p(n))
 		copy_from_user_overflow();
 	else
 		__copy_from_user_overflow(sz, n);
@@ -781,9 +782,10 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
 	might_fault();
 
 	/* See the comment in copy_from_user() above. */
-	if (likely(sz < 0 || sz >= n))
+	if (likely(sz < 0 || sz >= n)) {
+		check_object_size(from, n, true);
 		n = _copy_to_user(to, from, n);
-	else if(__builtin_constant_p(n))
+	} else if (__builtin_constant_p(n))
 		copy_to_user_overflow();
 	else
 		__copy_to_user_overflow(sz, n);
@@ -812,21 +814,21 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
 #define user_access_begin()	__uaccess_begin()
 #define user_access_end()	__uaccess_end()
 
-#define unsafe_put_user(x, ptr)						\
-({										\
+#define unsafe_put_user(x, ptr, err_label)					\
+do {										\
 	int __pu_err;								\
 	__put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT);		\
-	__builtin_expect(__pu_err, 0);						\
-})
+	if (unlikely(__pu_err)) goto err_label;					\
+} while (0)
 
-#define unsafe_get_user(x, ptr)						\
-({										\
+#define unsafe_get_user(x, ptr, err_label)					\
+do {										\
 	int __gu_err;								\
 	unsigned long __gu_val;							\
 	__get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT);	\
 	(x) = (__force __typeof__(*(ptr)))__gu_val;				\
-	__builtin_expect(__gu_err, 0);						\
-})
+	if (unlikely(__gu_err)) goto err_label;					\
+} while (0)
 
 #endif /* _ASM_X86_UACCESS_H */
 
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 4b32da24faaf..7d3bdd1ed697 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -37,6 +37,7 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero
 static __always_inline unsigned long __must_check
 __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 {
+	check_object_size(from, n, true);
 	return __copy_to_user_ll(to, from, n);
 }
 
@@ -95,6 +96,7 @@ static __always_inline unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	might_fault();
+	check_object_size(to, n, false);
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
 
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 2eac2aa3e37f..673059a109fe 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -54,6 +54,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)
 {
 	int ret = 0;
 
+	check_object_size(dst, size, false);
 	if (!__builtin_constant_p(size))
 		return copy_user_generic(dst, (__force void *)src, size);
 	switch (size) {
@@ -119,6 +120,7 @@ int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size)
 {
 	int ret = 0;
 
+	check_object_size(src, size, true);
 	if (!__builtin_constant_p(size))
 		return copy_user_generic((__force void *)dst, src, size);
 	switch (size) {
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index cce9ee68e335..0116b2ee9e64 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -83,23 +83,19 @@ static inline void cpu_emergency_vmxoff(void)
  */
 static inline int cpu_has_svm(const char **msg)
 {
-	uint32_t eax, ebx, ecx, edx;
-
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
 		if (msg)
 			*msg = "not amd";
 		return 0;
 	}
 
-	cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
-	if (eax < SVM_CPUID_FUNC) {
+	if (boot_cpu_data.extended_cpuid_level < SVM_CPUID_FUNC) {
 		if (msg)
 			*msg = "can't execute cpuid_8000000a";
 		return 0;
 	}
 
-	cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
-	if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
+	if (!boot_cpu_has(X86_FEATURE_SVM)) {
 		if (msg)
 			*msg = "svm not available";
 		return 0;
diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h
index acd844c017d3..f02f025ff988 100644
--- a/arch/x86/include/asm/xen/page-coherent.h
+++ b/arch/x86/include/asm/xen/page-coherent.h
@@ -2,12 +2,11 @@
 #define _ASM_X86_XEN_PAGE_COHERENT_H
 
 #include <asm/page.h>
-#include <linux/dma-attrs.h>
 #include <linux/dma-mapping.h>
 
 static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
 		dma_addr_t *dma_handle, gfp_t flags,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	void *vstart = (void*)__get_free_pages(flags, get_order(size));
 	*dma_handle = virt_to_phys(vstart);
@@ -16,18 +15,18 @@ static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
 
 static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
 		void *cpu_addr, dma_addr_t dma_handle,
-		struct dma_attrs *attrs)
+		unsigned long attrs)
 {
 	free_pages((unsigned long) cpu_addr, get_order(size));
 }
 
 static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
 	     dma_addr_t dev_addr, unsigned long offset, size_t size,
-	     enum dma_data_direction dir, struct dma_attrs *attrs) { }
+	     enum dma_data_direction dir, unsigned long attrs) { }
 
 static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir,
-		struct dma_attrs *attrs) { }
+		unsigned long attrs) { }
 
 static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
 		dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 42d27a62a404..63ff468a7986 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -241,7 +241,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
 static dma_addr_t gart_map_page(struct device *dev, struct page *page,
 				unsigned long offset, size_t size,
 				enum dma_data_direction dir,
-				struct dma_attrs *attrs)
+				unsigned long attrs)
 {
 	unsigned long bus;
 	phys_addr_t paddr = page_to_phys(page) + offset;
@@ -263,7 +263,7 @@ static dma_addr_t gart_map_page(struct device *dev, struct page *page,
  */
 static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
 			    size_t size, enum dma_data_direction dir,
-			    struct dma_attrs *attrs)
+			    unsigned long attrs)
 {
 	unsigned long iommu_page;
 	int npages;
@@ -285,7 +285,7 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
  * Wrapper for pci_unmap_single working with scatterlists.
  */
 static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
-			  enum dma_data_direction dir, struct dma_attrs *attrs)
+			  enum dma_data_direction dir, unsigned long attrs)
 {
 	struct scatterlist *s;
 	int i;
@@ -293,7 +293,7 @@ static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
 	for_each_sg(sg, s, nents, i) {
 		if (!s->dma_length || !s->length)
 			break;
-		gart_unmap_page(dev, s->dma_address, s->dma_length, dir, NULL);
+		gart_unmap_page(dev, s->dma_address, s->dma_length, dir, 0);
 	}
 }
 
@@ -315,7 +315,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
 			addr = dma_map_area(dev, addr, s->length, dir, 0);
 			if (addr == bad_dma_addr) {
 				if (i > 0)
-					gart_unmap_sg(dev, sg, i, dir, NULL);
+					gart_unmap_sg(dev, sg, i, dir, 0);
 				nents = 0;
 				sg[0].dma_length = 0;
 				break;
@@ -386,7 +386,7 @@ dma_map_cont(struct device *dev, struct scatterlist *start, int nelems,
  * Merge chunks that have page aligned sizes into a continuous mapping.
  */
 static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-		       enum dma_data_direction dir, struct dma_attrs *attrs)
+		       enum dma_data_direction dir, unsigned long attrs)
 {
 	struct scatterlist *s, *ps, *start_sg, *sgmap;
 	int need = 0, nextneed, i, out, start;
@@ -456,7 +456,7 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 
 error:
 	flush_gart();
-	gart_unmap_sg(dev, sg, out, dir, NULL);
+	gart_unmap_sg(dev, sg, out, dir, 0);
 
 	/* When it was forced or merged try again in a dumb way */
 	if (force_iommu || iommu_merge) {
@@ -476,7 +476,7 @@ error:
 /* allocate and map a coherent mapping */
 static void *
 gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
-		    gfp_t flag, struct dma_attrs *attrs)
+		    gfp_t flag, unsigned long attrs)
 {
 	dma_addr_t paddr;
 	unsigned long align_mask;
@@ -508,9 +508,9 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
 /* free a coherent mapping */
 static void
 gart_free_coherent(struct device *dev, size_t size, void *vaddr,
-		   dma_addr_t dma_addr, struct dma_attrs *attrs)
+		   dma_addr_t dma_addr, unsigned long attrs)
 {
-	gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
+	gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0);
 	dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
 }
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 7943d38c57ca..20abd912f0e4 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -147,7 +147,7 @@ static int force_enable_local_apic __initdata;
  */
 static int __init parse_lapic(char *arg)
 {
-	if (config_enabled(CONFIG_X86_32) && !arg)
+	if (IS_ENABLED(CONFIG_X86_32) && !arg)
 		force_enable_local_apic = 1;
 	else if (arg && !strncmp(arg, "notscdeadline", 13))
 		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index a5e400afc563..6066d945c40e 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -523,7 +523,7 @@ static int apic_set_affinity(struct irq_data *irq_data,
 	struct apic_chip_data *data = irq_data->chip_data;
 	int err, irq = irq_data->irq;
 
-	if (!config_enabled(CONFIG_SMP))
+	if (!IS_ENABLED(CONFIG_SMP))
 		return -EPERM;
 
 	if (!cpumask_intersects(dest, cpu_online_mask))
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 9e231d88bb33..a184c210efba 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -159,8 +159,8 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 	struct task_struct *tsk = current;
 	int ia32_fxstate = (buf != buf_fx);
 
-	ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
-			 config_enabled(CONFIG_IA32_EMULATION));
+	ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) ||
+			 IS_ENABLED(CONFIG_IA32_EMULATION));
 
 	if (!access_ok(VERIFY_WRITE, buf, size))
 		return -EACCES;
@@ -268,8 +268,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 	u64 xfeatures = 0;
 	int fx_only = 0;
 
-	ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
-			 config_enabled(CONFIG_IA32_EMULATION));
+	ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) ||
+			 IS_ENABLED(CONFIG_IA32_EMULATION));
 
 	if (!buf) {
 		fpu__clear(fpu);
@@ -416,8 +416,8 @@ void fpu__init_prepare_fx_sw_frame(void)
 	fx_sw_reserved.xfeatures = xfeatures_mask;
 	fx_sw_reserved.xstate_size = fpu_user_xstate_size;
 
-	if (config_enabled(CONFIG_IA32_EMULATION) ||
-	    config_enabled(CONFIG_X86_32)) {
+	if (IS_ENABLED(CONFIG_IA32_EMULATION) ||
+	    IS_ENABLED(CONFIG_X86_32)) {
 		int fsave_header_size = sizeof(struct fregs_state);
 
 		fx_sw_reserved_ia32 = fx_sw_reserved;
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 3d747070fe67..ed16e58658a4 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1019,7 +1019,6 @@ void hpet_disable(void)
  */
 #include <linux/mc146818rtc.h>
 #include <linux/rtc.h>
-#include <asm/rtc.h>
 
 #define DEFAULT_RTC_INT_FREQ	64
 #define DEFAULT_RTC_SHIFT	6
@@ -1243,7 +1242,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
 	memset(&curr_time, 0, sizeof(struct rtc_time));
 
 	if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
-		get_rtc_time(&curr_time);
+		mc146818_set_time(&curr_time);
 
 	if (hpet_rtc_flags & RTC_UIE &&
 	    curr_time.tm_sec != hpet_prev_update_sec) {
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 04b132a767f1..bfe4d6c96fbd 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -17,6 +17,7 @@
 #include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/hardirq.h>
+#include <linux/ratelimit.h>
 #include <linux/slab.h>
 #include <linux/export.h>
 
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 833b1d329c47..5d400ba1349d 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -340,7 +340,7 @@ static inline struct iommu_table *find_iommu_table(struct device *dev)
 
 static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist,
 			     int nelems,enum dma_data_direction dir,
-			     struct dma_attrs *attrs)
+			     unsigned long attrs)
 {
 	struct iommu_table *tbl = find_iommu_table(dev);
 	struct scatterlist *s;
@@ -364,7 +364,7 @@ static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist,
 
 static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
 			  int nelems, enum dma_data_direction dir,
-			  struct dma_attrs *attrs)
+			  unsigned long attrs)
 {
 	struct iommu_table *tbl = find_iommu_table(dev);
 	struct scatterlist *s;
@@ -396,7 +396,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
 
 	return nelems;
 error:
-	calgary_unmap_sg(dev, sg, nelems, dir, NULL);
+	calgary_unmap_sg(dev, sg, nelems, dir, 0);
 	for_each_sg(sg, s, nelems, i) {
 		sg->dma_address = DMA_ERROR_CODE;
 		sg->dma_length = 0;
@@ -407,7 +407,7 @@ error:
 static dma_addr_t calgary_map_page(struct device *dev, struct page *page,
 				   unsigned long offset, size_t size,
 				   enum dma_data_direction dir,
-				   struct dma_attrs *attrs)
+				   unsigned long attrs)
 {
 	void *vaddr = page_address(page) + offset;
 	unsigned long uaddr;
@@ -422,7 +422,7 @@ static dma_addr_t calgary_map_page(struct device *dev, struct page *page,
 
 static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr,
 			       size_t size, enum dma_data_direction dir,
-			       struct dma_attrs *attrs)
+			       unsigned long attrs)
 {
 	struct iommu_table *tbl = find_iommu_table(dev);
 	unsigned int npages;
@@ -432,7 +432,7 @@ static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr,
 }
 
 static void* calgary_alloc_coherent(struct device *dev, size_t size,
-	dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs)
+	dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs)
 {
 	void *ret = NULL;
 	dma_addr_t mapping;
@@ -466,7 +466,7 @@ error:
 
 static void calgary_free_coherent(struct device *dev, size_t size,
 				  void *vaddr, dma_addr_t dma_handle,
-				  struct dma_attrs *attrs)
+				  unsigned long attrs)
 {
 	unsigned int npages;
 	struct iommu_table *tbl = find_iommu_table(dev);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 6ba014c61d62..d30c37750765 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -77,7 +77,7 @@ void __init pci_iommu_alloc(void)
 }
 void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 				 dma_addr_t *dma_addr, gfp_t flag,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
 	unsigned long dma_mask;
 	struct page *page;
@@ -120,7 +120,7 @@ again:
 }
 
 void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
-			       dma_addr_t dma_addr, struct dma_attrs *attrs)
+			       dma_addr_t dma_addr, unsigned long attrs)
 {
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	struct page *page = virt_to_page(vaddr);
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index da15918d1c81..00e71ce396a8 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -28,7 +28,7 @@ check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
 static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
 				 unsigned long offset, size_t size,
 				 enum dma_data_direction dir,
-				 struct dma_attrs *attrs)
+				 unsigned long attrs)
 {
 	dma_addr_t bus = page_to_phys(page) + offset;
 	WARN_ON(size == 0);
@@ -55,7 +55,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
  */
 static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
 			int nents, enum dma_data_direction dir,
-			struct dma_attrs *attrs)
+			unsigned long attrs)
 {
 	struct scatterlist *s;
 	int i;
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 5069ef560d83..b47edb8f5256 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -16,7 +16,7 @@ int swiotlb __read_mostly;
 
 void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 					dma_addr_t *dma_handle, gfp_t flags,
-					struct dma_attrs *attrs)
+					unsigned long attrs)
 {
 	void *vaddr;
 
@@ -37,7 +37,7 @@ void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 
 void x86_swiotlb_free_coherent(struct device *dev, size_t size,
 				      void *vaddr, dma_addr_t dma_addr,
-				      struct dma_attrs *attrs)
+				      unsigned long attrs)
 {
 	if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
 		swiotlb_free_coherent(dev, size, vaddr, dma_addr);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 600edd225e81..f79576a541ff 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -923,15 +923,18 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
 
 	case offsetof(struct user32, regs.orig_eax):
 		/*
-		 * A 32-bit debugger setting orig_eax means to restore
-		 * the state of the task restarting a 32-bit syscall.
-		 * Make sure we interpret the -ERESTART* codes correctly
-		 * in case the task is not actually still sitting at the
-		 * exit from a 32-bit syscall with TS_COMPAT still set.
+		 * Warning: bizarre corner case fixup here.  A 32-bit
+		 * debugger setting orig_eax to -1 wants to disable
+		 * syscall restart.  Make sure that the syscall
+		 * restart code sign-extends orig_ax.  Also make sure
+		 * we interpret the -ERESTART* codes correctly if
+		 * loaded into regs->ax in case the task is not
+		 * actually still sitting at the exit from a 32-bit
+		 * syscall with TS_COMPAT still set.
 		 */
 		regs->orig_ax = value;
 		if (syscall_get_nr(child, regs) >= 0)
-			task_thread_info(child)->status |= TS_COMPAT;
+			task_thread_info(child)->status |= TS_I386_REGS_POKED;
 		break;
 
 	case offsetof(struct user32, regs.eflags):
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 06c58ce46762..3599404e3089 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -64,14 +64,9 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
 	u8 flags;
 
 	do {
-		version = src->version;
-		/* Make the latest version visible */
-		smp_rmb();
-
+		version = pvclock_read_begin(src);
 		flags = src->flags;
-		/* Make sure that the version double-check is last. */
-		smp_rmb();
-	} while ((src->version & 1) || version != src->version);
+	} while (pvclock_read_retry(src, version));
 
 	return flags & valid_flags;
 }
@@ -84,10 +79,10 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
 	u8 flags;
 
 	do {
-		version = __pvclock_read_cycles(src, &ret, &flags);
-		/* Make sure that the version double-check is last. */
-		smp_rmb();
-	} while ((src->version & 1) || version != src->version);
+		version = pvclock_read_begin(src);
+		ret = __pvclock_read_cycles(src);
+		flags = src->flags;
+	} while (pvclock_read_retry(src, version));
 
 	if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
 		src->flags &= ~PVCLOCK_GUEST_STOPPED;
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index eceaa082ec3f..79c6311cd912 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -13,7 +13,6 @@
 #include <asm/x86_init.h>
 #include <asm/time.h>
 #include <asm/intel-mid.h>
-#include <asm/rtc.h>
 #include <asm/setup.h>
 
 #ifdef CONFIG_X86_32
@@ -47,7 +46,7 @@ int mach_set_rtc_mmss(const struct timespec *now)
 
 	rtc_time_to_tm(nowtime, &tm);
 	if (!rtc_valid_tm(&tm)) {
-		retval = set_rtc_time(&tm);
+		retval = mc146818_set_time(&tm);
 		if (retval)
 			printk(KERN_ERR "%s: RTC write failed with error %d\n",
 			       __func__, retval);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 22cc2f9f8aec..04cb3212db2d 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -146,7 +146,7 @@ static int restore_sigcontext(struct pt_regs *regs,
 		buf = (void __user *)buf_val;
 	} get_user_catch(err);
 
-	err |= fpu__restore_sig(buf, config_enabled(CONFIG_X86_32));
+	err |= fpu__restore_sig(buf, IS_ENABLED(CONFIG_X86_32));
 
 	force_iret();
 
@@ -245,14 +245,14 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 	struct fpu *fpu = &current->thread.fpu;
 
 	/* redzone */
-	if (config_enabled(CONFIG_X86_64))
+	if (IS_ENABLED(CONFIG_X86_64))
 		sp -= 128;
 
 	/* This is the X/Open sanctioned signal stack switching.  */
 	if (ka->sa.sa_flags & SA_ONSTACK) {
 		if (sas_ss_flags(sp) == 0)
 			sp = current->sas_ss_sp + current->sas_ss_size;
-	} else if (config_enabled(CONFIG_X86_32) &&
+	} else if (IS_ENABLED(CONFIG_X86_32) &&
 		   !onsigstack &&
 		   (regs->ss & 0xffff) != __USER_DS &&
 		   !(ka->sa.sa_flags & SA_RESTORER) &&
@@ -262,7 +262,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 	}
 
 	if (fpu->fpstate_active) {
-		sp = fpu__alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
+		sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
 					  &buf_fx, &math_size);
 		*fpstate = (void __user *)sp;
 	}
@@ -662,18 +662,18 @@ badframe:
 
 static inline int is_ia32_compat_frame(void)
 {
-	return config_enabled(CONFIG_IA32_EMULATION) &&
+	return IS_ENABLED(CONFIG_IA32_EMULATION) &&
 	       test_thread_flag(TIF_IA32);
 }
 
 static inline int is_ia32_frame(void)
 {
-	return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame();
+	return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame();
 }
 
 static inline int is_x32_frame(void)
 {
-	return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
+	return IS_ENABLED(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
 }
 
 static int
@@ -760,8 +760,30 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 
 static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 {
-#ifdef CONFIG_X86_64
-	if (in_ia32_syscall())
+	/*
+	 * This function is fundamentally broken as currently
+	 * implemented.
+	 *
+	 * The idea is that we want to trigger a call to the
+	 * restart_block() syscall and that we want in_ia32_syscall(),
+	 * in_x32_syscall(), etc. to match whatever they were in the
+	 * syscall being restarted.  We assume that the syscall
+	 * instruction at (regs->ip - 2) matches whatever syscall
+	 * instruction we used to enter in the first place.
+	 *
+	 * The problem is that we can get here when ptrace pokes
+	 * syscall-like values into regs even if we're not in a syscall
+	 * at all.
+	 *
+	 * For now, we maintain historical behavior and guess based on
+	 * stored state.  We could do better by saving the actual
+	 * syscall arch in restart_block or (with caveats on x32) by
+	 * checking if regs->ip points to 'int $0x80'.  The current
+	 * behavior is incorrect if a tracer has a different bitness
+	 * than the tracee.
+	 */
+#ifdef CONFIG_IA32_EMULATION
+	if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
 		return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 639a6e34500c..ab8e32f7b9a8 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -32,7 +32,6 @@ config KVM
 	select HAVE_KVM_IRQ_BYPASS
 	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_EVENTFD
-	select KVM_APIC_ARCHITECTURE
 	select KVM_ASYNC_PF
 	select USER_RETURN_NOTIFIER
 	select KVM_MMIO
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index a4bf5b45d65a..5fb6c620180e 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -645,7 +645,6 @@ static const struct kvm_io_device_ops speaker_dev_ops = {
 	.write    = speaker_ioport_write,
 };
 
-/* Caller must hold slots_lock */
 struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 {
 	struct kvm_pit *pit;
@@ -690,6 +689,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 
 	kvm_pit_set_reinject(pit, true);
 
+	mutex_lock(&kvm->slots_lock);
 	kvm_iodevice_init(&pit->dev, &pit_dev_ops);
 	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS,
 				      KVM_PIT_MEM_LENGTH, &pit->dev);
@@ -704,12 +704,14 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 		if (ret < 0)
 			goto fail_register_speaker;
 	}
+	mutex_unlock(&kvm->slots_lock);
 
 	return pit;
 
 fail_register_speaker:
 	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
 fail_register_pit:
+	mutex_unlock(&kvm->slots_lock);
 	kvm_pit_set_reinject(pit, false);
 	kthread_stop(pit->worker_task);
 fail_kthread:
diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c
index 95e0e6481f07..b181426f67b4 100644
--- a/arch/x86/kvm/iommu.c
+++ b/arch/x86/kvm/iommu.c
@@ -28,9 +28,7 @@
 #include <linux/moduleparam.h>
 #include <linux/pci.h>
 #include <linux/stat.h>
-#include <linux/dmar.h>
 #include <linux/iommu.h>
-#include <linux/intel-iommu.h>
 #include "assigned-dev.h"
 
 static bool allow_unsafe_assigned_interrupts;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 61ebdc13a29a..035731eb3897 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -120,4 +120,7 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
 
 int apic_has_pending_timer(struct kvm_vcpu *vcpu);
 
+int kvm_setup_default_irq_routing(struct kvm *kvm);
+int kvm_setup_empty_irq_routing(struct kvm *kvm);
+
 #endif
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index dfb4c6476877..25810b144b58 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -110,13 +110,17 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	return r;
 }
 
-void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
 		     struct kvm_lapic_irq *irq)
 {
-	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+	trace_kvm_msi_set_irq(e->msi.address_lo | (kvm->arch.x2apic_format ?
+	                                     (u64)e->msi.address_hi << 32 : 0),
+	                      e->msi.data);
 
 	irq->dest_id = (e->msi.address_lo &
 			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+	if (kvm->arch.x2apic_format)
+		irq->dest_id |= MSI_ADDR_EXT_DEST_ID(e->msi.address_hi);
 	irq->vector = (e->msi.data &
 			MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
 	irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
@@ -129,15 +133,24 @@ void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
 }
 EXPORT_SYMBOL_GPL(kvm_set_msi_irq);
 
+static inline bool kvm_msi_route_invalid(struct kvm *kvm,
+		struct kvm_kernel_irq_routing_entry *e)
+{
+	return kvm->arch.x2apic_format && (e->msi.address_hi & 0xff);
+}
+
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 		struct kvm *kvm, int irq_source_id, int level, bool line_status)
 {
 	struct kvm_lapic_irq irq;
 
+	if (kvm_msi_route_invalid(kvm, e))
+		return -EINVAL;
+
 	if (!level)
 		return -1;
 
-	kvm_set_msi_irq(e, &irq);
+	kvm_set_msi_irq(kvm, e, &irq);
 
 	return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
 }
@@ -153,7 +166,10 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
 	if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
 		return -EWOULDBLOCK;
 
-	kvm_set_msi_irq(e, &irq);
+	if (kvm_msi_route_invalid(kvm, e))
+		return -EINVAL;
+
+	kvm_set_msi_irq(kvm, e, &irq);
 
 	if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
 		return r;
@@ -248,7 +264,8 @@ static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
 	return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
 }
 
-int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm *kvm,
+			  struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue)
 {
 	int r = -EINVAL;
@@ -285,6 +302,9 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 		e->msi.address_lo = ue->u.msi.address_lo;
 		e->msi.address_hi = ue->u.msi.address_hi;
 		e->msi.data = ue->u.msi.data;
+
+		if (kvm_msi_route_invalid(kvm, e))
+			goto out;
 		break;
 	case KVM_IRQ_ROUTING_HV_SINT:
 		e->set = kvm_hv_set_sint;
@@ -388,21 +408,16 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
 			       kvm->arch.nr_reserved_ioapic_pins);
 	for (i = 0; i < nr_ioapic_pins; ++i) {
 		hlist_for_each_entry(entry, &table->map[i], link) {
-			u32 dest_id, dest_mode;
-			bool level;
+			struct kvm_lapic_irq irq;
 
 			if (entry->type != KVM_IRQ_ROUTING_MSI)
 				continue;
-			dest_id = (entry->msi.address_lo >> 12) & 0xff;
-			dest_mode = (entry->msi.address_lo >> 2) & 0x1;
-			level = entry->msi.data & MSI_DATA_TRIGGER_LEVEL;
-			if (level && kvm_apic_match_dest(vcpu, NULL, 0,
-						dest_id, dest_mode)) {
-				u32 vector = entry->msi.data & 0xff;
-
-				__set_bit(vector,
-					  ioapic_handled_vectors);
-			}
+
+			kvm_set_msi_irq(vcpu->kvm, entry, &irq);
+
+			if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0,
+						irq.dest_id, irq.dest_mode))
+				__set_bit(irq.vector, ioapic_handled_vectors);
 		}
 	}
 	srcu_read_unlock(&kvm->irq_srcu, idx);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 57549ed47ca5..b62c85229711 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -115,26 +115,43 @@ static inline int apic_enabled(struct kvm_lapic *apic)
 	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
 	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 
-/* The logical map is definitely wrong if we have multiple
- * modes at the same time.  (Physical map is always right.)
- */
-static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map)
-{
-	return !(map->mode & (map->mode - 1));
+static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
+		u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
+	switch (map->mode) {
+	case KVM_APIC_MODE_X2APIC: {
+		u32 offset = (dest_id >> 16) * 16;
+		u32 max_apic_id = map->max_apic_id;
+
+		if (offset <= max_apic_id) {
+			u8 cluster_size = min(max_apic_id - offset + 1, 16U);
+
+			*cluster = &map->phys_map[offset];
+			*mask = dest_id & (0xffff >> (16 - cluster_size));
+		} else {
+			*mask = 0;
+		}
+
+		return true;
+		}
+	case KVM_APIC_MODE_XAPIC_FLAT:
+		*cluster = map->xapic_flat_map;
+		*mask = dest_id & 0xff;
+		return true;
+	case KVM_APIC_MODE_XAPIC_CLUSTER:
+		*cluster = map->xapic_cluster_map[dest_id >> 4];
+		*mask = dest_id & 0xf;
+		return true;
+	default:
+		/* Not optimized. */
+		return false;
+	}
 }
 
-static inline void
-apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid)
+static void kvm_apic_map_free(struct rcu_head *rcu)
 {
-	unsigned lid_bits;
+	struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
 
-	BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_CLUSTER !=  4);
-	BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_FLAT    !=  8);
-	BUILD_BUG_ON(KVM_APIC_MODE_X2APIC        != 16);
-	lid_bits = map->mode;
-
-	*cid = dest_id >> lid_bits;
-	*lid = dest_id & ((1 << lid_bits) - 1);
+	kvfree(map);
 }
 
 static void recalculate_apic_map(struct kvm *kvm)
@@ -142,17 +159,26 @@ static void recalculate_apic_map(struct kvm *kvm)
 	struct kvm_apic_map *new, *old = NULL;
 	struct kvm_vcpu *vcpu;
 	int i;
-
-	new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);
+	u32 max_id = 255;
 
 	mutex_lock(&kvm->arch.apic_map_lock);
 
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		if (kvm_apic_present(vcpu))
+			max_id = max(max_id, kvm_apic_id(vcpu->arch.apic));
+
+	new = kvm_kvzalloc(sizeof(struct kvm_apic_map) +
+	                   sizeof(struct kvm_lapic *) * ((u64)max_id + 1));
+
 	if (!new)
 		goto out;
 
+	new->max_apic_id = max_id;
+
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		struct kvm_lapic *apic = vcpu->arch.apic;
-		u16 cid, lid;
+		struct kvm_lapic **cluster;
+		u16 mask;
 		u32 ldr, aid;
 
 		if (!kvm_apic_present(vcpu))
@@ -161,7 +187,7 @@ static void recalculate_apic_map(struct kvm *kvm)
 		aid = kvm_apic_id(apic);
 		ldr = kvm_lapic_get_reg(apic, APIC_LDR);
 
-		if (aid < ARRAY_SIZE(new->phys_map))
+		if (aid <= new->max_apic_id)
 			new->phys_map[aid] = apic;
 
 		if (apic_x2apic_mode(apic)) {
@@ -174,13 +200,11 @@ static void recalculate_apic_map(struct kvm *kvm)
 				new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
 		}
 
-		if (!kvm_apic_logical_map_valid(new))
+		if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
 			continue;
 
-		apic_logical_id(new, ldr, &cid, &lid);
-
-		if (lid && cid < ARRAY_SIZE(new->logical_map))
-			new->logical_map[cid][ffs(lid) - 1] = apic;
+		if (mask)
+			cluster[ffs(mask) - 1] = apic;
 	}
 out:
 	old = rcu_dereference_protected(kvm->arch.apic_map,
@@ -189,7 +213,7 @@ out:
 	mutex_unlock(&kvm->arch.apic_map_lock);
 
 	if (old)
-		kfree_rcu(old, rcu);
+		call_rcu(&old->rcu, kvm_apic_map_free);
 
 	kvm_make_scan_ioapic_request(kvm);
 }
@@ -210,7 +234,7 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 	}
 }
 
-static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
+static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
 {
 	kvm_lapic_set_reg(apic, APIC_ID, id << 24);
 	recalculate_apic_map(apic->vcpu->kvm);
@@ -222,11 +246,11 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
 	recalculate_apic_map(apic->vcpu->kvm);
 }
 
-static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u8 id)
+static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
 {
 	u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
 
-	kvm_lapic_set_reg(apic, APIC_ID, id << 24);
+	kvm_lapic_set_reg(apic, APIC_ID, id);
 	kvm_lapic_set_reg(apic, APIC_LDR, ldr);
 	recalculate_apic_map(apic->vcpu->kvm);
 }
@@ -599,17 +623,30 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
 	}
 }
 
-/* KVM APIC implementation has two quirks
- *  - dest always begins at 0 while xAPIC MDA has offset 24,
- *  - IOxAPIC messages have to be delivered (directly) to x2APIC.
+/* The KVM local APIC implementation has two quirks:
+ *
+ *  - the xAPIC MDA stores the destination at bits 24-31, while this
+ *    is not true of struct kvm_lapic_irq's dest_id field.  This is
+ *    just a quirk in the API and is not problematic.
+ *
+ *  - in-kernel IOAPIC messages have to be delivered directly to
+ *    x2APIC, because the kernel does not support interrupt remapping.
+ *    In order to support broadcast without interrupt remapping, x2APIC
+ *    rewrites the destination of non-IPI messages from APIC_BROADCAST
+ *    to X2APIC_BROADCAST.
+ *
+ * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
+ * important when userspace wants to use x2APIC-format MSIs, because
+ * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
  */
-static u32 kvm_apic_mda(unsigned int dest_id, struct kvm_lapic *source,
-                                              struct kvm_lapic *target)
+static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
+		struct kvm_lapic *source, struct kvm_lapic *target)
 {
 	bool ipi = source != NULL;
 	bool x2apic_mda = apic_x2apic_mode(ipi ? source : target);
 
-	if (!ipi && dest_id == APIC_BROADCAST && x2apic_mda)
+	if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
+	    !ipi && dest_id == APIC_BROADCAST && x2apic_mda)
 		return X2APIC_BROADCAST;
 
 	return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id);
@@ -619,7 +656,7 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 			   int short_hand, unsigned int dest, int dest_mode)
 {
 	struct kvm_lapic *target = vcpu->arch.apic;
-	u32 mda = kvm_apic_mda(dest, source, target);
+	u32 mda = kvm_apic_mda(vcpu, dest, source, target);
 
 	apic_debug("target %p, source %p, dest 0x%x, "
 		   "dest_mode 0x%x, short_hand 0x%x\n",
@@ -671,102 +708,126 @@ static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
 	}
 }
 
-bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
-		struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
+static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
+		struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
 {
-	struct kvm_apic_map *map;
-	unsigned long bitmap = 1;
-	struct kvm_lapic **dst;
-	int i;
-	bool ret, x2apic_ipi;
+	if (kvm->arch.x2apic_broadcast_quirk_disabled) {
+		if ((irq->dest_id == APIC_BROADCAST &&
+				map->mode != KVM_APIC_MODE_X2APIC))
+			return true;
+		if (irq->dest_id == X2APIC_BROADCAST)
+			return true;
+	} else {
+		bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
+		if (irq->dest_id == (x2apic_ipi ?
+		                     X2APIC_BROADCAST : APIC_BROADCAST))
+			return true;
+	}
 
-	*r = -1;
+	return false;
+}
 
-	if (irq->shorthand == APIC_DEST_SELF) {
-		*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
-		return true;
-	}
+/* Return true if the interrupt can be handled by using *bitmap as index mask
+ * for valid destinations in *dst array.
+ * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
+ * Note: we may have zero kvm_lapic destinations when we return true, which
+ * means that the interrupt should be dropped.  In this case, *bitmap would be
+ * zero and *dst undefined.
+ */
+static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
+		struct kvm_lapic **src, struct kvm_lapic_irq *irq,
+		struct kvm_apic_map *map, struct kvm_lapic ***dst,
+		unsigned long *bitmap)
+{
+	int i, lowest;
 
-	if (irq->shorthand)
+	if (irq->shorthand == APIC_DEST_SELF && src) {
+		*dst = src;
+		*bitmap = 1;
+		return true;
+	} else if (irq->shorthand)
 		return false;
 
-	x2apic_ipi = src && apic_x2apic_mode(src);
-	if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST))
+	if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
 		return false;
 
-	ret = true;
-	rcu_read_lock();
-	map = rcu_dereference(kvm->arch.apic_map);
-
-	if (!map) {
-		ret = false;
-		goto out;
+	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
+		if (irq->dest_id > map->max_apic_id) {
+			*bitmap = 0;
+		} else {
+			*dst = &map->phys_map[irq->dest_id];
+			*bitmap = 1;
+		}
+		return true;
 	}
 
-	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
-		if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
-			goto out;
+	*bitmap = 0;
+	if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
+				(u16 *)bitmap))
+		return false;
 
-		dst = &map->phys_map[irq->dest_id];
-	} else {
-		u16 cid;
+	if (!kvm_lowest_prio_delivery(irq))
+		return true;
 
-		if (!kvm_apic_logical_map_valid(map)) {
-			ret = false;
-			goto out;
+	if (!kvm_vector_hashing_enabled()) {
+		lowest = -1;
+		for_each_set_bit(i, bitmap, 16) {
+			if (!(*dst)[i])
+				continue;
+			if (lowest < 0)
+				lowest = i;
+			else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
+						(*dst)[lowest]->vcpu) < 0)
+				lowest = i;
 		}
+	} else {
+		if (!*bitmap)
+			return true;
 
-		apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
+		lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
+				bitmap, 16);
 
-		if (cid >= ARRAY_SIZE(map->logical_map))
-			goto out;
+		if (!(*dst)[lowest]) {
+			kvm_apic_disabled_lapic_found(kvm);
+			*bitmap = 0;
+			return true;
+		}
+	}
 
-		dst = map->logical_map[cid];
+	*bitmap = (lowest >= 0) ? 1 << lowest : 0;
 
-		if (!kvm_lowest_prio_delivery(irq))
-			goto set_irq;
+	return true;
+}
 
-		if (!kvm_vector_hashing_enabled()) {
-			int l = -1;
-			for_each_set_bit(i, &bitmap, 16) {
-				if (!dst[i])
-					continue;
-				if (l < 0)
-					l = i;
-				else if (kvm_apic_compare_prio(dst[i]->vcpu,
-							dst[l]->vcpu) < 0)
-					l = i;
-			}
-			bitmap = (l >= 0) ? 1 << l : 0;
-		} else {
-			int idx;
-			unsigned int dest_vcpus;
+bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+		struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
+{
+	struct kvm_apic_map *map;
+	unsigned long bitmap;
+	struct kvm_lapic **dst = NULL;
+	int i;
+	bool ret;
 
-			dest_vcpus = hweight16(bitmap);
-			if (dest_vcpus == 0)
-				goto out;
+	*r = -1;
 
-			idx = kvm_vector_to_index(irq->vector,
-				dest_vcpus, &bitmap, 16);
+	if (irq->shorthand == APIC_DEST_SELF) {
+		*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
+		return true;
+	}
 
-			if (!dst[idx]) {
-				kvm_apic_disabled_lapic_found(kvm);
-				goto out;
-			}
+	rcu_read_lock();
+	map = rcu_dereference(kvm->arch.apic_map);
 
-			bitmap = (idx >= 0) ? 1 << idx : 0;
+	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
+	if (ret)
+		for_each_set_bit(i, &bitmap, 16) {
+			if (!dst[i])
+				continue;
+			if (*r < 0)
+				*r = 0;
+			*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
 		}
-	}
 
-set_irq:
-	for_each_set_bit(i, &bitmap, 16) {
-		if (!dst[i])
-			continue;
-		if (*r < 0)
-			*r = 0;
-		*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
-	}
-out:
 	rcu_read_unlock();
 	return ret;
 }
@@ -789,8 +850,9 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 			struct kvm_vcpu **dest_vcpu)
 {
 	struct kvm_apic_map *map;
+	unsigned long bitmap;
+	struct kvm_lapic **dst = NULL;
 	bool ret = false;
-	struct kvm_lapic *dst = NULL;
 
 	if (irq->shorthand)
 		return false;
@@ -798,69 +860,16 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 	rcu_read_lock();
 	map = rcu_dereference(kvm->arch.apic_map);
 
-	if (!map)
-		goto out;
-
-	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
-		if (irq->dest_id == 0xFF)
-			goto out;
-
-		if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
-			goto out;
-
-		dst = map->phys_map[irq->dest_id];
-		if (dst && kvm_apic_present(dst->vcpu))
-			*dest_vcpu = dst->vcpu;
-		else
-			goto out;
-	} else {
-		u16 cid;
-		unsigned long bitmap = 1;
-		int i, r = 0;
-
-		if (!kvm_apic_logical_map_valid(map))
-			goto out;
-
-		apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
-
-		if (cid >= ARRAY_SIZE(map->logical_map))
-			goto out;
-
-		if (kvm_vector_hashing_enabled() &&
-				kvm_lowest_prio_delivery(irq)) {
-			int idx;
-			unsigned int dest_vcpus;
-
-			dest_vcpus = hweight16(bitmap);
-			if (dest_vcpus == 0)
-				goto out;
+	if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
+			hweight16(bitmap) == 1) {
+		unsigned long i = find_first_bit(&bitmap, 16);
 
-			idx = kvm_vector_to_index(irq->vector, dest_vcpus,
-						  &bitmap, 16);
-
-			dst = map->logical_map[cid][idx];
-			if (!dst) {
-				kvm_apic_disabled_lapic_found(kvm);
-				goto out;
-			}
-
-			*dest_vcpu = dst->vcpu;
-		} else {
-			for_each_set_bit(i, &bitmap, 16) {
-				dst = map->logical_map[cid][i];
-				if (++r == 2)
-					goto out;
-			}
-
-			if (dst && kvm_apic_present(dst->vcpu))
-				*dest_vcpu = dst->vcpu;
-			else
-				goto out;
+		if (dst[i]) {
+			*dest_vcpu = dst[i]->vcpu;
+			ret = true;
 		}
 	}
 
-	ret = true;
-out:
 	rcu_read_unlock();
 	return ret;
 }
@@ -1127,12 +1136,6 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 		return 0;
 
 	switch (offset) {
-	case APIC_ID:
-		if (apic_x2apic_mode(apic))
-			val = kvm_apic_id(apic);
-		else
-			val = kvm_apic_id(apic) << 24;
-		break;
 	case APIC_ARBPRI:
 		apic_debug("Access APIC ARBPRI register which is for P6\n");
 		break;
@@ -1314,6 +1317,111 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
 			nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
 }
 
+static void start_sw_tscdeadline(struct kvm_lapic *apic)
+{
+	u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
+	u64 ns = 0;
+	ktime_t expire;
+	struct kvm_vcpu *vcpu = apic->vcpu;
+	unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
+	unsigned long flags;
+	ktime_t now;
+
+	if (unlikely(!tscdeadline || !this_tsc_khz))
+		return;
+
+	local_irq_save(flags);
+
+	now = apic->lapic_timer.timer.base->get_time();
+	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+	if (likely(tscdeadline > guest_tsc)) {
+		ns = (tscdeadline - guest_tsc) * 1000000ULL;
+		do_div(ns, this_tsc_khz);
+		expire = ktime_add_ns(now, ns);
+		expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
+		hrtimer_start(&apic->lapic_timer.timer,
+				expire, HRTIMER_MODE_ABS_PINNED);
+	} else
+		apic_timer_expired(apic);
+
+	local_irq_restore(flags);
+}
+
+bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
+{
+	if (!lapic_in_kernel(vcpu))
+		return false;
+
+	return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
+
+static void cancel_hv_tscdeadline(struct kvm_lapic *apic)
+{
+	kvm_x86_ops->cancel_hv_timer(apic->vcpu);
+	apic->lapic_timer.hv_timer_in_use = false;
+}
+
+void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
+	WARN_ON(swait_active(&vcpu->wq));
+	cancel_hv_tscdeadline(apic);
+	apic_timer_expired(apic);
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
+
+static bool start_hv_tscdeadline(struct kvm_lapic *apic)
+{
+	u64 tscdeadline = apic->lapic_timer.tscdeadline;
+
+	if (atomic_read(&apic->lapic_timer.pending) ||
+		kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
+		if (apic->lapic_timer.hv_timer_in_use)
+			cancel_hv_tscdeadline(apic);
+	} else {
+		apic->lapic_timer.hv_timer_in_use = true;
+		hrtimer_cancel(&apic->lapic_timer.timer);
+
+		/* In case the sw timer triggered in the window */
+		if (atomic_read(&apic->lapic_timer.pending))
+			cancel_hv_tscdeadline(apic);
+	}
+	trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
+			apic->lapic_timer.hv_timer_in_use);
+	return apic->lapic_timer.hv_timer_in_use;
+}
+
+void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	WARN_ON(apic->lapic_timer.hv_timer_in_use);
+
+	if (apic_lvtt_tscdeadline(apic))
+		start_hv_tscdeadline(apic);
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
+
+void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	/* Possibly the TSC deadline timer is not enabled yet */
+	if (!apic->lapic_timer.hv_timer_in_use)
+		return;
+
+	cancel_hv_tscdeadline(apic);
+
+	if (atomic_read(&apic->lapic_timer.pending))
+		return;
+
+	start_sw_tscdeadline(apic);
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
+
 static void start_apic_timer(struct kvm_lapic *apic)
 {
 	ktime_t now;
@@ -1360,32 +1468,8 @@ static void start_apic_timer(struct kvm_lapic *apic)
 			   ktime_to_ns(ktime_add_ns(now,
 					apic->lapic_timer.period)));
 	} else if (apic_lvtt_tscdeadline(apic)) {
-		/* lapic timer in tsc deadline mode */
-		u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
-		u64 ns = 0;
-		ktime_t expire;
-		struct kvm_vcpu *vcpu = apic->vcpu;
-		unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
-		unsigned long flags;
-
-		if (unlikely(!tscdeadline || !this_tsc_khz))
-			return;
-
-		local_irq_save(flags);
-
-		now = apic->lapic_timer.timer.base->get_time();
-		guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
-		if (likely(tscdeadline > guest_tsc)) {
-			ns = (tscdeadline - guest_tsc) * 1000000ULL;
-			do_div(ns, this_tsc_khz);
-			expire = ktime_add_ns(now, ns);
-			expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
-			hrtimer_start(&apic->lapic_timer.timer,
-				      expire, HRTIMER_MODE_ABS_PINNED);
-		} else
-			apic_timer_expired(apic);
-
-		local_irq_restore(flags);
+		if (!(kvm_x86_ops->set_hv_timer && start_hv_tscdeadline(apic)))
+			start_sw_tscdeadline(apic);
 	}
 }
 
@@ -1413,7 +1497,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 	switch (reg) {
 	case APIC_ID:		/* Local APIC ID */
 		if (!apic_x2apic_mode(apic))
-			kvm_apic_set_id(apic, val >> 24);
+			kvm_apic_set_xapic_id(apic, val >> 24);
 		else
 			ret = 1;
 		break;
@@ -1674,9 +1758,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 
 	/* update jump label if enable bit changes */
 	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
-		if (value & MSR_IA32_APICBASE_ENABLE)
+		if (value & MSR_IA32_APICBASE_ENABLE) {
+			kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
 			static_key_slow_dec_deferred(&apic_hw_disabled);
-		else
+		} else
 			static_key_slow_inc(&apic_hw_disabled.key);
 		recalculate_apic_map(vcpu->kvm);
 	}
@@ -1716,8 +1801,11 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 	/* Stop the timer in case it's a reset to an active apic */
 	hrtimer_cancel(&apic->lapic_timer.timer);
 
-	if (!init_event)
-		kvm_apic_set_id(apic, vcpu->vcpu_id);
+	if (!init_event) {
+		kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
+		                         MSR_IA32_APICBASE_ENABLE);
+		kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
+	}
 	kvm_apic_set_version(apic->vcpu);
 
 	for (i = 0; i < KVM_APIC_LVT_NUM; i++)
@@ -1856,9 +1944,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	 * thinking that APIC satet has changed.
 	 */
 	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
-	kvm_lapic_set_base(vcpu,
-			APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);
-
 	static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
 	kvm_lapic_reset(vcpu, false);
 	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
@@ -1938,17 +2023,48 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 	return vector;
 }
 
-void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
-		struct kvm_lapic_state *s)
+static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
+		struct kvm_lapic_state *s, bool set)
+{
+	if (apic_x2apic_mode(vcpu->arch.apic)) {
+		u32 *id = (u32 *)(s->regs + APIC_ID);
+
+		if (vcpu->kvm->arch.x2apic_format) {
+			if (*id != vcpu->vcpu_id)
+				return -EINVAL;
+		} else {
+			if (set)
+				*id >>= 24;
+			else
+				*id <<= 24;
+		}
+	}
+
+	return 0;
+}
+
+int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
+{
+	memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
+	return kvm_apic_state_fixup(vcpu, s, false);
+}
+
+int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
+	int r;
+
 
 	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
 	/* set SPIV separately to get count of SW disabled APICs right */
 	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
+
+	r = kvm_apic_state_fixup(vcpu, s, true);
+	if (r)
+		return r;
 	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
-	/* call kvm_apic_set_id() to put apic into apic_map */
-	kvm_apic_set_id(apic, kvm_apic_id(apic));
+
+	recalculate_apic_map(vcpu->kvm);
 	kvm_apic_set_version(vcpu);
 
 	apic_update_ppr(apic);
@@ -1974,6 +2090,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 		kvm_rtc_eoi_tracking_restore_one(vcpu);
 
 	vcpu->arch.apic_arb_prio = 0;
+
+	return 0;
 }
 
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 891c6da7d4aa..f60d01c29d51 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -20,6 +20,7 @@ struct kvm_timer {
 	u64 tscdeadline;
 	u64 expired_tscdeadline;
 	atomic_t pending;			/* accumulated triggered timers */
+	bool hv_timer_in_use;
 };
 
 struct kvm_lapic {
@@ -80,8 +81,8 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
-void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
-		struct kvm_lapic_state *s);
+int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
+int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
 
 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
@@ -199,9 +200,15 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
 	return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
 }
 
-static inline int kvm_apic_id(struct kvm_lapic *apic)
+static inline u32 kvm_apic_id(struct kvm_lapic *apic)
 {
-	return (kvm_lapic_get_reg(apic, APIC_ID) >> 24) & 0xff;
+	/* To avoid a race between apic_base and following APIC_ID update when
+	 * switching to x2apic_mode, the x2apic mode returns initial x2apic id.
+	 */
+	if (apic_x2apic_mode(apic))
+		return apic->vcpu->vcpu_id;
+
+	return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
 }
 
 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
@@ -212,4 +219,8 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 			struct kvm_vcpu **dest_vcpu);
 int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
 			const unsigned long *bitmap, u32 bitmap_size);
+void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu);
+void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu);
+void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu);
+bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu);
 #endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 745a5f445ae2..3d4cc8cc56a3 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -176,6 +176,7 @@ static u64 __read_mostly shadow_user_mask;
 static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mmio_mask;
+static u64 __read_mostly shadow_present_mask;
 
 static void mmu_spte_set(u64 *sptep, u64 spte);
 static void mmu_free_roots(struct kvm_vcpu *vcpu);
@@ -283,13 +284,14 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
 }
 
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-		u64 dirty_mask, u64 nx_mask, u64 x_mask)
+		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask)
 {
 	shadow_user_mask = user_mask;
 	shadow_accessed_mask = accessed_mask;
 	shadow_dirty_mask = dirty_mask;
 	shadow_nx_mask = nx_mask;
 	shadow_x_mask = x_mask;
+	shadow_present_mask = p_mask;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
@@ -305,7 +307,7 @@ static int is_nx(struct kvm_vcpu *vcpu)
 
 static int is_shadow_present_pte(u64 pte)
 {
-	return pte & PT_PRESENT_MASK && !is_mmio_spte(pte);
+	return (pte & 0xFFFFFFFFull) && !is_mmio_spte(pte);
 }
 
 static int is_large_pte(u64 pte)
@@ -524,7 +526,7 @@ static void mmu_spte_set(u64 *sptep, u64 new_spte)
 }
 
 /* Rules for using mmu_spte_update:
- * Update the state bits, it means the mapped pfn is not changged.
+ * Update the state bits, it means the mapped pfn is not changed.
  *
  * Whenever we overwrite a writable spte with a read-only one we
  * should flush remote TLBs. Otherwise rmap_write_protect
@@ -2246,10 +2248,9 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
 {
 	u64 spte;
 
-	BUILD_BUG_ON(VMX_EPT_READABLE_MASK != PT_PRESENT_MASK ||
-			VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
+	BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
 
-	spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK |
+	spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK |
 	       shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
 
 	mmu_spte_set(sptep, spte);
@@ -2516,13 +2517,19 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		    gfn_t gfn, kvm_pfn_t pfn, bool speculative,
 		    bool can_unsync, bool host_writable)
 {
-	u64 spte;
+	u64 spte = 0;
 	int ret = 0;
 
 	if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
 		return 0;
 
-	spte = PT_PRESENT_MASK;
+	/*
+	 * For the EPT case, shadow_present_mask is 0 if hardware
+	 * supports exec-only page table entries.  In that case,
+	 * ACC_USER_MASK and shadow_user_mask are used to represent
+	 * read access.  See FNAME(gpte_access) in paging_tmpl.h.
+	 */
+	spte |= shadow_present_mask;
 	if (!speculative)
 		spte |= shadow_accessed_mask;
 
@@ -3190,7 +3197,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		MMU_WARN_ON(VALID_PAGE(root));
 		if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
 			pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i);
-			if (!is_present_gpte(pdptr)) {
+			if (!(pdptr & PT_PRESENT_MASK)) {
 				vcpu->arch.mmu.pae_root[i] = 0;
 				continue;
 			}
@@ -3915,9 +3922,7 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
 				 *   clearer.
 				 */
 				smap = cr4_smap && u && !uf && !ff;
-			} else
-				/* Not really needed: no U/S accesses on ept  */
-				u = 1;
+			}
 
 			fault = (ff && !x) || (uf && !u) || (wf && !w) ||
 				(smapf && smap);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 66b33b96a31b..ddc56e91f2e4 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -93,11 +93,6 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 	return kvm_mmu_load(vcpu);
 }
 
-static inline int is_present_gpte(unsigned long pte)
-{
-	return pte & PT_PRESENT_MASK;
-}
-
 /*
  * Currently, we have two sorts of write-protection, a) the first one
  * write-protects guest page to sync the guest modification, b) another one is
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index bc019f70e0b6..a01105485315 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -131,7 +131,7 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
 static inline int FNAME(is_present_gpte)(unsigned long pte)
 {
 #if PTTYPE != PTTYPE_EPT
-	return is_present_gpte(pte);
+	return pte & PT_PRESENT_MASK;
 #else
 	return pte & 7;
 #endif
@@ -181,13 +181,19 @@ no_present:
 	return true;
 }
 
+/*
+ * For PTTYPE_EPT, a page table can be executable but not readable
+ * on supported processors. Therefore, set_spte does not automatically
+ * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK
+ * to signify readability since it isn't used in the EPT case
+ */
 static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
 {
 	unsigned access;
 #if PTTYPE == PTTYPE_EPT
 	access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
 		((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
-		ACC_USER_MASK;
+		((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0);
 #else
 	BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
 	BUILD_BUG_ON(ACC_EXEC_MASK != 1);
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
index ab38af4f4947..9d4a8504a95a 100644
--- a/arch/x86/kvm/pmu_intel.c
+++ b/arch/x86/kvm/pmu_intel.c
@@ -93,7 +93,7 @@ static unsigned intel_find_fixed_event(int idx)
 	return intel_arch_events[fixed_pmc_events[idx]].event_type;
 }
 
-/* check if a PMC is enabled by comparising it with globl_ctrl bits. */
+/* check if a PMC is enabled by comparing it with globl_ctrl bits. */
 static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
 {
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 16ef31b87452..af523d84d102 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1577,7 +1577,7 @@ static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
 static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
        /*
-        * Any change of EFLAGS.VM is accompained by a reload of SS
+        * Any change of EFLAGS.VM is accompanied by a reload of SS
         * (caused by either a task switch or an inter-privilege IRET),
         * so we do not need to update the CPL here.
         */
@@ -4940,6 +4940,12 @@ out:
 static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
 {
 	local_irq_enable();
+	/*
+	 * We must have an instruction with interrupts enabled, so
+	 * the timer interrupt isn't delayed by the interrupt shadow.
+	 */
+	asm("nop");
+	local_irq_disable();
 }
 
 static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 8de925031b5c..0a6cc6754ec5 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1348,6 +1348,21 @@ TRACE_EVENT(kvm_avic_unaccelerated_access,
 		  __entry->vec)
 );
 
+TRACE_EVENT(kvm_hv_timer_state,
+		TP_PROTO(unsigned int vcpu_id, unsigned int hv_timer_in_use),
+		TP_ARGS(vcpu_id, hv_timer_in_use),
+		TP_STRUCT__entry(
+			__field(unsigned int, vcpu_id)
+			__field(unsigned int, hv_timer_in_use)
+			),
+		TP_fast_assign(
+			__entry->vcpu_id = vcpu_id;
+			__entry->hv_timer_in_use = hv_timer_in_use;
+			),
+		TP_printk("vcpu_id %x hv_timer %x\n",
+			__entry->vcpu_id,
+			__entry->hv_timer_in_use)
+);
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index df07a0a4611f..a45d8580f91e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -110,6 +110,13 @@ module_param_named(pml, enable_pml, bool, S_IRUGO);
 
 #define KVM_VMX_TSC_MULTIPLIER_MAX     0xffffffffffffffffULL
 
+/* Guest_tsc -> host_tsc conversion requires 64-bit division.  */
+static int __read_mostly cpu_preemption_timer_multi;
+static bool __read_mostly enable_preemption_timer = 1;
+#ifdef CONFIG_X86_64
+module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
+#endif
+
 #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
 #define KVM_VM_CR0_ALWAYS_ON						\
@@ -398,6 +405,12 @@ struct nested_vmx {
 	/* The host-usable pointer to the above */
 	struct page *current_vmcs12_page;
 	struct vmcs12 *current_vmcs12;
+	/*
+	 * Cache of the guest's VMCS, existing outside of guest memory.
+	 * Loaded from guest memory during VMPTRLD. Flushed to guest
+	 * memory during VMXOFF, VMCLEAR, VMPTRLD.
+	 */
+	struct vmcs12 *cached_vmcs12;
 	struct vmcs *current_shadow_vmcs;
 	/*
 	 * Indicates if the shadow vmcs must be updated with the
@@ -421,7 +434,6 @@ struct nested_vmx {
 	struct pi_desc *pi_desc;
 	bool pi_pending;
 	u16 posted_intr_nv;
-	u64 msr_ia32_feature_control;
 
 	struct hrtimer preemption_timer;
 	bool preemption_timer_expired;
@@ -597,11 +609,22 @@ struct vcpu_vmx {
 #define PML_ENTITY_NUM		512
 	struct page *pml_pg;
 
+	/* apic deadline value in host tsc */
+	u64 hv_deadline_tsc;
+
 	u64 current_tsc_ratio;
 
 	bool guest_pkru_valid;
 	u32 guest_pkru;
 	u32 host_pkru;
+
+	/*
+	 * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
+	 * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
+	 * in msr_ia32_feature_control_valid_bits.
+	 */
+	u64 msr_ia32_feature_control;
+	u64 msr_ia32_feature_control_valid_bits;
 };
 
 enum segment_cache_field {
@@ -841,7 +864,7 @@ static inline short vmcs_field_to_offset(unsigned long field)
 
 static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
 {
-	return to_vmx(vcpu)->nested.current_vmcs12;
+	return to_vmx(vcpu)->nested.cached_vmcs12;
 }
 
 static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr)
@@ -1056,6 +1079,58 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
 		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
 }
 
+/*
+ * Comment's format: document - errata name - stepping - processor name.
+ * Refer from
+ * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp
+ */
+static u32 vmx_preemption_cpu_tfms[] = {
+/* 323344.pdf - BA86   - D0 - Xeon 7500 Series */
+0x000206E6,
+/* 323056.pdf - AAX65  - C2 - Xeon L3406 */
+/* 322814.pdf - AAT59  - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */
+/* 322911.pdf - AAU65  - C2 - i5-600, i3-500 Desktop and Pentium G6950 */
+0x00020652,
+/* 322911.pdf - AAU65  - K0 - i5-600, i3-500 Desktop and Pentium G6950 */
+0x00020655,
+/* 322373.pdf - AAO95  - B1 - Xeon 3400 Series */
+/* 322166.pdf - AAN92  - B1 - i7-800 and i5-700 Desktop */
+/*
+ * 320767.pdf - AAP86  - B1 -
+ * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile
+ */
+0x000106E5,
+/* 321333.pdf - AAM126 - C0 - Xeon 3500 */
+0x000106A0,
+/* 321333.pdf - AAM126 - C1 - Xeon 3500 */
+0x000106A1,
+/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */
+0x000106A4,
+ /* 321333.pdf - AAM126 - D0 - Xeon 3500 */
+ /* 321324.pdf - AAK139 - D0 - Xeon 5500 */
+ /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */
+0x000106A5,
+};
+
+static inline bool cpu_has_broken_vmx_preemption_timer(void)
+{
+	u32 eax = cpuid_eax(0x00000001), i;
+
+	/* Clear the reserved bits */
+	eax &= ~(0x3U << 14 | 0xfU << 28);
+	for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++)
+		if (eax == vmx_preemption_cpu_tfms[i])
+			return true;
+
+	return false;
+}
+
+static inline bool cpu_has_vmx_preemption_timer(void)
+{
+	return vmcs_config.pin_based_exec_ctrl &
+		PIN_BASED_VMX_PREEMPTION_TIMER;
+}
+
 static inline bool cpu_has_vmx_posted_intr(void)
 {
 	return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
@@ -1603,6 +1678,11 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
 	__vmcs_writel(field, __vmcs_readl(field) | mask);
 }
 
+static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx)
+{
+	vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS);
+}
+
 static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
 {
 	vmcs_write32(VM_ENTRY_CONTROLS, val);
@@ -1631,6 +1711,11 @@ static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
 	vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
 }
 
+static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx)
+{
+	vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS);
+}
+
 static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
 {
 	vmcs_write32(VM_EXIT_CONTROLS, val);
@@ -2121,22 +2206,14 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
+	bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
 
 	if (!vmm_exclusive)
 		kvm_cpu_vmxon(phys_addr);
-	else if (vmx->loaded_vmcs->cpu != cpu)
+	else if (!already_loaded)
 		loaded_vmcs_clear(vmx->loaded_vmcs);
 
-	if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
-		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
-		vmcs_load(vmx->loaded_vmcs->vmcs);
-	}
-
-	if (vmx->loaded_vmcs->cpu != cpu) {
-		struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
-		unsigned long sysenter_esp;
-
-		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+	if (!already_loaded) {
 		local_irq_disable();
 		crash_disable_local_vmclear(cpu);
 
@@ -2151,6 +2228,18 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 			 &per_cpu(loaded_vmcss_on_cpu, cpu));
 		crash_enable_local_vmclear(cpu);
 		local_irq_enable();
+	}
+
+	if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
+		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
+		vmcs_load(vmx->loaded_vmcs->vmcs);
+	}
+
+	if (!already_loaded) {
+		struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
+		unsigned long sysenter_esp;
+
+		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 
 		/*
 		 * Linux uses per-cpu TSS and GDT, so set these when switching
@@ -2716,13 +2805,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 		vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
 			 VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
 			 VMX_EPT_INVEPT_BIT;
+		if (cpu_has_vmx_ept_execute_only())
+			vmx->nested.nested_vmx_ept_caps |=
+				VMX_EPT_EXECUTE_ONLY_BIT;
 		vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
-		/*
-		 * For nested guests, we don't do anything specific
-		 * for single context invalidation. Hence, only advertise
-		 * support for global context invalidation.
-		 */
-		vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT;
+		vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
+			VMX_EPT_EXTENT_CONTEXT_BIT;
 	} else
 		vmx->nested.nested_vmx_ept_caps = 0;
 
@@ -2853,7 +2941,6 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 			vmx->nested.nested_vmx_secondary_ctls_high);
 		break;
 	case MSR_IA32_VMX_EPT_VPID_CAP:
-		/* Currently, no nested vpid support */
 		*pdata = vmx->nested.nested_vmx_ept_caps |
 			((u64)vmx->nested.nested_vmx_vpid_caps << 32);
 		break;
@@ -2864,6 +2951,14 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 	return 0;
 }
 
+static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
+						 uint64_t val)
+{
+	uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits;
+
+	return !(val & ~valid_bits);
+}
+
 /*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
@@ -2905,10 +3000,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		msr_info->data = vmcs_read64(GUEST_BNDCFGS);
 		break;
-	case MSR_IA32_FEATURE_CONTROL:
-		if (!nested_vmx_allowed(vcpu))
+	case MSR_IA32_MCG_EXT_CTL:
+		if (!msr_info->host_initiated &&
+		    !(to_vmx(vcpu)->msr_ia32_feature_control &
+		      FEATURE_CONTROL_LMCE))
 			return 1;
-		msr_info->data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
+		msr_info->data = vcpu->arch.mcg_ext_ctl;
+		break;
+	case MSR_IA32_FEATURE_CONTROL:
+		msr_info->data = to_vmx(vcpu)->msr_ia32_feature_control;
 		break;
 	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
 		if (!nested_vmx_allowed(vcpu))
@@ -2998,12 +3098,20 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_TSC_ADJUST:
 		ret = kvm_set_msr_common(vcpu, msr_info);
 		break;
+	case MSR_IA32_MCG_EXT_CTL:
+		if ((!msr_info->host_initiated &&
+		     !(to_vmx(vcpu)->msr_ia32_feature_control &
+		       FEATURE_CONTROL_LMCE)) ||
+		    (data & ~MCG_EXT_CTL_LMCE_EN))
+			return 1;
+		vcpu->arch.mcg_ext_ctl = data;
+		break;
 	case MSR_IA32_FEATURE_CONTROL:
-		if (!nested_vmx_allowed(vcpu) ||
-		    (to_vmx(vcpu)->nested.msr_ia32_feature_control &
+		if (!vmx_feature_control_msr_valid(vcpu, data) ||
+		    (to_vmx(vcpu)->msr_ia32_feature_control &
 		     FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
 			return 1;
-		vmx->nested.msr_ia32_feature_control = data;
+		vmx->msr_ia32_feature_control = data;
 		if (msr_info->host_initiated && data == 0)
 			vmx_leave_nested(vcpu);
 		break;
@@ -3297,25 +3405,27 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 		      vmx_capability.ept, vmx_capability.vpid);
 	}
 
-	min = VM_EXIT_SAVE_DEBUG_CONTROLS;
+	min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
 #ifdef CONFIG_X86_64
 	min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
 #endif
 	opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
-		VM_EXIT_ACK_INTR_ON_EXIT | VM_EXIT_CLEAR_BNDCFGS;
+		VM_EXIT_CLEAR_BNDCFGS;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
 				&_vmexit_control) < 0)
 		return -EIO;
 
 	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
-	opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
+	opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
+		 PIN_BASED_VMX_PREEMPTION_TIMER;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
 				&_pin_based_exec_control) < 0)
 		return -EIO;
 
+	if (cpu_has_broken_vmx_preemption_timer())
+		_pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
 	if (!(_cpu_based_2nd_exec_control &
-		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ||
-		!(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
+		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY))
 		_pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
 
 	min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
@@ -3364,7 +3474,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 
 	/*
 	 * Some cpus support VM_ENTRY_(LOAD|SAVE)_IA32_PERF_GLOBAL_CTRL
-	 * but due to arrata below it can't be used. Workaround is to use
+	 * but due to errata below it can't be used. Workaround is to use
 	 * msr load mechanism to switch IA32_PERF_GLOBAL_CTRL.
 	 *
 	 * VM Exit May Incorrectly Clear IA32_PERF_GLOBAL_CTRL [34:32]
@@ -4781,6 +4891,8 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
 
 	if (!kvm_vcpu_apicv_active(&vmx->vcpu))
 		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+	/* Enable the preemption timer dynamically */
+	pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
 	return pin_based_exec_ctrl;
 }
 
@@ -4896,6 +5008,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
 	/* Control */
 	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
+	vmx->hv_deadline_tsc = -1;
 
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
 
@@ -6016,12 +6129,14 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
 	trace_kvm_page_fault(gpa, exit_qualification);
 
-	/* It is a write fault? */
-	error_code = exit_qualification & PFERR_WRITE_MASK;
+	/* it is a read fault? */
+	error_code = (exit_qualification << 2) & PFERR_USER_MASK;
+	/* it is a write fault? */
+	error_code |= exit_qualification & PFERR_WRITE_MASK;
 	/* It is a fetch fault? */
 	error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;
 	/* ept page table is present? */
-	error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK;
+	error_code |= (exit_qualification & 0x38) != 0;
 
 	vcpu->arch.exit_qualification = exit_qualification;
 
@@ -6355,9 +6470,6 @@ static __init int hardware_setup(void)
 	for (msr = 0x800; msr <= 0x8ff; msr++)
 		vmx_disable_intercept_msr_read_x2apic(msr);
 
-	/* According SDM, in x2apic mode, the whole id reg is used.  But in
-	 * KVM, it only use the highest eight bits. Need to intercept it */
-	vmx_enable_intercept_msr_read_x2apic(0x802);
 	/* TMCCT */
 	vmx_enable_intercept_msr_read_x2apic(0x839);
 	/* TPR */
@@ -6368,10 +6480,12 @@ static __init int hardware_setup(void)
 	vmx_disable_intercept_msr_write_x2apic(0x83f);
 
 	if (enable_ept) {
-		kvm_mmu_set_mask_ptes(0ull,
+		kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
 			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
 			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
-			0ull, VMX_EPT_EXECUTABLE_MASK);
+			0ull, VMX_EPT_EXECUTABLE_MASK,
+			cpu_has_vmx_ept_execute_only() ?
+				      0ull : VMX_EPT_READABLE_MASK);
 		ept_set_mmio_spte_mask();
 		kvm_enable_tdp();
 	} else
@@ -6393,8 +6507,21 @@ static __init int hardware_setup(void)
 		kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
 	}
 
+	if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
+		u64 vmx_msr;
+
+		rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
+		cpu_preemption_timer_multi =
+			 vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
+	} else {
+		kvm_x86_ops->set_hv_timer = NULL;
+		kvm_x86_ops->cancel_hv_timer = NULL;
+	}
+
 	kvm_set_posted_intr_wakeup_handler(wakeup_handler);
 
+	kvm_mce_cap_supported |= MCG_LMCE_P;
+
 	return alloc_kvm_area();
 
 out8:
@@ -6862,16 +6989,22 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
-	if ((vmx->nested.msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
+	if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
 			!= VMXON_NEEDED_FEATURES) {
 		kvm_inject_gp(vcpu, 0);
 		return 1;
 	}
 
+	vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
+	if (!vmx->nested.cached_vmcs12)
+		return -ENOMEM;
+
 	if (enable_shadow_vmcs) {
 		shadow_vmcs = alloc_vmcs();
-		if (!shadow_vmcs)
+		if (!shadow_vmcs) {
+			kfree(vmx->nested.cached_vmcs12);
 			return -ENOMEM;
+		}
 		/* mark vmcs as shadow */
 		shadow_vmcs->revision_id |= (1u << 31);
 		/* init shadow vmcs */
@@ -6942,6 +7075,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
 		vmcs_write64(VMCS_LINK_POINTER, -1ull);
 	}
 	vmx->nested.posted_intr_nv = -1;
+
+	/* Flush VMCS12 to guest memory */
+	memcpy(vmx->nested.current_vmcs12, vmx->nested.cached_vmcs12,
+	       VMCS12_SIZE);
+
 	kunmap(vmx->nested.current_vmcs12_page);
 	nested_release_page(vmx->nested.current_vmcs12_page);
 	vmx->nested.current_vmptr = -1ull;
@@ -6962,6 +7100,7 @@ static void free_nested(struct vcpu_vmx *vmx)
 	nested_release_vmcs12(vmx);
 	if (enable_shadow_vmcs)
 		free_vmcs(vmx->nested.current_shadow_vmcs);
+	kfree(vmx->nested.cached_vmcs12);
 	/* Unpin physical memory we referred to in current vmcs02 */
 	if (vmx->nested.apic_access_page) {
 		nested_release_page(vmx->nested.apic_access_page);
@@ -7365,6 +7504,13 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 		vmx->nested.current_vmptr = vmptr;
 		vmx->nested.current_vmcs12 = new_vmcs12;
 		vmx->nested.current_vmcs12_page = page;
+		/*
+		 * Load VMCS12 from guest memory since it is not already
+		 * cached.
+		 */
+		memcpy(vmx->nested.cached_vmcs12,
+		       vmx->nested.current_vmcs12, VMCS12_SIZE);
+
 		if (enable_shadow_vmcs) {
 			vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
 				      SECONDARY_EXEC_SHADOW_VMCS);
@@ -7458,12 +7604,16 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 
 	switch (type) {
 	case VMX_EPT_EXTENT_GLOBAL:
+	/*
+	 * TODO: track mappings and invalidate
+	 * single context requests appropriately
+	 */
+	case VMX_EPT_EXTENT_CONTEXT:
 		kvm_mmu_sync_roots(vcpu);
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 		nested_vmx_succeed(vcpu);
 		break;
 	default:
-		/* Trap single context invalidation invept calls */
 		BUG_ON(1);
 		break;
 	}
@@ -7560,6 +7710,12 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_preemption_timer(struct kvm_vcpu *vcpu)
+{
+	kvm_lapic_expired_hv_timer(vcpu);
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -7610,6 +7766,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_XSAVES]                  = handle_xsaves,
 	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
 	[EXIT_REASON_PML_FULL]		      = handle_pml_full,
+	[EXIT_REASON_PREEMPTION_TIMER]	      = handle_preemption_timer,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7918,6 +8075,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		 * the XSS exit bitmap in vmcs12.
 		 */
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
+	case EXIT_REASON_PREEMPTION_TIMER:
+		return false;
 	default:
 		return true;
 	}
@@ -8303,7 +8462,7 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
 	 * the next L2->L1 exit.
 	 */
 	if (!is_guest_mode(vcpu) ||
-	    !nested_cpu_has2(vmx->nested.current_vmcs12,
+	    !nested_cpu_has2(get_vmcs12(&vmx->vcpu),
 			     SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
 		vmcs_write64(APIC_ACCESS_ADDR, hpa);
 }
@@ -8436,7 +8595,6 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 			"push %[sp]\n\t"
 #endif
 			"pushf\n\t"
-			"orl $0x200, (%%" _ASM_SP ")\n\t"
 			__ASM_SIZE(push) " $%c[cs]\n\t"
 			"call *%[entry]\n\t"
 			:
@@ -8449,8 +8607,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 			[ss]"i"(__KERNEL_DS),
 			[cs]"i"(__KERNEL_CS)
 			);
-	} else
-		local_irq_enable();
+	}
 }
 
 static bool vmx_has_high_real_mode_segbase(void)
@@ -8601,6 +8758,26 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
 					msrs[i].host);
 }
 
+void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u64 tscl;
+	u32 delta_tsc;
+
+	if (vmx->hv_deadline_tsc == -1)
+		return;
+
+	tscl = rdtsc();
+	if (vmx->hv_deadline_tsc > tscl)
+		/* sure to be 32 bit only because checked on set_hv_timer */
+		delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
+			cpu_preemption_timer_multi);
+	else
+		delta_tsc = 0;
+
+	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
+}
+
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -8650,6 +8827,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	atomic_switch_perf_msrs(vmx);
 	debugctlmsr = get_debugctlmsr();
 
+	vmx_arm_hv_timer(vcpu);
+
 	vmx->__launched = vmx->loaded_vmcs->launched;
 	asm(
 		/* Store host registers */
@@ -8940,6 +9119,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	vmx->nested.current_vmptr = -1ull;
 	vmx->nested.current_vmcs12 = NULL;
 
+	vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
+
 	return &vmx->vcpu;
 
 free_vmcs:
@@ -9080,6 +9261,13 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 
 	if (cpu_has_secondary_exec_ctrls())
 		vmcs_set_secondary_exec_control(secondary_exec_ctl);
+
+	if (nested_vmx_allowed(vcpu))
+		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
+			FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+	else
+		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
+			~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
 }
 
 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -9636,9 +9824,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	vmcs_write64(VMCS_LINK_POINTER, -1ull);
 
 	exec_control = vmcs12->pin_based_vm_exec_control;
-	exec_control |= vmcs_config.pin_based_exec_ctrl;
+
+	/* Preemption timer setting is only taken from vmcs01.  */
 	exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+	exec_control |= vmcs_config.pin_based_exec_ctrl;
+	if (vmx->hv_deadline_tsc == -1)
+		exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
 
+	/* Posted interrupts setting is only taken from vmcs12.  */
 	if (nested_cpu_has_posted_intr(vmcs12)) {
 		/*
 		 * Note that we use L0's vector here and in
@@ -10556,8 +10749,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 				       vmcs12->vm_exit_intr_error_code,
 				       KVM_ISA_VMX);
 
-	vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS));
-	vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS));
+	vm_entry_controls_reset_shadow(vmx);
+	vm_exit_controls_reset_shadow(vmx);
 	vmx_segment_cache_clear(vmx);
 
 	/* if no vmcs02 cache requested, remove the one we used */
@@ -10566,8 +10759,14 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 
 	load_vmcs12_host_state(vcpu, vmcs12);
 
-	/* Update TSC_OFFSET if TSC was changed while L2 ran */
+	/* Update any VMCS fields that might have changed while L2 ran */
 	vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+	if (vmx->hv_deadline_tsc == -1)
+		vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
+				PIN_BASED_VMX_PREEMPTION_TIMER);
+	else
+		vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
+			      PIN_BASED_VMX_PREEMPTION_TIMER);
 
 	/* This is needed for same reason as it was needed in prepare_vmcs02 */
 	vmx->host_rsp = 0;
@@ -10647,6 +10846,64 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
 	return X86EMUL_CONTINUE;
 }
 
+#ifdef CONFIG_X86_64
+/* (a << shift) / divisor, return 1 if overflow otherwise 0 */
+static inline int u64_shl_div_u64(u64 a, unsigned int shift,
+				  u64 divisor, u64 *result)
+{
+	u64 low = a << shift, high = a >> (64 - shift);
+
+	/* To avoid the overflow on divq */
+	if (high >= divisor)
+		return 1;
+
+	/* Low hold the result, high hold rem which is discarded */
+	asm("divq %2\n\t" : "=a" (low), "=d" (high) :
+	    "rm" (divisor), "0" (low), "1" (high));
+	*result = low;
+
+	return 0;
+}
+
+static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u64 tscl = rdtsc();
+	u64 guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
+	u64 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
+
+	/* Convert to host delta tsc if tsc scaling is enabled */
+	if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
+			u64_shl_div_u64(delta_tsc,
+				kvm_tsc_scaling_ratio_frac_bits,
+				vcpu->arch.tsc_scaling_ratio,
+				&delta_tsc))
+		return -ERANGE;
+
+	/*
+	 * If the delta tsc can't fit in the 32 bit after the multi shift,
+	 * we can't use the preemption timer.
+	 * It's possible that it fits on later vmentries, but checking
+	 * on every vmentry is costly so we just use an hrtimer.
+	 */
+	if (delta_tsc >> (cpu_preemption_timer_multi + 32))
+		return -ERANGE;
+
+	vmx->hv_deadline_tsc = tscl + delta_tsc;
+	vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
+			PIN_BASED_VMX_PREEMPTION_TIMER);
+	return 0;
+}
+
+static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	vmx->hv_deadline_tsc = -1;
+	vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
+			PIN_BASED_VMX_PREEMPTION_TIMER);
+}
+#endif
+
 static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
 	if (ple_gap)
@@ -10691,7 +10948,7 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
  *   this case, return 1, otherwise, return 0.
  *
  */
-static int vmx_pre_block(struct kvm_vcpu *vcpu)
+static int pi_pre_block(struct kvm_vcpu *vcpu)
 {
 	unsigned long flags;
 	unsigned int dest;
@@ -10758,7 +11015,18 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static void vmx_post_block(struct kvm_vcpu *vcpu)
+static int vmx_pre_block(struct kvm_vcpu *vcpu)
+{
+	if (pi_pre_block(vcpu))
+		return 1;
+
+	if (kvm_lapic_hv_timer_in_use(vcpu))
+		kvm_lapic_switch_to_sw_timer(vcpu);
+
+	return 0;
+}
+
+static void pi_post_block(struct kvm_vcpu *vcpu)
 {
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
 	struct pi_desc old, new;
@@ -10800,6 +11068,14 @@ static void vmx_post_block(struct kvm_vcpu *vcpu)
 	}
 }
 
+static void vmx_post_block(struct kvm_vcpu *vcpu)
+{
+	if (kvm_x86_ops->set_hv_timer)
+		kvm_lapic_switch_to_hv_timer(vcpu);
+
+	pi_post_block(vcpu);
+}
+
 /*
  * vmx_update_pi_irte - set IRTE for Posted-Interrupts
  *
@@ -10844,7 +11120,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
 		 * We will support full lowest-priority interrupt later.
 		 */
 
-		kvm_set_msi_irq(e, &irq);
+		kvm_set_msi_irq(kvm, e, &irq);
 		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
 			/*
 			 * Make sure the IRTE is in remapped mode if
@@ -10889,6 +11165,16 @@ out:
 	return ret;
 }
 
+static void vmx_setup_mce(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.mcg_cap & MCG_LMCE_P)
+		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
+			FEATURE_CONTROL_LMCE;
+	else
+		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
+			~FEATURE_CONTROL_LMCE;
+}
+
 static struct kvm_x86_ops vmx_x86_ops = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -11013,6 +11299,13 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.pmu_ops = &intel_pmu_ops,
 
 	.update_pi_irte = vmx_update_pi_irte,
+
+#ifdef CONFIG_X86_64
+	.set_hv_timer = vmx_set_hv_timer,
+	.cancel_hv_timer = vmx_cancel_hv_timer,
+#endif
+
+	.setup_mce = vmx_setup_mce,
 };
 
 static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9c496c7e8c00..19f9f9e05c2a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -71,7 +71,8 @@
 
 #define MAX_IO_MSRS 256
 #define KVM_MAX_MCE_BANKS 32
-#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
+u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
+EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
 
 #define emul_to_vcpu(ctxt) \
 	container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
@@ -90,8 +91,12 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
+#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
+                                    KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
+
 static void update_cr8_intercept(struct kvm_vcpu *vcpu);
 static void process_nmi(struct kvm_vcpu *vcpu);
+static void enter_smm(struct kvm_vcpu *vcpu);
 static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
 
 struct kvm_x86_ops *kvm_x86_ops __read_mostly;
@@ -114,7 +119,8 @@ u8   __read_mostly kvm_tsc_scaling_ratio_frac_bits;
 EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
 u64  __read_mostly kvm_max_tsc_scaling_ratio;
 EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
-static u64 __read_mostly kvm_default_tsc_scaling_ratio;
+u64 __read_mostly kvm_default_tsc_scaling_ratio;
+EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
 
 /* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
 static u32 __read_mostly tsc_tolerance_ppm = 250;
@@ -538,7 +544,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
 		goto out;
 	}
 	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
-		if (is_present_gpte(pdpte[i]) &&
+		if ((pdpte[i] & PT_PRESENT_MASK) &&
 		    (pdpte[i] &
 		     vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) {
 			ret = 0;
@@ -983,6 +989,7 @@ static u32 emulated_msrs[] = {
 	MSR_IA32_MISC_ENABLE,
 	MSR_IA32_MCG_STATUS,
 	MSR_IA32_MCG_CTL,
+	MSR_IA32_MCG_EXT_CTL,
 	MSR_IA32_SMBASE,
 };
 
@@ -1162,7 +1169,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 	int version;
 	int r;
 	struct pvclock_wall_clock wc;
-	struct timespec boot;
+	struct timespec64 boot;
 
 	if (!wall_clock)
 		return;
@@ -1185,13 +1192,13 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 	 * wall clock specified here.  guest system time equals host
 	 * system time for us, thus we must fill in host boot time here.
 	 */
-	getboottime(&boot);
+	getboottime64(&boot);
 
 	if (kvm->arch.kvmclock_offset) {
-		struct timespec ts = ns_to_timespec(kvm->arch.kvmclock_offset);
-		boot = timespec_sub(boot, ts);
+		struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset);
+		boot = timespec64_sub(boot, ts);
 	}
-	wc.sec = boot.tv_sec;
+	wc.sec = (u32)boot.tv_sec; /* overflow in 2106 guest time */
 	wc.nsec = boot.tv_nsec;
 	wc.version = version;
 
@@ -2616,6 +2623,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_TSC_CONTROL:
 		r = kvm_has_tsc_control;
 		break;
+	case KVM_CAP_X2APIC_API:
+		r = KVM_X2APIC_API_VALID_FLAGS;
+		break;
 	default:
 		r = 0;
 		break;
@@ -2678,11 +2688,9 @@ long kvm_arch_dev_ioctl(struct file *filp,
 		break;
 	}
 	case KVM_X86_GET_MCE_CAP_SUPPORTED: {
-		u64 mce_cap;
-
-		mce_cap = KVM_MCE_CAP_SUPPORTED;
 		r = -EFAULT;
-		if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
+		if (copy_to_user(argp, &kvm_mce_cap_supported,
+				 sizeof(kvm_mce_cap_supported)))
 			goto out;
 		r = 0;
 		break;
@@ -2734,6 +2742,11 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 				rdtsc() - vcpu->arch.last_host_tsc;
 		if (tsc_delta < 0)
 			mark_tsc_unstable("KVM discovered backwards TSC");
+
+		if (kvm_lapic_hv_timer_in_use(vcpu) &&
+				kvm_x86_ops->set_hv_timer(vcpu,
+					kvm_get_lapic_tscdeadline_msr(vcpu)))
+			kvm_lapic_switch_to_sw_timer(vcpu);
 		if (check_tsc_unstable()) {
 			u64 offset = kvm_compute_tsc_offset(vcpu,
 						vcpu->arch.last_guest_tsc);
@@ -2767,15 +2780,17 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 	if (vcpu->arch.apicv_active)
 		kvm_x86_ops->sync_pir_to_irr(vcpu);
 
-	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
-
-	return 0;
+	return kvm_apic_get_state(vcpu, s);
 }
 
 static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
 				    struct kvm_lapic_state *s)
 {
-	kvm_apic_post_state_restore(vcpu, s);
+	int r;
+
+	r = kvm_apic_set_state(vcpu, s);
+	if (r)
+		return r;
 	update_cr8_intercept(vcpu);
 
 	return 0;
@@ -2860,7 +2875,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
 	r = -EINVAL;
 	if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
 		goto out;
-	if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
+	if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
 		goto out;
 	r = 0;
 	vcpu->arch.mcg_cap = mcg_cap;
@@ -2870,6 +2885,9 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
 	/* Init IA32_MCi_CTL to all 1s */
 	for (bank = 0; bank < bank_num; bank++)
 		vcpu->arch.mce_banks[bank*4] = ~(u64)0;
+
+	if (kvm_x86_ops->setup_mce)
+		kvm_x86_ops->setup_mce(vcpu);
 out:
 	return r;
 }
@@ -3768,7 +3786,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		r = -EEXIST;
 		if (irqchip_in_kernel(kvm))
 			goto split_irqchip_unlock;
-		if (atomic_read(&kvm->online_vcpus))
+		if (kvm->created_vcpus)
 			goto split_irqchip_unlock;
 		r = kvm_setup_empty_irq_routing(kvm);
 		if (r)
@@ -3782,6 +3800,18 @@ split_irqchip_unlock:
 		mutex_unlock(&kvm->lock);
 		break;
 	}
+	case KVM_CAP_X2APIC_API:
+		r = -EINVAL;
+		if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
+			break;
+
+		if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
+			kvm->arch.x2apic_format = true;
+		if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
+			kvm->arch.x2apic_broadcast_quirk_disabled = true;
+
+		r = 0;
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -3833,7 +3863,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (kvm->arch.vpic)
 			goto create_irqchip_unlock;
 		r = -EINVAL;
-		if (atomic_read(&kvm->online_vcpus))
+		if (kvm->created_vcpus)
 			goto create_irqchip_unlock;
 		r = -ENOMEM;
 		vpic = kvm_create_pic(kvm);
@@ -3873,7 +3903,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 				   sizeof(struct kvm_pit_config)))
 			goto out;
 	create_pit:
-		mutex_lock(&kvm->slots_lock);
+		mutex_lock(&kvm->lock);
 		r = -EEXIST;
 		if (kvm->arch.vpit)
 			goto create_pit_unlock;
@@ -3882,7 +3912,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (kvm->arch.vpit)
 			r = 0;
 	create_pit_unlock:
-		mutex_unlock(&kvm->slots_lock);
+		mutex_unlock(&kvm->lock);
 		break;
 	case KVM_GET_IRQCHIP: {
 		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
@@ -3989,7 +4019,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	case KVM_SET_BOOT_CPU_ID:
 		r = 0;
 		mutex_lock(&kvm->lock);
-		if (atomic_read(&kvm->online_vcpus) != 0)
+		if (kvm->created_vcpus)
 			r = -EBUSY;
 		else
 			kvm->arch.bsp_vcpu_id = arg;
@@ -5297,13 +5327,8 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu)
 		/* This is a good place to trace that we are exiting SMM.  */
 		trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);
 
-		if (unlikely(vcpu->arch.smi_pending)) {
-			kvm_make_request(KVM_REQ_SMI, vcpu);
-			vcpu->arch.smi_pending = 0;
-		} else {
-			/* Process a latched INIT, if any.  */
-			kvm_make_request(KVM_REQ_EVENT, vcpu);
-		}
+		/* Process a latched INIT or SMI, if any.  */
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
 	}
 
 	kvm_mmu_reset_context(vcpu);
@@ -5849,8 +5874,8 @@ int kvm_arch_init(void *opaque)
 	kvm_x86_ops = ops;
 
 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
-			PT_DIRTY_MASK, PT64_NX_MASK, 0);
-
+			PT_DIRTY_MASK, PT64_NX_MASK, 0,
+			PT_PRESENT_MASK);
 	kvm_timer_init();
 
 	perf_register_guest_info_callbacks(&kvm_guest_cbs);
@@ -6084,7 +6109,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 	}
 
 	/* try to inject new event if pending */
-	if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
+	if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
+		vcpu->arch.smi_pending = false;
+		enter_smm(vcpu);
+	} else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
 		--vcpu->arch.nmi_pending;
 		vcpu->arch.nmi_injected = true;
 		kvm_x86_ops->set_nmi(vcpu);
@@ -6107,6 +6135,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 			kvm_x86_ops->set_irq(vcpu);
 		}
 	}
+
 	return 0;
 }
 
@@ -6130,7 +6159,7 @@ static void process_nmi(struct kvm_vcpu *vcpu)
 #define put_smstate(type, buf, offset, val)			  \
 	*(type *)((buf) + (offset) - 0x7e00) = val
 
-static u32 process_smi_get_segment_flags(struct kvm_segment *seg)
+static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
 {
 	u32 flags = 0;
 	flags |= seg->g       << 23;
@@ -6144,7 +6173,7 @@ static u32 process_smi_get_segment_flags(struct kvm_segment *seg)
 	return flags;
 }
 
-static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
+static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
 {
 	struct kvm_segment seg;
 	int offset;
@@ -6159,11 +6188,11 @@ static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
 
 	put_smstate(u32, buf, offset + 8, seg.base);
 	put_smstate(u32, buf, offset + 4, seg.limit);
-	put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg));
+	put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
 }
 
 #ifdef CONFIG_X86_64
-static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
+static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
 {
 	struct kvm_segment seg;
 	int offset;
@@ -6172,7 +6201,7 @@ static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
 	kvm_get_segment(vcpu, &seg, n);
 	offset = 0x7e00 + n * 16;
 
-	flags = process_smi_get_segment_flags(&seg) >> 8;
+	flags = enter_smm_get_segment_flags(&seg) >> 8;
 	put_smstate(u16, buf, offset, seg.selector);
 	put_smstate(u16, buf, offset + 2, flags);
 	put_smstate(u32, buf, offset + 4, seg.limit);
@@ -6180,7 +6209,7 @@ static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
 }
 #endif
 
-static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
+static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
 {
 	struct desc_ptr dt;
 	struct kvm_segment seg;
@@ -6204,13 +6233,13 @@ static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
 	put_smstate(u32, buf, 0x7fc4, seg.selector);
 	put_smstate(u32, buf, 0x7f64, seg.base);
 	put_smstate(u32, buf, 0x7f60, seg.limit);
-	put_smstate(u32, buf, 0x7f5c, process_smi_get_segment_flags(&seg));
+	put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
 
 	kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
 	put_smstate(u32, buf, 0x7fc0, seg.selector);
 	put_smstate(u32, buf, 0x7f80, seg.base);
 	put_smstate(u32, buf, 0x7f7c, seg.limit);
-	put_smstate(u32, buf, 0x7f78, process_smi_get_segment_flags(&seg));
+	put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
 
 	kvm_x86_ops->get_gdt(vcpu, &dt);
 	put_smstate(u32, buf, 0x7f74, dt.address);
@@ -6221,7 +6250,7 @@ static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
 	put_smstate(u32, buf, 0x7f54, dt.size);
 
 	for (i = 0; i < 6; i++)
-		process_smi_save_seg_32(vcpu, buf, i);
+		enter_smm_save_seg_32(vcpu, buf, i);
 
 	put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
 
@@ -6230,7 +6259,7 @@ static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
 	put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
 }
 
-static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
+static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
 {
 #ifdef CONFIG_X86_64
 	struct desc_ptr dt;
@@ -6262,7 +6291,7 @@ static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
 
 	kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
 	put_smstate(u16, buf, 0x7e90, seg.selector);
-	put_smstate(u16, buf, 0x7e92, process_smi_get_segment_flags(&seg) >> 8);
+	put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
 	put_smstate(u32, buf, 0x7e94, seg.limit);
 	put_smstate(u64, buf, 0x7e98, seg.base);
 
@@ -6272,7 +6301,7 @@ static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
 
 	kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
 	put_smstate(u16, buf, 0x7e70, seg.selector);
-	put_smstate(u16, buf, 0x7e72, process_smi_get_segment_flags(&seg) >> 8);
+	put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
 	put_smstate(u32, buf, 0x7e74, seg.limit);
 	put_smstate(u64, buf, 0x7e78, seg.base);
 
@@ -6281,31 +6310,26 @@ static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
 	put_smstate(u64, buf, 0x7e68, dt.address);
 
 	for (i = 0; i < 6; i++)
-		process_smi_save_seg_64(vcpu, buf, i);
+		enter_smm_save_seg_64(vcpu, buf, i);
 #else
 	WARN_ON_ONCE(1);
 #endif
 }
 
-static void process_smi(struct kvm_vcpu *vcpu)
+static void enter_smm(struct kvm_vcpu *vcpu)
 {
 	struct kvm_segment cs, ds;
 	struct desc_ptr dt;
 	char buf[512];
 	u32 cr0;
 
-	if (is_smm(vcpu)) {
-		vcpu->arch.smi_pending = true;
-		return;
-	}
-
 	trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
 	vcpu->arch.hflags |= HF_SMM_MASK;
 	memset(buf, 0, 512);
 	if (guest_cpuid_has_longmode(vcpu))
-		process_smi_save_state_64(vcpu, buf);
+		enter_smm_save_state_64(vcpu, buf);
 	else
-		process_smi_save_state_32(vcpu, buf);
+		enter_smm_save_state_32(vcpu, buf);
 
 	kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
 
@@ -6361,6 +6385,12 @@ static void process_smi(struct kvm_vcpu *vcpu)
 	kvm_mmu_reset_context(vcpu);
 }
 
+static void process_smi(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.smi_pending = true;
+	kvm_make_request(KVM_REQ_EVENT, vcpu);
+}
+
 void kvm_make_scan_ioapic_request(struct kvm *kvm)
 {
 	kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
@@ -6555,8 +6585,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 		if (inject_pending_event(vcpu, req_int_win) != 0)
 			req_immediate_exit = true;
-		/* enable NMI/IRQ window open exits if needed */
 		else {
+			/* Enable NMI/IRQ window open exits if needed.
+			 *
+			 * SMIs have two cases: 1) they can be nested, and
+			 * then there is nothing to do here because RSM will
+			 * cause a vmexit anyway; 2) or the SMI can be pending
+			 * because inject_pending_event has completed the
+			 * injection of an IRQ or NMI from the previous vmexit,
+			 * and then we request an immediate exit to inject the SMI.
+			 */
+			if (vcpu->arch.smi_pending && !is_smm(vcpu))
+				req_immediate_exit = true;
 			if (vcpu->arch.nmi_pending)
 				kvm_x86_ops->enable_nmi_window(vcpu);
 			if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
@@ -6607,12 +6647,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	kvm_load_guest_xcr0(vcpu);
 
-	if (req_immediate_exit)
+	if (req_immediate_exit) {
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
 		smp_send_reschedule(vcpu->cpu);
+	}
 
 	trace_kvm_entry(vcpu->vcpu_id);
 	wait_lapic_expire(vcpu);
-	__kvm_guest_enter();
+	guest_enter_irqoff();
 
 	if (unlikely(vcpu->arch.switch_db_regs)) {
 		set_debugreg(0, 7);
@@ -6663,16 +6705,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	++vcpu->stat.exits;
 
-	/*
-	 * We must have an instruction between local_irq_enable() and
-	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
-	 * the interrupt shadow.  The stat.exits increment will do nicely.
-	 * But we need to prevent reordering, hence this barrier():
-	 */
-	barrier();
-
-	kvm_guest_exit();
+	guest_exit_irqoff();
 
+	local_irq_enable();
 	preempt_enable();
 
 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -7409,6 +7444,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 {
 	vcpu->arch.hflags = 0;
 
+	vcpu->arch.smi_pending = 0;
 	atomic_set(&vcpu->arch.nmi_queued, 0);
 	vcpu->arch.nmi_pending = 0;
 	vcpu->arch.nmi_injected = false;
@@ -7601,11 +7637,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
 	return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
 }
 
-bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
-{
-	return irqchip_in_kernel(vcpu->kvm) == lapic_in_kernel(vcpu);
-}
-
 struct static_key kvm_no_apic_vcpu __read_mostly;
 EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);
 
@@ -7872,7 +7903,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kfree(kvm->arch.vpic);
 	kfree(kvm->arch.vioapic);
 	kvm_free_vcpus(kvm);
-	kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
+	kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
 	kvm_mmu_uninit_vm(kvm);
 }
 
@@ -8380,7 +8411,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
 	/*
 	 * When producer of consumer is unregistered, we change back to
 	 * remapped mode, so we can re-use the current implementation
-	 * when the irq is masked/disabed or the consumer side (KVM
+	 * when the irq is masked/disabled or the consumer side (KVM
 	 * int this case doesn't want to receive the interrupts.
 	*/
 	ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
index 02de3d74d2c5..8a602a1e404a 100644
--- a/arch/x86/lib/hweight.S
+++ b/arch/x86/lib/hweight.S
@@ -35,6 +35,7 @@ ENDPROC(__sw_hweight32)
 
 ENTRY(__sw_hweight64)
 #ifdef CONFIG_X86_64
+	pushq   %rdi
 	pushq   %rdx
 
 	movq    %rdi, %rdx                      # w -> t
@@ -60,6 +61,7 @@ ENTRY(__sw_hweight64)
 	shrq    $56, %rax                       # w = w_tmp >> 56
 
 	popq    %rdx
+	popq    %rdi
 	ret
 #else /* CONFIG_X86_32 */
 	/* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index fb4c1b42fc7e..620928903be3 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -208,7 +208,7 @@ static int __meminit save_mr(struct map_range *mr, int nr_range,
  * adjust the page_size_mask for small range to go with
  *	big page size instead small one if nearby are ram too.
  */
-static void __init_refok adjust_range_page_size_mask(struct map_range *mr,
+static void __ref adjust_range_page_size_mask(struct map_range *mr,
 							 int nr_range)
 {
 	int i;
@@ -396,7 +396,7 @@ bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
  * This runs before bootmem is initialized and gets pages directly from
  * the physical memory. To access them they are temporarily mapped.
  */
-unsigned long __init_refok init_memory_mapping(unsigned long start,
+unsigned long __ref init_memory_mapping(unsigned long start,
 					       unsigned long end)
 {
 	struct map_range mr[NR_RANGE_MR];
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 8196054fedb0..7b6a9d14c8c0 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -133,7 +133,7 @@ static void pcibios_fixup_device_resources(struct pci_dev *dev)
 	if (pci_probe & PCI_NOASSIGN_BARS) {
 		/*
 		* If the BIOS did not assign the BAR, zero out the
-		* resource so the kernel doesn't attmept to assign
+		* resource so the kernel doesn't attempt to assign
 		* it later on in pci_assign_unassigned_resources
 		*/
 		for (bar = 0; bar <= PCI_STD_RESOURCE_END; bar++) {
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 5ceda85b8687..052c1cb76305 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -169,7 +169,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
 					    size_t size,
 					    dma_addr_t *dma_handle,
 					    gfp_t flags,
-					    struct dma_attrs *attrs)
+					    unsigned long attrs)
 {
 	void *vaddr;
 
diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c
index 613cac7395c4..b814ca675131 100644
--- a/arch/x86/pci/vmd.c
+++ b/arch/x86/pci/vmd.c
@@ -119,10 +119,11 @@ static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 static void vmd_irq_enable(struct irq_data *data)
 {
 	struct vmd_irq *vmdirq = data->chip_data;
+	unsigned long flags;
 
-	raw_spin_lock(&list_lock);
+	raw_spin_lock_irqsave(&list_lock, flags);
 	list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list);
-	raw_spin_unlock(&list_lock);
+	raw_spin_unlock_irqrestore(&list_lock, flags);
 
 	data->chip->irq_unmask(data);
 }
@@ -130,12 +131,14 @@ static void vmd_irq_enable(struct irq_data *data)
 static void vmd_irq_disable(struct irq_data *data)
 {
 	struct vmd_irq *vmdirq = data->chip_data;
+	unsigned long flags;
 
 	data->chip->irq_mask(data);
 
-	raw_spin_lock(&list_lock);
+	raw_spin_lock_irqsave(&list_lock, flags);
 	list_del_rcu(&vmdirq->node);
-	raw_spin_unlock(&list_lock);
+	INIT_LIST_HEAD_RCU(&vmdirq->node);
+	raw_spin_unlock_irqrestore(&list_lock, flags);
 }
 
 /*
@@ -166,16 +169,20 @@ static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info,
  * XXX: We can be even smarter selecting the best IRQ once we solve the
  * affinity problem.
  */
-static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd)
+static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc)
 {
-	int i, best = 0;
+	int i, best = 1;
+	unsigned long flags;
 
-	raw_spin_lock(&list_lock);
+	if (!desc->msi_attrib.is_msix || vmd->msix_count == 1)
+		return &vmd->irqs[0];
+
+	raw_spin_lock_irqsave(&list_lock, flags);
 	for (i = 1; i < vmd->msix_count; i++)
 		if (vmd->irqs[i].count < vmd->irqs[best].count)
 			best = i;
 	vmd->irqs[best].count++;
-	raw_spin_unlock(&list_lock);
+	raw_spin_unlock_irqrestore(&list_lock, flags);
 
 	return &vmd->irqs[best];
 }
@@ -184,14 +191,15 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
 			unsigned int virq, irq_hw_number_t hwirq,
 			msi_alloc_info_t *arg)
 {
-	struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(arg->desc)->bus);
+	struct msi_desc *desc = arg->desc;
+	struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus);
 	struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL);
 
 	if (!vmdirq)
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&vmdirq->node);
-	vmdirq->irq = vmd_next_irq(vmd);
+	vmdirq->irq = vmd_next_irq(vmd, desc);
 	vmdirq->virq = virq;
 
 	irq_domain_set_info(domain, virq, vmdirq->irq->vmd_vector, info->chip,
@@ -203,11 +211,12 @@ static void vmd_msi_free(struct irq_domain *domain,
 			struct msi_domain_info *info, unsigned int virq)
 {
 	struct vmd_irq *vmdirq = irq_get_chip_data(virq);
+	unsigned long flags;
 
 	/* XXX: Potential optimization to rebalance */
-	raw_spin_lock(&list_lock);
+	raw_spin_lock_irqsave(&list_lock, flags);
 	vmdirq->irq->count--;
-	raw_spin_unlock(&list_lock);
+	raw_spin_unlock_irqrestore(&list_lock, flags);
 
 	kfree_rcu(vmdirq, rcu);
 }
@@ -261,18 +270,18 @@ static struct device *to_vmd_dev(struct device *dev)
 
 static struct dma_map_ops *vmd_dma_ops(struct device *dev)
 {
-	return to_vmd_dev(dev)->archdata.dma_ops;
+	return get_dma_ops(to_vmd_dev(dev));
 }
 
 static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr,
-		       gfp_t flag, struct dma_attrs *attrs)
+		       gfp_t flag, unsigned long attrs)
 {
 	return vmd_dma_ops(dev)->alloc(to_vmd_dev(dev), size, addr, flag,
 				       attrs);
 }
 
 static void vmd_free(struct device *dev, size_t size, void *vaddr,
-		     dma_addr_t addr, struct dma_attrs *attrs)
+		     dma_addr_t addr, unsigned long attrs)
 {
 	return vmd_dma_ops(dev)->free(to_vmd_dev(dev), size, vaddr, addr,
 				      attrs);
@@ -280,7 +289,7 @@ static void vmd_free(struct device *dev, size_t size, void *vaddr,
 
 static int vmd_mmap(struct device *dev, struct vm_area_struct *vma,
 		    void *cpu_addr, dma_addr_t addr, size_t size,
-		    struct dma_attrs *attrs)
+		    unsigned long attrs)
 {
 	return vmd_dma_ops(dev)->mmap(to_vmd_dev(dev), vma, cpu_addr, addr,
 				      size, attrs);
@@ -288,7 +297,7 @@ static int vmd_mmap(struct device *dev, struct vm_area_struct *vma,
 
 static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt,
 			   void *cpu_addr, dma_addr_t addr, size_t size,
-			   struct dma_attrs *attrs)
+			   unsigned long attrs)
 {
 	return vmd_dma_ops(dev)->get_sgtable(to_vmd_dev(dev), sgt, cpu_addr,
 					     addr, size, attrs);
@@ -297,26 +306,26 @@ static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt,
 static dma_addr_t vmd_map_page(struct device *dev, struct page *page,
 			       unsigned long offset, size_t size,
 			       enum dma_data_direction dir,
-			       struct dma_attrs *attrs)
+			       unsigned long attrs)
 {
 	return vmd_dma_ops(dev)->map_page(to_vmd_dev(dev), page, offset, size,
 					  dir, attrs);
 }
 
 static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size,
-			   enum dma_data_direction dir, struct dma_attrs *attrs)
+			   enum dma_data_direction dir, unsigned long attrs)
 {
 	vmd_dma_ops(dev)->unmap_page(to_vmd_dev(dev), addr, size, dir, attrs);
 }
 
 static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-		      enum dma_data_direction dir, struct dma_attrs *attrs)
+		      enum dma_data_direction dir, unsigned long attrs)
 {
 	return vmd_dma_ops(dev)->map_sg(to_vmd_dev(dev), sg, nents, dir, attrs);
 }
 
 static void vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
-			 enum dma_data_direction dir, struct dma_attrs *attrs)
+			 enum dma_data_direction dir, unsigned long attrs)
 {
 	vmd_dma_ops(dev)->unmap_sg(to_vmd_dev(dev), sg, nents, dir, attrs);
 }
@@ -367,7 +376,7 @@ static void vmd_teardown_dma_ops(struct vmd_dev *vmd)
 {
 	struct dma_domain *domain = &vmd->dma_domain;
 
-	if (vmd->dev->dev.archdata.dma_ops)
+	if (get_dma_ops(&vmd->dev->dev))
 		del_dma_domain(domain);
 }
 
@@ -379,7 +388,7 @@ static void vmd_teardown_dma_ops(struct vmd_dev *vmd)
 
 static void vmd_setup_dma_ops(struct vmd_dev *vmd)
 {
-	const struct dma_map_ops *source = vmd->dev->dev.archdata.dma_ops;
+	const struct dma_map_ops *source = get_dma_ops(&vmd->dev->dev);
 	struct dma_map_ops *dest = &vmd->dma_ops;
 	struct dma_domain *domain = &vmd->dma_domain;
 
@@ -594,7 +603,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd)
 	sd->node = pcibus_to_node(vmd->dev->bus);
 
 	vmd->irq_domain = pci_msi_create_irq_domain(NULL, &vmd_msi_domain_info,
-						    NULL);
+						    x86_vector_domain);
 	if (!vmd->irq_domain)
 		return -ENODEV;
 
diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c
index 524142117296..5fdacb322ceb 100644
--- a/arch/x86/platform/efi/early_printk.c
+++ b/arch/x86/platform/efi/early_printk.c
@@ -44,7 +44,7 @@ early_initcall(early_efi_map_fb);
  * In case earlyprintk=efi,keep we have the whole framebuffer mapped already
  * so just return the offset efi_fb + start.
  */
-static __init_refok void *early_efi_map(unsigned long start, unsigned long len)
+static __ref void *early_efi_map(unsigned long start, unsigned long len)
 {
 	unsigned long base;
 
@@ -56,7 +56,7 @@ static __init_refok void *early_efi_map(unsigned long start, unsigned long len)
 		return early_ioremap(base + start, len);
 }
 
-static __init_refok void early_efi_unmap(void *addr, unsigned long len)
+static __ref void early_efi_unmap(void *addr, unsigned long len)
 {
 	if (!efi_fb)
 		early_iounmap(addr, len);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 17c8bbd4e2f0..1fbb408e2e72 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -51,7 +51,6 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/x86_init.h>
-#include <asm/rtc.h>
 #include <asm/uv/uv.h>
 
 static struct efi efi_phys __initdata;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 04db6fbce96d..677e29e29473 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -25,6 +25,7 @@
 #include <linux/bootmem.h>
 #include <linux/ioport.h>
 #include <linux/init.h>
+#include <linux/mc146818rtc.h>
 #include <linux/efi.h>
 #include <linux/uaccess.h>
 #include <linux/io.h>
diff --git a/arch/x86/platform/intel-mid/intel_mid_vrtc.c b/arch/x86/platform/intel-mid/intel_mid_vrtc.c
index ee40fcb6e54d..58024862a7eb 100644
--- a/arch/x86/platform/intel-mid/intel_mid_vrtc.c
+++ b/arch/x86/platform/intel-mid/intel_mid_vrtc.c
@@ -22,6 +22,7 @@
 #include <linux/init.h>
 #include <linux/sfi.h>
 #include <linux/platform_device.h>
+#include <linux/mc146818rtc.h>
 
 #include <asm/intel-mid.h>
 #include <asm/intel_mid_vrtc.h>
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index f2b5e6a5cf95..f0b5f2d402af 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -37,11 +37,11 @@ unsigned long jump_address_phys;
  */
 unsigned long restore_cr3 __visible;
 
-pgd_t *temp_level4_pgt __visible;
+unsigned long temp_level4_pgt __visible;
 
 unsigned long relocated_restore_code __visible;
 
-static int set_up_temporary_text_mapping(void)
+static int set_up_temporary_text_mapping(pgd_t *pgd)
 {
 	pmd_t *pmd;
 	pud_t *pud;
@@ -71,7 +71,7 @@ static int set_up_temporary_text_mapping(void)
 		__pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
 	set_pud(pud + pud_index(restore_jump_address),
 		__pud(__pa(pmd) | _KERNPG_TABLE));
-	set_pgd(temp_level4_pgt + pgd_index(restore_jump_address),
+	set_pgd(pgd + pgd_index(restore_jump_address),
 		__pgd(__pa(pud) | _KERNPG_TABLE));
 
 	return 0;
@@ -90,15 +90,16 @@ static int set_up_temporary_mappings(void)
 		.kernel_mapping = true,
 	};
 	unsigned long mstart, mend;
+	pgd_t *pgd;
 	int result;
 	int i;
 
-	temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
-	if (!temp_level4_pgt)
+	pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);
+	if (!pgd)
 		return -ENOMEM;
 
 	/* Prepare a temporary mapping for the kernel text */
-	result = set_up_temporary_text_mapping();
+	result = set_up_temporary_text_mapping(pgd);
 	if (result)
 		return result;
 
@@ -107,13 +108,12 @@ static int set_up_temporary_mappings(void)
 		mstart = pfn_mapped[i].start << PAGE_SHIFT;
 		mend   = pfn_mapped[i].end << PAGE_SHIFT;
 
-		result = kernel_ident_mapping_init(&info, temp_level4_pgt,
-						   mstart, mend);
-
+		result = kernel_ident_mapping_init(&info, pgd, mstart, mend);
 		if (result)
 			return result;
 	}
 
+	temp_level4_pgt = (unsigned long)pgd - __PAGE_OFFSET;
 	return 0;
 }
 
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index 8eee0e9c93f0..ce8da3a0412c 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -72,8 +72,6 @@ ENTRY(restore_image)
 	/* code below has been relocated to a safe page */
 ENTRY(core_restore_code)
 	/* switch to temporary page tables */
-	movq	$__PAGE_OFFSET, %rcx
-	subq	%rcx, %rax
 	movq	%rax, %cr3
 	/* flush TLB */
 	movq	%rbx, %rcx
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 12734a96df47..ac58c1616408 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -8,6 +8,8 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
 LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
 targets += purgatory.ro
 
+KCOV_INSTRUMENT := n
+
 # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
 # in turn leaves some undefined symbols like __fentry__ in purgatory and not
 # sure how to relocate those. Like kexec-tools, use custom flags.
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index c556c5ae8de5..25012abc3409 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -48,7 +48,7 @@ targets += realmode.lds
 $(obj)/realmode.lds: $(obj)/pasyms.h
 
 LDFLAGS_realmode.elf := --emit-relocs -T
-CPPFLAGS_realmode.lds += -P -C -I$(obj)
+CPPFLAGS_realmode.lds += -P -C -I$(objtree)/$(obj)
 
 targets += realmode.elf
 $(obj)/realmode.elf: $(obj)/realmode.lds $(REALMODE_OBJS) FORCE
diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
index 6c803ca49b5d..d72dec406ccb 100644
--- a/arch/x86/um/vdso/Makefile
+++ b/arch/x86/um/vdso/Makefile
@@ -2,6 +2,9 @@
 # Building vDSO images for x86.
 #
 
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT                := n
+
 VDSO64-y		:= y
 
 vdso-install-$(VDSO64-y)	+= vdso.so
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index cd993051aed7..8ffb089b19a5 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -34,9 +34,7 @@
 #include <linux/edd.h>
 #include <linux/frame.h>
 
-#ifdef CONFIG_KEXEC_CORE
 #include <linux/kexec.h>
-#endif
 
 #include <xen/xen.h>
 #include <xen/events.h>
@@ -1334,7 +1332,8 @@ static void xen_crash_shutdown(struct pt_regs *regs)
 static int
 xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
-	xen_reboot(SHUTDOWN_crash);
+	if (!kexec_crash_loaded())
+		xen_reboot(SHUTDOWN_crash);
 	return NOTIFY_DONE;
 }
 
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index cd66698348ca..1e68806d6695 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -142,7 +142,7 @@ static void xtensa_sync_sg_for_device(struct device *dev,
 
 static void *xtensa_dma_alloc(struct device *dev, size_t size,
 			      dma_addr_t *handle, gfp_t flag,
-			      struct dma_attrs *attrs)
+			      unsigned long attrs)
 {
 	unsigned long ret;
 	unsigned long uncached = 0;
@@ -171,7 +171,7 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
 }
 
 static void xtensa_dma_free(struct device *hwdev, size_t size, void *vaddr,
-			    dma_addr_t dma_handle, struct dma_attrs *attrs)
+			    dma_addr_t dma_handle, unsigned long attrs)
 {
 	unsigned long addr = (unsigned long)vaddr +
 		XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
@@ -185,7 +185,7 @@ static void xtensa_dma_free(struct device *hwdev, size_t size, void *vaddr,
 static dma_addr_t xtensa_map_page(struct device *dev, struct page *page,
 				  unsigned long offset, size_t size,
 				  enum dma_data_direction dir,
-				  struct dma_attrs *attrs)
+				  unsigned long attrs)
 {
 	dma_addr_t dma_handle = page_to_phys(page) + offset;
 
@@ -195,14 +195,14 @@ static dma_addr_t xtensa_map_page(struct device *dev, struct page *page,
 
 static void xtensa_unmap_page(struct device *dev, dma_addr_t dma_handle,
 			      size_t size, enum dma_data_direction dir,
-			      struct dma_attrs *attrs)
+			      unsigned long attrs)
 {
 	xtensa_sync_single_for_cpu(dev, dma_handle, size, dir);
 }
 
 static int xtensa_map_sg(struct device *dev, struct scatterlist *sg,
 			 int nents, enum dma_data_direction dir,
-			 struct dma_attrs *attrs)
+			 unsigned long attrs)
 {
 	struct scatterlist *s;
 	int i;
@@ -217,7 +217,7 @@ static int xtensa_map_sg(struct device *dev, struct scatterlist *sg,
 static void xtensa_unmap_sg(struct device *dev,
 			    struct scatterlist *sg, int nents,
 			    enum dma_data_direction dir,
-			    struct dma_attrs *attrs)
+			    unsigned long attrs)
 {
 	struct scatterlist *s;
 	int i;