diff options
Diffstat (limited to 'arch')
564 files changed, 15446 insertions, 6714 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 15996290fed4..e9c9334507dd 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -357,6 +357,43 @@ config SECCOMP_FILTER See Documentation/prctl/seccomp_filter.txt for details. +config HAVE_GCC_PLUGINS + bool + help + An arch should select this symbol if it supports building with + GCC plugins. + +menuconfig GCC_PLUGINS + bool "GCC plugins" + depends on HAVE_GCC_PLUGINS + depends on !COMPILE_TEST + help + GCC plugins are loadable modules that provide extra features to the + compiler. They are useful for runtime instrumentation and static analysis. + + See Documentation/gcc-plugins.txt for details. + +config GCC_PLUGIN_CYC_COMPLEXITY + bool "Compute the cyclomatic complexity of a function" + depends on GCC_PLUGINS + help + The complexity M of a function's control flow graph is defined as: + M = E - N + 2P + where + + E = the number of edges + N = the number of nodes + P = the number of connected components (exit nodes). + +config GCC_PLUGIN_SANCOV + bool + depends on GCC_PLUGINS + help + This plugin inserts a __sanitizer_cov_trace_pc() call at the start of + basic blocks. It supports all gcc versions with plugin support (from + gcc-4.5 on). It is based on the commit "Add fuzzing coverage support" + by Dmitry Vyukov <dvyukov@google.com>. + config HAVE_CC_STACKPROTECTOR bool help @@ -424,6 +461,15 @@ config CC_STACKPROTECTOR_STRONG endchoice +config HAVE_ARCH_WITHIN_STACK_FRAMES + bool + help + An architecture should select this if it can walk the kernel stack + frames to determine if an object is part of either the arguments + or local variables (i.e. that it excludes saved return addresses, + and similar) by implementing an inline arch_within_stack_frames(), + which is used by CONFIG_HARDENED_USERCOPY. + config HAVE_CONTEXT_TRACKING bool help diff --git a/arch/alpha/boot/Makefile b/arch/alpha/boot/Makefile index 8399bd0e68e8..0cbe4c59d3ce 100644 --- a/arch/alpha/boot/Makefile +++ b/arch/alpha/boot/Makefile @@ -15,7 +15,7 @@ targets := vmlinux.gz vmlinux \ OBJSTRIP := $(obj)/tools/objstrip HOSTCFLAGS := -Wall -I$(objtree)/usr/include -BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj) +BOOTCFLAGS += -I$(objtree)/$(obj) -I$(srctree)/$(obj) # SRM bootable image. Copy to offset 512 of a partition. $(obj)/bootimage: $(addprefix $(obj)/tools/,mkbb lxboot bootlx) $(obj)/vmlinux.nh diff --git a/arch/alpha/include/asm/dma-mapping.h b/arch/alpha/include/asm/dma-mapping.h index 3c3451f58ff4..c63b6ac19ee5 100644 --- a/arch/alpha/include/asm/dma-mapping.h +++ b/arch/alpha/include/asm/dma-mapping.h @@ -1,8 +1,6 @@ #ifndef _ALPHA_DMA_MAPPING_H #define _ALPHA_DMA_MAPPING_H -#include <linux/dma-attrs.h> - extern struct dma_map_ops *dma_ops; static inline struct dma_map_ops *get_dma_ops(struct device *dev) diff --git a/arch/alpha/include/asm/rtc.h b/arch/alpha/include/asm/rtc.h deleted file mode 100644 index f71c3b0ed360..000000000000 --- a/arch/alpha/include/asm/rtc.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/rtc.h> diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 32e920a83ae5..e9e90bfa2b50 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -86,33 +86,6 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define TS_UAC_NOPRINT 0x0001 /* ! Preserve the following three */ #define TS_UAC_NOFIX 0x0002 /* ! flags as they match */ #define TS_UAC_SIGBUS 0x0004 /* ! userspace part of 'osf_sysinfo' */ -#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ - -#ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags)); -} -static inline void clear_restore_sigmask(void) -{ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; -} -static inline bool test_restore_sigmask(void) -{ - return current_thread_info()->status & TS_RESTORE_SIGMASK; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - if (!(ti->status & TS_RESTORE_SIGMASK)) - return false; - ti->status &= ~TS_RESTORE_SIGMASK; - return true; -} -#endif #define SET_UNALIGN_CTL(task,value) ({ \ __u32 status = task_thread_info(task)->status & ~UAC_BITMASK; \ diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index 53dd2f1a53aa..d5f0580746a5 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -24,7 +24,6 @@ #include <asm/gct.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> -#include <asm/rtc.h> #include <asm/vga.h> #include "proto.h" diff --git a/arch/alpha/kernel/machvec_impl.h b/arch/alpha/kernel/machvec_impl.h index f54bdf658cd0..d3398f6ab74c 100644 --- a/arch/alpha/kernel/machvec_impl.h +++ b/arch/alpha/kernel/machvec_impl.h @@ -137,7 +137,7 @@ #define __initmv __initdata #define ALIAS_MV(x) #else -#define __initmv __initdata_refok +#define __initmv __refdata /* GCC actually has a syntax for defining aliases, but is under some delusion that you shouldn't be able to declare it extern somewhere diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c index 8e735b5e56bd..bb152e21e5ae 100644 --- a/arch/alpha/kernel/pci-noop.c +++ b/arch/alpha/kernel/pci-noop.c @@ -109,7 +109,7 @@ sys_pciconfig_write(unsigned long bus, unsigned long dfn, static void *alpha_noop_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, - struct dma_attrs *attrs) + unsigned long attrs) { void *ret; diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 8969bf2dfe3a..451fc9cdd323 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -349,7 +349,7 @@ static struct pci_dev *alpha_gendev_to_pci(struct device *dev) static dma_addr_t alpha_pci_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct pci_dev *pdev = alpha_gendev_to_pci(dev); int dac_allowed; @@ -369,7 +369,7 @@ static dma_addr_t alpha_pci_map_page(struct device *dev, struct page *page, static void alpha_pci_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long flags; struct pci_dev *pdev = alpha_gendev_to_pci(dev); @@ -433,7 +433,7 @@ static void alpha_pci_unmap_page(struct device *dev, dma_addr_t dma_addr, static void *alpha_pci_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addrp, gfp_t gfp, - struct dma_attrs *attrs) + unsigned long attrs) { struct pci_dev *pdev = alpha_gendev_to_pci(dev); void *cpu_addr; @@ -478,7 +478,7 @@ try_again: static void alpha_pci_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, - struct dma_attrs *attrs) + unsigned long attrs) { struct pci_dev *pdev = alpha_gendev_to_pci(dev); pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); @@ -651,7 +651,7 @@ sg_fill(struct device *dev, struct scatterlist *leader, struct scatterlist *end, static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct pci_dev *pdev = alpha_gendev_to_pci(dev); struct scatterlist *start, *end, *out; @@ -729,7 +729,7 @@ static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg, static void alpha_pci_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct pci_dev *pdev = alpha_gendev_to_pci(dev); unsigned long flags; diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c index f535a3fd0f60..ceed68c7500b 100644 --- a/arch/alpha/kernel/rtc.c +++ b/arch/alpha/kernel/rtc.c @@ -15,8 +15,6 @@ #include <linux/rtc.h> #include <linux/platform_device.h> -#include <asm/rtc.h> - #include "proto.h" @@ -81,7 +79,7 @@ init_rtc_epoch(void) static int alpha_rtc_read_time(struct device *dev, struct rtc_time *tm) { - __get_rtc_time(tm); + mc146818_get_time(tm); /* Adjust for non-default epochs. It's easier to depend on the generic __get_rtc_time and adjust the epoch here than create @@ -112,7 +110,7 @@ alpha_rtc_set_time(struct device *dev, struct rtc_time *tm) tm = &xtm; } - return __set_rtc_time(tm); + return mc146818_set_time(tm); } static int diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index ab74b5d9186c..20afc65e22dc 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -22,7 +22,7 @@ static void *arc_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs) + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { unsigned long order = get_order(size); struct page *page; @@ -46,7 +46,7 @@ static void *arc_dma_alloc(struct device *dev, size_t size, * (vs. always going to memory - thus are faster) */ if ((is_isa_arcv2() && ioc_exists) || - dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs)) + (attrs & DMA_ATTR_NON_CONSISTENT)) need_coh = 0; /* @@ -90,13 +90,13 @@ static void *arc_dma_alloc(struct device *dev, size_t size, } static void arc_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { phys_addr_t paddr = plat_dma_to_phys(dev, dma_handle); struct page *page = virt_to_page(paddr); int is_non_coh = 1; - is_non_coh = dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs) || + is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) || (is_isa_arcv2() && ioc_exists); if (PageHighMem(page) || !is_non_coh) @@ -130,7 +130,7 @@ static void _dma_cache_sync(phys_addr_t paddr, size_t size, static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { phys_addr_t paddr = page_to_phys(page) + offset; _dma_cache_sync(paddr, size, dir); @@ -138,7 +138,7 @@ static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page, } static int arc_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, struct dma_attrs *attrs) + int nents, enum dma_data_direction dir, unsigned long attrs) { struct scatterlist *s; int i; diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 8be930394750..399e2f223d25 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -220,7 +220,7 @@ void __init mem_init(void) /* * free_initmem: Free all the __init memory. */ -void __init_refok free_initmem(void) +void __ref free_initmem(void) { free_initmem_default(-1); } diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 14b4cf75ceec..a9c4e48bb7ec 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -35,6 +35,7 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 + select HAVE_ARCH_HARDENED_USERCOPY select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_MMAP_RND_BITS if MMU @@ -54,6 +55,7 @@ config ARM select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL) select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL) select HAVE_FUNCTION_TRACER if (!XIP_KERNEL) + select HAVE_GCC_PLUGINS select HAVE_GENERIC_DMA_COHERENT select HAVE_HW_BREAKPOINT if (PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)) select HAVE_IDE if PCI || ISA || PCMCIA @@ -699,7 +701,7 @@ config ARCH_VIRT depends on ARCH_MULTI_V7 select ARM_AMBA select ARM_GIC - select ARM_GIC_V2M if PCI_MSI + select ARM_GIC_V2M if PCI select ARM_GIC_V3 select ARM_PSCI select HAVE_ARM_ARCH_TIMER diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index 1143c4d5c567..301281645d08 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -310,7 +310,7 @@ static inline void unmap_single(struct device *dev, struct safe_buffer *buf, */ static dma_addr_t dmabounce_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { dma_addr_t dma_addr; int ret; @@ -344,7 +344,7 @@ static dma_addr_t dmabounce_map_page(struct device *dev, struct page *page, * should be) */ static void dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { struct safe_buffer *buf; diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index a83570f10124..d009f7911ffc 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -5,7 +5,6 @@ #include <linux/mm_types.h> #include <linux/scatterlist.h> -#include <linux/dma-attrs.h> #include <linux/dma-debug.h> #include <asm/memory.h> @@ -174,7 +173,7 @@ static inline void dma_mark_clean(void *addr, size_t size) { } * to be the device-viewed address. */ extern void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t gfp, struct dma_attrs *attrs); + gfp_t gfp, unsigned long attrs); /** * arm_dma_free - free memory allocated by arm_dma_alloc @@ -191,7 +190,7 @@ extern void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, * during and after this call executing are illegal. */ extern void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, struct dma_attrs *attrs); + dma_addr_t handle, unsigned long attrs); /** * arm_dma_mmap - map a coherent DMA allocation into user space @@ -208,7 +207,7 @@ extern void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, */ extern int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, - struct dma_attrs *attrs); + unsigned long attrs); /* * This can be called during early boot to increase the size of the atomic @@ -262,16 +261,16 @@ extern void dmabounce_unregister_dev(struct device *); * The scatter list versions of the above methods. */ extern int arm_dma_map_sg(struct device *, struct scatterlist *, int, - enum dma_data_direction, struct dma_attrs *attrs); + enum dma_data_direction, unsigned long attrs); extern void arm_dma_unmap_sg(struct device *, struct scatterlist *, int, - enum dma_data_direction, struct dma_attrs *attrs); + enum dma_data_direction, unsigned long attrs); extern void arm_dma_sync_sg_for_cpu(struct device *, struct scatterlist *, int, enum dma_data_direction); extern void arm_dma_sync_sg_for_device(struct device *, struct scatterlist *, int, enum dma_data_direction); extern int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, - struct dma_attrs *attrs); + unsigned long attrs); #endif /* __KERNEL__ */ #endif diff --git a/arch/arm/include/asm/kexec.h b/arch/arm/include/asm/kexec.h index c2b9b4bdec00..1869af6bac5c 100644 --- a/arch/arm/include/asm/kexec.h +++ b/arch/arm/include/asm/kexec.h @@ -53,6 +53,30 @@ static inline void crash_setup_regs(struct pt_regs *newregs, /* Function pointer to optional machine-specific reinitialization */ extern void (*kexec_reinit)(void); +static inline unsigned long phys_to_boot_phys(phys_addr_t phys) +{ + return phys_to_idmap(phys); +} +#define phys_to_boot_phys phys_to_boot_phys + +static inline phys_addr_t boot_phys_to_phys(unsigned long entry) +{ + return idmap_to_phys(entry); +} +#define boot_phys_to_phys boot_phys_to_phys + +static inline unsigned long page_to_boot_pfn(struct page *page) +{ + return page_to_pfn(page) + (arch_phys_to_idmap_offset >> PAGE_SHIFT); +} +#define page_to_boot_pfn page_to_boot_pfn + +static inline struct page *boot_pfn_to_page(unsigned long boot_pfn) +{ + return pfn_to_page(boot_pfn - (arch_phys_to_idmap_offset >> PAGE_SHIFT)); +} +#define boot_pfn_to_page boot_pfn_to_page + #endif /* __ASSEMBLY__ */ #endif /* CONFIG_KEXEC */ diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 3d5a5cd071bd..58faff5f1eb2 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -66,6 +66,8 @@ extern void __kvm_tlb_flush_vmid(struct kvm *kvm); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern void __init_stage2_translation(void); + +extern void __kvm_hyp_reset(unsigned long); #endif #endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 96387d477e91..de338d93d11b 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -241,8 +241,7 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); -static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, - phys_addr_t pgd_ptr, +static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) { @@ -251,18 +250,13 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, * code. The init code doesn't need to preserve these * registers as r0-r3 are already callee saved according to * the AAPCS. - * Note that we slightly misuse the prototype by casing the + * Note that we slightly misuse the prototype by casting the * stack pointer to a void *. - * - * We don't have enough registers to perform the full init in - * one go. Install the boot PGD first, and then install the - * runtime PGD, stack pointer and vectors. The PGDs are always - * passed as the third argument, in order to be passed into - * r2-r3 to the init code (yes, this is compliant with the - * PCS!). - */ - kvm_call_hyp(NULL, 0, boot_pgd_ptr); + * The PGDs are always passed as the third argument, in order + * to be passed into r2-r3 to the init code (yes, this is + * compliant with the PCS!). + */ kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr); } @@ -272,16 +266,13 @@ static inline void __cpu_init_stage2(void) kvm_call_hyp(__init_stage2_translation); } -static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr, +static inline void __cpu_reset_hyp_mode(unsigned long vector_ptr, phys_addr_t phys_idmap_start) { - /* - * TODO - * kvm_call_reset(boot_pgd_ptr, phys_idmap_start); - */ + kvm_call_hyp((void *)virt_to_idmap(__kvm_hyp_reset), vector_ptr); } -static inline int kvm_arch_dev_ioctl_check_extension(long ext) +static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) { return 0; } diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h index f0e860761380..6eaff28f2ff3 100644 --- a/arch/arm/include/asm/kvm_hyp.h +++ b/arch/arm/include/asm/kvm_hyp.h @@ -25,9 +25,6 @@ #define __hyp_text __section(.hyp.text) notrace -#define kern_hyp_va(v) (v) -#define hyp_kern_va(v) (v) - #define __ACCESS_CP15(CRn, Op1, CRm, Op2) \ "mrc", "mcr", __stringify(p15, Op1, %0, CRn, CRm, Op2), u32 #define __ACCESS_CP15_64(Op1, CRm) \ diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index f9a65061130b..3bb803d6814b 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -26,16 +26,7 @@ * We directly use the kernel VA for the HYP, as we can directly share * the mapping (HTTBR "covers" TTBR1). */ -#define HYP_PAGE_OFFSET_MASK UL(~0) -#define HYP_PAGE_OFFSET PAGE_OFFSET -#define KERN_TO_HYP(kva) (kva) - -/* - * Our virtual mapping for the boot-time MMU-enable code. Must be - * shared across all the page-tables. Conveniently, we use the vectors - * page, where no kernel data will ever be shared with HYP. - */ -#define TRAMPOLINE_VA UL(CONFIG_VECTORS_BASE) +#define kern_hyp_va(kva) (kva) /* * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels. @@ -49,9 +40,8 @@ #include <asm/pgalloc.h> #include <asm/stage2_pgtable.h> -int create_hyp_mappings(void *from, void *to); +int create_hyp_mappings(void *from, void *to, pgprot_t prot); int create_hyp_io_mappings(void *from, void *to, phys_addr_t); -void free_boot_hyp_pgd(void); void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); @@ -65,7 +55,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); -phys_addr_t kvm_mmu_get_boot_httbr(void); phys_addr_t kvm_get_idmap_vector(void); phys_addr_t kvm_get_idmap_start(void); int kvm_mmu_init(void); diff --git a/arch/arm/include/asm/mach/pci.h b/arch/arm/include/asm/mach/pci.h index 0070e8520cd4..2d88af5be45f 100644 --- a/arch/arm/include/asm/mach/pci.h +++ b/arch/arm/include/asm/mach/pci.h @@ -22,6 +22,7 @@ struct hw_pci { struct msi_controller *msi_ctrl; struct pci_ops *ops; int nr_controllers; + unsigned int io_optional:1; void **private_data; int (*setup)(int nr, struct pci_sys_data *); struct pci_bus *(*scan)(int nr, struct pci_sys_data *); diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index d62204060cbe..a8d656d9aec7 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -97,7 +97,9 @@ extern pgprot_t pgprot_s2_device; #define PAGE_READONLY_EXEC _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY) #define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN) #define PAGE_KERNEL_EXEC pgprot_kernel -#define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_HYP) +#define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_XN) +#define PAGE_HYP_EXEC _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY) +#define PAGE_HYP_RO _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY | L_PTE_XN) #define PAGE_HYP_DEVICE _MOD_PROT(pgprot_hyp_device, L_PTE_HYP) #define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY) #define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY) diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 62a6f65029e6..a93c0f99acf7 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -480,7 +480,10 @@ arm_copy_from_user(void *to, const void __user *from, unsigned long n); static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) { - unsigned int __ua_flags = uaccess_save_and_enable(); + unsigned int __ua_flags; + + check_object_size(to, n, false); + __ua_flags = uaccess_save_and_enable(); n = arm_copy_from_user(to, from, n); uaccess_restore(__ua_flags); return n; @@ -495,11 +498,15 @@ static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { #ifndef CONFIG_UACCESS_WITH_MEMCPY - unsigned int __ua_flags = uaccess_save_and_enable(); + unsigned int __ua_flags; + + check_object_size(from, n, true); + __ua_flags = uaccess_save_and_enable(); n = arm_copy_to_user(to, from, n); uaccess_restore(__ua_flags); return n; #else + check_object_size(from, n, true); return arm_copy_to_user(to, from, n); #endif } diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h index d4ceaf5f299b..a2e75b84e2ae 100644 --- a/arch/arm/include/asm/virt.h +++ b/arch/arm/include/asm/virt.h @@ -80,6 +80,10 @@ static inline bool is_kernel_in_hyp_mode(void) return false; } +/* The section containing the hypervisor idmap text */ +extern char __hyp_idmap_text_start[]; +extern char __hyp_idmap_text_end[]; + /* The section containing the hypervisor text */ extern char __hyp_text_start[]; extern char __hyp_text_end[]; diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h index 9408a994cc91..95ce6ac3a971 100644 --- a/arch/arm/include/asm/xen/page-coherent.h +++ b/arch/arm/include/asm/xen/page-coherent.h @@ -2,15 +2,14 @@ #define _ASM_ARM_XEN_PAGE_COHERENT_H #include <asm/page.h> -#include <linux/dma-attrs.h> #include <linux/dma-mapping.h> void __xen_dma_map_page(struct device *hwdev, struct page *page, dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, struct dma_attrs *attrs); + enum dma_data_direction dir, unsigned long attrs); void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs); + unsigned long attrs); void __xen_dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir); @@ -18,22 +17,20 @@ void __xen_dma_sync_single_for_device(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir); static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) + dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) { return __generic_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs); } static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, - void *cpu_addr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) { __generic_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs); } static inline void xen_dma_map_page(struct device *hwdev, struct page *page, dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { unsigned long page_pfn = page_to_xen_pfn(page); unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); @@ -58,8 +55,7 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page, } static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + size_t size, enum dma_data_direction dir, unsigned long attrs) { unsigned long pfn = PFN_DOWN(handle); /* diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c index 05e61a2eeabe..2f0e07735d1d 100644 --- a/arch/arm/kernel/bios32.c +++ b/arch/arm/kernel/bios32.c @@ -410,7 +410,8 @@ static int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return irq; } -static int pcibios_init_resources(int busnr, struct pci_sys_data *sys) +static int pcibios_init_resource(int busnr, struct pci_sys_data *sys, + int io_optional) { int ret; struct resource_entry *window; @@ -420,6 +421,14 @@ static int pcibios_init_resources(int busnr, struct pci_sys_data *sys) &iomem_resource, sys->mem_offset); } + /* + * If a platform says I/O port support is optional, we don't add + * the default I/O space. The platform is responsible for adding + * any I/O space it needs. + */ + if (io_optional) + return 0; + resource_list_for_each_entry(window, &sys->resources) if (resource_type(window->res) == IORESOURCE_IO) return 0; @@ -466,7 +475,7 @@ static void pcibios_init_hw(struct device *parent, struct hw_pci *hw, if (ret > 0) { struct pci_host_bridge *host_bridge; - ret = pcibios_init_resources(nr, sys); + ret = pcibios_init_resource(nr, sys, hw->io_optional); if (ret) { kfree(sys); break; @@ -515,25 +524,23 @@ void pci_common_init_dev(struct device *parent, struct hw_pci *hw) list_for_each_entry(sys, &head, node) { struct pci_bus *bus = sys->bus; - if (!pci_has_flag(PCI_PROBE_ONLY)) { + /* + * We insert PCI resources into the iomem_resource and + * ioport_resource trees in either pci_bus_claim_resources() + * or pci_bus_assign_resources(). + */ + if (pci_has_flag(PCI_PROBE_ONLY)) { + pci_bus_claim_resources(bus); + } else { struct pci_bus *child; - /* - * Size the bridge windows. - */ pci_bus_size_bridges(bus); - - /* - * Assign resources. - */ pci_bus_assign_resources(bus); list_for_each_entry(child, &bus->children, node) pcie_bus_configure_settings(child); } - /* - * Tell drivers about devices found. - */ + pci_bus_add_devices(bus); } } @@ -590,18 +597,6 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, return start; } -/** - * pcibios_enable_device - Enable I/O and memory. - * @dev: PCI device to be enabled - */ -int pcibios_enable_device(struct pci_dev *dev, int mask) -{ - if (pci_has_flag(PCI_PROBE_ONLY)) - return 0; - - return pci_enable_resources(dev, mask); -} - int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine) { diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 59fd0e24c56b..b18c1ea56bed 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -57,7 +57,7 @@ int machine_kexec_prepare(struct kimage *image) for (i = 0; i < image->nr_segments; i++) { current_segment = &image->segment[i]; - if (!memblock_is_region_memory(current_segment->mem, + if (!memblock_is_region_memory(idmap_to_phys(current_segment->mem), current_segment->memsz)) return -EINVAL; diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index da2f6c360f6b..df7f2a75e769 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -848,10 +848,29 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) kernel_data.end = virt_to_phys(_end - 1); for_each_memblock(memory, region) { + phys_addr_t start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); + phys_addr_t end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; + unsigned long boot_alias_start; + + /* + * Some systems have a special memory alias which is only + * used for booting. We need to advertise this region to + * kexec-tools so they know where bootable RAM is located. + */ + boot_alias_start = phys_to_idmap(start); + if (arm_has_idmap_alias() && boot_alias_start != IDMAP_INVALID_ADDR) { + res = memblock_virt_alloc(sizeof(*res), 0); + res->name = "System RAM (boot alias)"; + res->start = boot_alias_start; + res->end = phys_to_idmap(end); + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + request_resource(&iomem_resource, res); + } + res = memblock_virt_alloc(sizeof(*res), 0); res->name = "System RAM"; - res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); - res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; + res->start = start; + res->end = end; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); @@ -1000,9 +1019,25 @@ static void __init reserve_crashkernel(void) (unsigned long)(crash_base >> 20), (unsigned long)(total_mem >> 20)); + /* The crashk resource must always be located in normal mem */ crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; insert_resource(&iomem_resource, &crashk_res); + + if (arm_has_idmap_alias()) { + /* + * If we have a special RAM alias for use at boot, we + * need to advertise to kexec tools where the alias is. + */ + static struct resource crashk_boot_res = { + .name = "Crash kernel (boot alias)", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + }; + + crashk_boot_res.start = phys_to_idmap(crash_base); + crashk_boot_res.end = crashk_boot_res.start + crash_size - 1; + insert_resource(&iomem_resource, &crashk_boot_res); + } } #else static inline void reserve_crashkernel(void) {} diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 087acb569b63..5f221acd21ae 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -279,8 +279,12 @@ asmlinkage long sys_oabi_epoll_wait(int epfd, mm_segment_t fs; long ret, err, i; - if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event))) + if (maxevents <= 0 || + maxevents > (INT_MAX/sizeof(*kbuf)) || + maxevents > (INT_MAX/sizeof(*events))) return -EINVAL; + if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents)) + return -EFAULT; kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL); if (!kbuf) return -ENOMEM; @@ -317,6 +321,8 @@ asmlinkage long sys_oabi_semtimedop(int semid, if (nsops < 1 || nsops > SEMOPM) return -EINVAL; + if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops)) + return -EFAULT; sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL); if (!sops) return -ENOMEM; diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 99420fc1f066..d24e5dd2aa7a 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -44,7 +44,7 @@ #endif #if (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \ - defined(CONFIG_GENERIC_BUG) + defined(CONFIG_GENERIC_BUG) || defined(CONFIG_JUMP_LABEL) #define ARM_EXIT_KEEP(x) x #define ARM_EXIT_DISCARD(x) #else diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 02abfff68ee5..3e1cd0452d67 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -32,6 +32,8 @@ config KVM select KVM_VFIO select HAVE_KVM_EVENTFD select HAVE_KVM_IRQFD + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER ---help--- Support hosting virtualized guest machines. @@ -46,13 +48,6 @@ config KVM_ARM_HOST ---help--- Provides host support for ARM processors. -config KVM_NEW_VGIC - bool "New VGIC implementation" - depends on KVM - default y - ---help--- - uses the new VGIC implementation - source drivers/vhost/Kconfig endif # VIRTUALIZATION diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index a596b58f6d37..10d77a66cad5 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -22,7 +22,6 @@ obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o -ifeq ($(CONFIG_KVM_NEW_VGIC),y) obj-y += $(KVM)/arm/vgic/vgic.o obj-y += $(KVM)/arm/vgic/vgic-init.o obj-y += $(KVM)/arm/vgic/vgic-irqfd.o @@ -30,9 +29,5 @@ obj-y += $(KVM)/arm/vgic/vgic-v2.o obj-y += $(KVM)/arm/vgic/vgic-mmio.o obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o -else -obj-y += $(KVM)/arm/vgic.o -obj-y += $(KVM)/arm/vgic-v2.o -obj-y += $(KVM)/arm/vgic-v2-emul.o -endif +obj-y += $(KVM)/irqchip.o obj-y += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index f1bde7c4e736..d94bb9093ead 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -20,6 +20,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/kvm_host.h> +#include <linux/list.h> #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/fs.h> @@ -122,7 +123,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out_fail_alloc; - ret = create_hyp_mappings(kvm, kvm + 1); + ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP); if (ret) goto out_free_stage2_pgd; @@ -201,7 +202,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_MAX_VCPUS; break; default: - r = kvm_arch_dev_ioctl_check_extension(ext); + r = kvm_arch_dev_ioctl_check_extension(kvm, ext); break; } return r; @@ -239,7 +240,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) if (err) goto free_vcpu; - err = create_hyp_mappings(vcpu, vcpu + 1); + err = create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP); if (err) goto vcpu_uninit; @@ -377,7 +378,7 @@ void force_vm_exit(const cpumask_t *mask) /** * need_new_vmid_gen - check that the VMID is still valid - * @kvm: The VM's VMID to checkt + * @kvm: The VM's VMID to check * * return true if there is a new generation of VMIDs being used * @@ -616,7 +617,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) * Enter the guest */ trace_kvm_entry(*vcpu_pc(vcpu)); - __kvm_guest_enter(); + guest_enter_irqoff(); vcpu->mode = IN_GUEST_MODE; ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); @@ -642,14 +643,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) local_irq_enable(); /* - * We do local_irq_enable() before calling kvm_guest_exit() so + * We do local_irq_enable() before calling guest_exit() so * that if a timer interrupt hits while running the guest we * account that tick as being spent in the guest. We enable - * preemption after calling kvm_guest_exit() so that if we get + * preemption after calling guest_exit() so that if we get * preempted we make sure ticks after that is not counted as * guest time. */ - kvm_guest_exit(); + guest_exit(); trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); /* @@ -1039,7 +1040,6 @@ long kvm_arch_vm_ioctl(struct file *filp, static void cpu_init_hyp_mode(void *dummy) { - phys_addr_t boot_pgd_ptr; phys_addr_t pgd_ptr; unsigned long hyp_stack_ptr; unsigned long stack_page; @@ -1048,13 +1048,12 @@ static void cpu_init_hyp_mode(void *dummy) /* Switch from the HYP stub to our own HYP init vector */ __hyp_set_vectors(kvm_get_idmap_vector()); - boot_pgd_ptr = kvm_mmu_get_boot_httbr(); pgd_ptr = kvm_mmu_get_httbr(); stack_page = __this_cpu_read(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector); - __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); + __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); __cpu_init_stage2(); kvm_arm_init_debug(); @@ -1076,15 +1075,9 @@ static void cpu_hyp_reinit(void) static void cpu_hyp_reset(void) { - phys_addr_t boot_pgd_ptr; - phys_addr_t phys_idmap_start; - - if (!is_kernel_in_hyp_mode()) { - boot_pgd_ptr = kvm_mmu_get_boot_httbr(); - phys_idmap_start = kvm_get_idmap_start(); - - __cpu_reset_hyp_mode(boot_pgd_ptr, phys_idmap_start); - } + if (!is_kernel_in_hyp_mode()) + __cpu_reset_hyp_mode(hyp_default_vectors, + kvm_get_idmap_start()); } static void _kvm_arch_hardware_enable(void *discard) @@ -1294,14 +1287,14 @@ static int init_hyp_mode(void) * Map the Hyp-code called directly from the host */ err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start), - kvm_ksym_ref(__hyp_text_end)); + kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC); if (err) { kvm_err("Cannot map world-switch code\n"); goto out_err; } err = create_hyp_mappings(kvm_ksym_ref(__start_rodata), - kvm_ksym_ref(__end_rodata)); + kvm_ksym_ref(__end_rodata), PAGE_HYP_RO); if (err) { kvm_err("Cannot map rodata section\n"); goto out_err; @@ -1312,7 +1305,8 @@ static int init_hyp_mode(void) */ for_each_possible_cpu(cpu) { char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu); - err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE); + err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE, + PAGE_HYP); if (err) { kvm_err("Cannot map hyp stack\n"); @@ -1324,7 +1318,7 @@ static int init_hyp_mode(void) kvm_cpu_context_t *cpu_ctxt; cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu); - err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1); + err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP); if (err) { kvm_err("Cannot map host CPU state: %d\n", err); @@ -1332,10 +1326,6 @@ static int init_hyp_mode(void) } } -#ifndef CONFIG_HOTPLUG_CPU - free_boot_hyp_pgd(); -#endif - /* set size of VMID supported by CPU */ kvm_vmid_bits = kvm_get_vmid_bits(); kvm_info("%d-bit VMID\n", kvm_vmid_bits); diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index a494def3f195..af93e3ffc9f3 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -210,7 +210,7 @@ bool kvm_condition_valid(struct kvm_vcpu *vcpu) * @vcpu: The VCPU pointer * * When exceptions occur while instructions are executed in Thumb IF-THEN - * blocks, the ITSTATE field of the CPSR is not advanved (updated), so we have + * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have * to do this little bit of work manually. The fields map like this: * * IT[7:0] -> CPSR[26:25],CPSR[15:10] diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 9093ed0f8b2a..9aca92074f85 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -182,7 +182,7 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) /** * kvm_arm_copy_reg_indices - get indices of all registers. * - * We do core registers right here, then we apppend coproc regs. + * We do core registers right here, then we append coproc regs. */ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index 1f9ae17476f9..bf89c919efc1 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S @@ -32,23 +32,13 @@ * r2,r3 = Hypervisor pgd pointer * * The init scenario is: - * - We jump in HYP with four parameters: boot HYP pgd, runtime HYP pgd, - * runtime stack, runtime vectors - * - Enable the MMU with the boot pgd - * - Jump to a target into the trampoline page (remember, this is the same - * physical page!) - * - Now switch to the runtime pgd (same VA, and still the same physical - * page!) + * - We jump in HYP with 3 parameters: runtime HYP pgd, runtime stack, + * runtime vectors * - Invalidate TLBs * - Set stack and vectors + * - Setup the page tables + * - Enable the MMU * - Profit! (or eret, if you only care about the code). - * - * As we only have four registers available to pass parameters (and we - * need six), we split the init in two phases: - * - Phase 1: r0 = 0, r1 = 0, r2,r3 contain the boot PGD. - * Provides the basic HYP init, and enable the MMU. - * - Phase 2: r0 = ToS, r1 = vectors, r2,r3 contain the runtime PGD. - * Switches to the runtime PGD, set stack and vectors. */ .text @@ -68,8 +58,11 @@ __kvm_hyp_init: W(b) . __do_hyp_init: - cmp r0, #0 @ We have a SP? - bne phase2 @ Yes, second stage init + @ Set stack pointer + mov sp, r0 + + @ Set HVBAR to point to the HYP vectors + mcr p15, 4, r1, c12, c0, 0 @ HVBAR @ Set the HTTBR to point to the hypervisor PGD pointer passed mcrr p15, 4, rr_lo_hi(r2, r3), c2 @@ -114,34 +107,25 @@ __do_hyp_init: THUMB( ldr r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) ) orr r1, r1, r2 orr r0, r0, r1 - isb mcr p15, 4, r0, c1, c0, 0 @ HSCR + isb - @ End of init phase-1 eret -phase2: - @ Set stack pointer - mov sp, r0 - - @ Set HVBAR to point to the HYP vectors - mcr p15, 4, r1, c12, c0, 0 @ HVBAR - - @ Jump to the trampoline page - ldr r0, =TRAMPOLINE_VA - adr r1, target - bfi r0, r1, #0, #PAGE_SHIFT - ret r0 + @ r0 : stub vectors address +ENTRY(__kvm_hyp_reset) + /* We're now in idmap, disable MMU */ + mrc p15, 4, r1, c1, c0, 0 @ HSCTLR + ldr r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_C | HSCTLR_I) + bic r1, r1, r2 + mcr p15, 4, r1, c1, c0, 0 @ HSCTLR -target: @ We're now in the trampoline code, switch page tables - mcrr p15, 4, rr_lo_hi(r2, r3), c2 + /* Install stub vectors */ + mcr p15, 4, r0, c12, c0, 0 @ HVBAR isb - @ Invalidate the old TLBs - mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH - dsb ish - eret +ENDPROC(__kvm_hyp_reset) .ltorg diff --git a/arch/arm/kvm/irq.h b/arch/arm/kvm/irq.h new file mode 100644 index 000000000000..b74099b905fd --- /dev/null +++ b/arch/arm/kvm/irq.h @@ -0,0 +1,19 @@ +/* + * irq.h: in kernel interrupt controller related definitions + * Copyright (c) 2016 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This header is included by irqchip.c. However, on ARM, interrupt + * controller declarations are located in include/kvm/arm_vgic.h since + * they are mostly shared between arm and arm64. + */ + +#ifndef __IRQ_H +#define __IRQ_H + +#include <kvm/arm_vgic.h> + +#endif diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 45c43aecb8f2..bda27b6b1aa2 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -32,8 +32,6 @@ #include "trace.h" -extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; - static pgd_t *boot_hyp_pgd; static pgd_t *hyp_pgd; static pgd_t *merged_hyp_pgd; @@ -484,28 +482,6 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) } /** - * free_boot_hyp_pgd - free HYP boot page tables - * - * Free the HYP boot page tables. The bounce page is also freed. - */ -void free_boot_hyp_pgd(void) -{ - mutex_lock(&kvm_hyp_pgd_mutex); - - if (boot_hyp_pgd) { - unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); - unmap_hyp_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); - boot_hyp_pgd = NULL; - } - - if (hyp_pgd) - unmap_hyp_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - - mutex_unlock(&kvm_hyp_pgd_mutex); -} - -/** * free_hyp_pgds - free Hyp-mode page tables * * Assumes hyp_pgd is a page table used strictly in Hyp-mode and @@ -519,15 +495,20 @@ void free_hyp_pgds(void) { unsigned long addr; - free_boot_hyp_pgd(); - mutex_lock(&kvm_hyp_pgd_mutex); + if (boot_hyp_pgd) { + unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); + free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); + boot_hyp_pgd = NULL; + } + if (hyp_pgd) { + unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE); for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) - unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); + unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE); for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) - unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); + unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE); free_pages((unsigned long)hyp_pgd, hyp_pgd_order); hyp_pgd = NULL; @@ -679,17 +660,18 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr) * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode * @from: The virtual kernel start address of the range * @to: The virtual kernel end address of the range (exclusive) + * @prot: The protection to be applied to this range * * The same virtual address as the kernel virtual address is also used * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying * physical pages. */ -int create_hyp_mappings(void *from, void *to) +int create_hyp_mappings(void *from, void *to, pgprot_t prot) { phys_addr_t phys_addr; unsigned long virt_addr; - unsigned long start = KERN_TO_HYP((unsigned long)from); - unsigned long end = KERN_TO_HYP((unsigned long)to); + unsigned long start = kern_hyp_va((unsigned long)from); + unsigned long end = kern_hyp_va((unsigned long)to); if (is_kernel_in_hyp_mode()) return 0; @@ -704,7 +686,7 @@ int create_hyp_mappings(void *from, void *to) err = __create_hyp_mappings(hyp_pgd, virt_addr, virt_addr + PAGE_SIZE, __phys_to_pfn(phys_addr), - PAGE_HYP); + prot); if (err) return err; } @@ -723,8 +705,8 @@ int create_hyp_mappings(void *from, void *to) */ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) { - unsigned long start = KERN_TO_HYP((unsigned long)from); - unsigned long end = KERN_TO_HYP((unsigned long)to); + unsigned long start = kern_hyp_va((unsigned long)from); + unsigned long end = kern_hyp_va((unsigned long)to); if (is_kernel_in_hyp_mode()) return 0; @@ -1687,14 +1669,6 @@ phys_addr_t kvm_mmu_get_httbr(void) return virt_to_phys(hyp_pgd); } -phys_addr_t kvm_mmu_get_boot_httbr(void) -{ - if (__kvm_cpu_uses_extended_idmap()) - return virt_to_phys(merged_hyp_pgd); - else - return virt_to_phys(boot_hyp_pgd); -} - phys_addr_t kvm_get_idmap_vector(void) { return hyp_idmap_vector; @@ -1705,6 +1679,22 @@ phys_addr_t kvm_get_idmap_start(void) return hyp_idmap_start; } +static int kvm_map_idmap_text(pgd_t *pgd) +{ + int err; + + /* Create the idmap in the boot page tables */ + err = __create_hyp_mappings(pgd, + hyp_idmap_start, hyp_idmap_end, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP_EXEC); + if (err) + kvm_err("Failed to idmap %lx-%lx\n", + hyp_idmap_start, hyp_idmap_end); + + return err; +} + int kvm_mmu_init(void) { int err; @@ -1719,28 +1709,41 @@ int kvm_mmu_init(void) */ BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); - hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order); - boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order); + kvm_info("IDMAP page: %lx\n", hyp_idmap_start); + kvm_info("HYP VA range: %lx:%lx\n", + kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL)); - if (!hyp_pgd || !boot_hyp_pgd) { - kvm_err("Hyp mode PGD not allocated\n"); - err = -ENOMEM; + if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && + hyp_idmap_start < kern_hyp_va(~0UL)) { + /* + * The idmap page is intersecting with the VA space, + * it is not safe to continue further. + */ + kvm_err("IDMAP intersecting with HYP VA, unable to continue\n"); + err = -EINVAL; goto out; } - /* Create the idmap in the boot page tables */ - err = __create_hyp_mappings(boot_hyp_pgd, - hyp_idmap_start, hyp_idmap_end, - __phys_to_pfn(hyp_idmap_start), - PAGE_HYP); - - if (err) { - kvm_err("Failed to idmap %lx-%lx\n", - hyp_idmap_start, hyp_idmap_end); + hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order); + if (!hyp_pgd) { + kvm_err("Hyp mode PGD not allocated\n"); + err = -ENOMEM; goto out; } if (__kvm_cpu_uses_extended_idmap()) { + boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + hyp_pgd_order); + if (!boot_hyp_pgd) { + kvm_err("Hyp boot PGD not allocated\n"); + err = -ENOMEM; + goto out; + } + + err = kvm_map_idmap_text(boot_hyp_pgd); + if (err) + goto out; + merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); if (!merged_hyp_pgd) { kvm_err("Failed to allocate extra HYP pgd\n"); @@ -1748,29 +1751,10 @@ int kvm_mmu_init(void) } __kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd, hyp_idmap_start); - return 0; - } - - /* Map the very same page at the trampoline VA */ - err = __create_hyp_mappings(boot_hyp_pgd, - TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE, - __phys_to_pfn(hyp_idmap_start), - PAGE_HYP); - if (err) { - kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n", - TRAMPOLINE_VA); - goto out; - } - - /* Map the same page again into the runtime page tables */ - err = __create_hyp_mappings(hyp_pgd, - TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE, - __phys_to_pfn(hyp_idmap_start), - PAGE_HYP); - if (err) { - kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n", - TRAMPOLINE_VA); - goto out; + } else { + err = kvm_map_idmap_text(hyp_pgd); + if (err) + goto out; } return 0; diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index 0048b5a62a50..4b5e802e57d1 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -52,7 +52,7 @@ static const struct kvm_irq_level cortexa_vtimer_irq = { * @vcpu: The VCPU pointer * * This function finds the right table above and sets the registers on the - * virtual CPU struct to their architectually defined reset values. + * virtual CPU struct to their architecturally defined reset values. */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { diff --git a/arch/arm/mach-ep93xx/ts72xx.c b/arch/arm/mach-ep93xx/ts72xx.c index 45b81a2bcd4b..3b39ea353d30 100644 --- a/arch/arm/mach-ep93xx/ts72xx.c +++ b/arch/arm/mach-ep93xx/ts72xx.c @@ -16,7 +16,7 @@ #include <linux/init.h> #include <linux/platform_device.h> #include <linux/io.h> -#include <linux/m48t86.h> +#include <linux/platform_data/rtc-m48t86.h> #include <linux/mtd/nand.h> #include <linux/mtd/partitions.h> diff --git a/arch/arm/mach-integrator/impd1.c b/arch/arm/mach-integrator/impd1.c index 38b0da300dd5..ed9a01484030 100644 --- a/arch/arm/mach-integrator/impd1.c +++ b/arch/arm/mach-integrator/impd1.c @@ -320,11 +320,11 @@ static struct impd1_device impd1_devs[] = { #define IMPD1_VALID_IRQS 0x00000bffU /* - * As this module is bool, it is OK to have this as __init_refok() - no + * As this module is bool, it is OK to have this as __ref() - no * probe calls will be done after the initial system bootup, as devices * are discovered as part of the machine startup. */ -static int __init_refok impd1_probe(struct lm_device *dev) +static int __ref impd1_probe(struct lm_device *dev) { struct impd1_module *impd1; int irq_base; diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c index 45a05207b418..6af5430d0d97 100644 --- a/arch/arm/mach-mv78xx0/common.c +++ b/arch/arm/mach-mv78xx0/common.c @@ -343,7 +343,7 @@ void __init mv78xx0_init_early(void) DDR_WINDOW_CPU1_BASE, DDR_WINDOW_CPU_SZ); } -void __init_refok mv78xx0_timer_init(void) +void __ref mv78xx0_timer_init(void) { orion_time_init(BRIDGE_VIRT_BASE, BRIDGE_INT_TIMER1_CLR, IRQ_MV78XX0_TIMER_1, get_tclk()); diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c index c2626f83e0c2..e920dd83e443 100644 --- a/arch/arm/mach-omap2/omap_device.c +++ b/arch/arm/mach-omap2/omap_device.c @@ -209,7 +209,7 @@ static int _omap_device_notifier_call(struct notifier_block *nb, int err; switch (event) { - case BUS_NOTIFY_DEL_DEVICE: + case BUS_NOTIFY_REMOVED_DEVICE: if (pdev->archdata.od) omap_device_delete(pdev->archdata.od); break; diff --git a/arch/arm/mach-orion5x/ts78xx-setup.c b/arch/arm/mach-orion5x/ts78xx-setup.c index 3a58a5d4a28a..8d597267d0c4 100644 --- a/arch/arm/mach-orion5x/ts78xx-setup.c +++ b/arch/arm/mach-orion5x/ts78xx-setup.c @@ -16,7 +16,7 @@ #include <linux/platform_device.h> #include <linux/mv643xx_eth.h> #include <linux/ata_platform.h> -#include <linux/m48t86.h> +#include <linux/platform_data/rtc-m48t86.h> #include <linux/mtd/nand.h> #include <linux/mtd/partitions.h> #include <linux/timeriomem-rng.h> diff --git a/arch/arm/mach-pxa/cm-x270.c b/arch/arm/mach-pxa/cm-x270.c index fa5f51d633a3..be4a66166d61 100644 --- a/arch/arm/mach-pxa/cm-x270.c +++ b/arch/arm/mach-pxa/cm-x270.c @@ -14,7 +14,7 @@ #include <linux/gpio.h> #include <linux/delay.h> -#include <linux/rtc-v3020.h> +#include <linux/platform_data/rtc-v3020.h> #include <video/mbxfb.h> #include <linux/spi/spi.h> diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c index 5f5ac7c8faf0..868448d2cd82 100644 --- a/arch/arm/mach-pxa/cm-x300.c +++ b/arch/arm/mach-pxa/cm-x300.c @@ -25,7 +25,7 @@ #include <linux/gpio.h> #include <linux/dm9000.h> #include <linux/leds.h> -#include <linux/rtc-v3020.h> +#include <linux/platform_data/rtc-v3020.h> #include <linux/pwm.h> #include <linux/pwm_backlight.h> diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c index 6e0268deec43..03354c21e1f2 100644 --- a/arch/arm/mach-pxa/em-x270.c +++ b/arch/arm/mach-pxa/em-x270.c @@ -14,7 +14,7 @@ #include <linux/delay.h> #include <linux/dm9000.h> -#include <linux/rtc-v3020.h> +#include <linux/platform_data/rtc-v3020.h> #include <linux/mtd/nand.h> #include <linux/mtd/partitions.h> #include <linux/mtd/physmap.h> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index b7eed75960fe..c6834c0cfd1c 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -128,16 +128,16 @@ static void __dma_page_dev_to_cpu(struct page *, unsigned long, */ static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { - if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) __dma_page_cpu_to_dev(page, offset, size, dir); return pfn_to_dma(dev, page_to_pfn(page)) + offset; } static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { return pfn_to_dma(dev, page_to_pfn(page)) + offset; } @@ -157,10 +157,9 @@ static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *pag * whatever the device wrote there. */ static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + size_t size, enum dma_data_direction dir, unsigned long attrs) { - if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), handle & ~PAGE_MASK, size, dir); } @@ -198,12 +197,12 @@ struct dma_map_ops arm_dma_ops = { EXPORT_SYMBOL(arm_dma_ops); static void *arm_coherent_dma_alloc(struct device *dev, size_t size, - dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs); + dma_addr_t *handle, gfp_t gfp, unsigned long attrs); static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, struct dma_attrs *attrs); + dma_addr_t handle, unsigned long attrs); static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, - struct dma_attrs *attrs); + unsigned long attrs); struct dma_map_ops arm_coherent_dma_ops = { .alloc = arm_coherent_dma_alloc, @@ -639,11 +638,11 @@ static void __free_from_contiguous(struct device *dev, struct page *page, dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); } -static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot) +static inline pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot) { - prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ? - pgprot_writecombine(prot) : - pgprot_dmacoherent(prot); + prot = (attrs & DMA_ATTR_WRITE_COMBINE) ? + pgprot_writecombine(prot) : + pgprot_dmacoherent(prot); return prot; } @@ -751,7 +750,7 @@ static struct arm_dma_allocator remap_allocator = { static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, pgprot_t prot, bool is_coherent, - struct dma_attrs *attrs, const void *caller) + unsigned long attrs, const void *caller) { u64 mask = get_coherent_dma_mask(dev); struct page *page = NULL; @@ -764,7 +763,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, .gfp = gfp, .prot = prot, .caller = caller, - .want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs), + .want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0), .coherent_flag = is_coherent ? COHERENT : NORMAL, }; @@ -834,7 +833,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, * virtual and bus address for that space. */ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t gfp, struct dma_attrs *attrs) + gfp_t gfp, unsigned long attrs) { pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); @@ -843,7 +842,7 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, } static void *arm_coherent_dma_alloc(struct device *dev, size_t size, - dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) + dma_addr_t *handle, gfp_t gfp, unsigned long attrs) { return __dma_alloc(dev, size, handle, gfp, PAGE_KERNEL, true, attrs, __builtin_return_address(0)); @@ -851,7 +850,7 @@ static void *arm_coherent_dma_alloc(struct device *dev, size_t size, static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, - struct dma_attrs *attrs) + unsigned long attrs) { int ret = -ENXIO; #ifdef CONFIG_MMU @@ -879,14 +878,14 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, */ static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, - struct dma_attrs *attrs) + unsigned long attrs) { return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); } int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, - struct dma_attrs *attrs) + unsigned long attrs) { #ifdef CONFIG_MMU vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); @@ -898,7 +897,7 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, * Free a buffer as defined by the above mapping. */ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, struct dma_attrs *attrs, + dma_addr_t handle, unsigned long attrs, bool is_coherent) { struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); @@ -908,7 +907,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, .size = PAGE_ALIGN(size), .cpu_addr = cpu_addr, .page = page, - .want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs), + .want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0), }; buf = arm_dma_buffer_find(cpu_addr); @@ -920,20 +919,20 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, } void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, struct dma_attrs *attrs) + dma_addr_t handle, unsigned long attrs) { __arm_dma_free(dev, size, cpu_addr, handle, attrs, false); } static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, struct dma_attrs *attrs) + dma_addr_t handle, unsigned long attrs) { __arm_dma_free(dev, size, cpu_addr, handle, attrs, true); } int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t handle, size_t size, - struct dma_attrs *attrs) + unsigned long attrs) { struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); int ret; @@ -1066,7 +1065,7 @@ static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, * here. */ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { struct dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; @@ -1100,7 +1099,7 @@ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, * rules concerning calls here are the same as for dma_unmap_single(). */ void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { struct dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; @@ -1273,7 +1272,7 @@ static inline void __free_iova(struct dma_iommu_mapping *mapping, static const int iommu_order_array[] = { 9, 8, 4, 0 }; static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, - gfp_t gfp, struct dma_attrs *attrs, + gfp_t gfp, unsigned long attrs, int coherent_flag) { struct page **pages; @@ -1289,7 +1288,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, if (!pages) return NULL; - if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) + if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { unsigned long order = get_order(size); struct page *page; @@ -1307,7 +1306,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, } /* Go straight to 4K chunks if caller says it's OK. */ - if (dma_get_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, attrs)) + if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES) order_idx = ARRAY_SIZE(iommu_order_array) - 1; /* @@ -1363,12 +1362,12 @@ error: } static int __iommu_free_buffer(struct device *dev, struct page **pages, - size_t size, struct dma_attrs *attrs) + size_t size, unsigned long attrs) { int count = size >> PAGE_SHIFT; int i; - if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) { + if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { dma_release_from_contiguous(dev, pages[0], count); } else { for (i = 0; i < count; i++) @@ -1460,14 +1459,14 @@ static struct page **__atomic_get_pages(void *addr) return (struct page **)page; } -static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs) +static struct page **__iommu_get_pages(void *cpu_addr, unsigned long attrs) { struct vm_struct *area; if (__in_atomic_pool(cpu_addr, PAGE_SIZE)) return __atomic_get_pages(cpu_addr); - if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) return cpu_addr; area = find_vm_area(cpu_addr); @@ -1511,7 +1510,7 @@ static void __iommu_free_atomic(struct device *dev, void *cpu_addr, } static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size, - dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs, + dma_addr_t *handle, gfp_t gfp, unsigned long attrs, int coherent_flag) { pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); @@ -1542,7 +1541,7 @@ static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size, if (*handle == DMA_ERROR_CODE) goto err_buffer; - if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) return pages; addr = __iommu_alloc_remap(pages, size, gfp, prot, @@ -1560,20 +1559,20 @@ err_buffer: } static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, - dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) + dma_addr_t *handle, gfp_t gfp, unsigned long attrs) { return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, NORMAL); } static void *arm_coherent_iommu_alloc_attrs(struct device *dev, size_t size, - dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) + dma_addr_t *handle, gfp_t gfp, unsigned long attrs) { return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, COHERENT); } static int __arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long uaddr = vma->vm_start; unsigned long usize = vma->vm_end - vma->vm_start; @@ -1603,7 +1602,7 @@ static int __arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma } static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, - dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs) + dma_addr_t dma_addr, size_t size, unsigned long attrs) { vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); @@ -1612,7 +1611,7 @@ static int arm_iommu_mmap_attrs(struct device *dev, static int arm_coherent_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, - dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs) + dma_addr_t dma_addr, size_t size, unsigned long attrs) { return __arm_iommu_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, attrs); } @@ -1622,7 +1621,7 @@ static int arm_coherent_iommu_mmap_attrs(struct device *dev, * Must not be called with IRQs disabled. */ void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, struct dma_attrs *attrs, int coherent_flag) + dma_addr_t handle, unsigned long attrs, int coherent_flag) { struct page **pages; size = PAGE_ALIGN(size); @@ -1638,7 +1637,7 @@ void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, return; } - if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) { + if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) { dma_common_free_remap(cpu_addr, size, VM_ARM_DMA_CONSISTENT | VM_USERMAP); } @@ -1648,20 +1647,20 @@ void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, } void arm_iommu_free_attrs(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs) + void *cpu_addr, dma_addr_t handle, unsigned long attrs) { __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, NORMAL); } void arm_coherent_iommu_free_attrs(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs) + void *cpu_addr, dma_addr_t handle, unsigned long attrs) { __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, COHERENT); } static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, - size_t size, struct dma_attrs *attrs) + size_t size, unsigned long attrs) { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; struct page **pages = __iommu_get_pages(cpu_addr, attrs); @@ -1699,7 +1698,7 @@ static int __dma_direction_to_prot(enum dma_data_direction dir) */ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, size_t size, dma_addr_t *handle, - enum dma_data_direction dir, struct dma_attrs *attrs, + enum dma_data_direction dir, unsigned long attrs, bool is_coherent) { struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); @@ -1720,8 +1719,7 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, phys_addr_t phys = page_to_phys(sg_page(s)); unsigned int len = PAGE_ALIGN(s->offset + s->length); - if (!is_coherent && - !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if (!is_coherent && (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); prot = __dma_direction_to_prot(dir); @@ -1742,7 +1740,7 @@ fail: } static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs, + enum dma_data_direction dir, unsigned long attrs, bool is_coherent) { struct scatterlist *s = sg, *dma = sg, *start = sg; @@ -1800,7 +1798,7 @@ bad_mapping: * obtained via sg_dma_{address,length}. */ int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, struct dma_attrs *attrs) + int nents, enum dma_data_direction dir, unsigned long attrs) { return __iommu_map_sg(dev, sg, nents, dir, attrs, true); } @@ -1818,14 +1816,14 @@ int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg, * sg_dma_{address,length}. */ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, struct dma_attrs *attrs) + int nents, enum dma_data_direction dir, unsigned long attrs) { return __iommu_map_sg(dev, sg, nents, dir, attrs, false); } static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, struct dma_attrs *attrs, - bool is_coherent) + int nents, enum dma_data_direction dir, + unsigned long attrs, bool is_coherent) { struct scatterlist *s; int i; @@ -1834,8 +1832,7 @@ static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg, if (sg_dma_len(s)) __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s)); - if (!is_coherent && - !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if (!is_coherent && (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); } @@ -1852,7 +1849,8 @@ static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg, * rules concerning calls here are the same as for dma_unmap_single(). */ void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, struct dma_attrs *attrs) + int nents, enum dma_data_direction dir, + unsigned long attrs) { __iommu_unmap_sg(dev, sg, nents, dir, attrs, true); } @@ -1868,7 +1866,8 @@ void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, * rules concerning calls here are the same as for dma_unmap_single(). */ void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, + unsigned long attrs) { __iommu_unmap_sg(dev, sg, nents, dir, attrs, false); } @@ -1921,7 +1920,7 @@ void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg, */ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); dma_addr_t dma_addr; @@ -1955,9 +1954,9 @@ fail: */ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { - if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) __dma_page_cpu_to_dev(page, offset, size, dir); return arm_coherent_iommu_map_page(dev, page, offset, size, dir, attrs); @@ -1973,8 +1972,7 @@ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, * Coherent IOMMU aware version of arm_dma_unmap_page() */ static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + size_t size, enum dma_data_direction dir, unsigned long attrs) { struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); dma_addr_t iova = handle & PAGE_MASK; @@ -1998,8 +1996,7 @@ static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle, * IOMMU aware version of arm_dma_unmap_page() */ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + size_t size, enum dma_data_direction dir, unsigned long attrs) { struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); dma_addr_t iova = handle & PAGE_MASK; @@ -2010,7 +2007,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, if (!iova) return; - if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) __dma_page_dev_to_cpu(page, offset, size, dir); iommu_unmap(mapping->domain, iova, len); diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index c5f9a9e3d1f3..d062f08f5020 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -98,11 +98,11 @@ static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle, void __xen_dma_map_page(struct device *hwdev, struct page *page, dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { if (is_device_dma_coherent(hwdev)) return; - if (dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if (attrs & DMA_ATTR_SKIP_CPU_SYNC) return; __xen_dma_page_cpu_to_dev(hwdev, dev_addr, size, dir); @@ -110,12 +110,12 @@ void __xen_dma_map_page(struct device *hwdev, struct page *page, void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { if (is_device_dma_coherent(hwdev)) return; - if (dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if (attrs & DMA_ATTR_SKIP_CPU_SYNC) return; __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9f8b99e20557..bc3f00f586f1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -3,6 +3,7 @@ config ARM64 select ACPI_CCA_REQUIRED if ACPI select ACPI_GENERIC_GSI if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI + select ACPI_MCFG if ACPI select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE @@ -22,9 +23,9 @@ config ARM64 select ARM_ARCH_TIMER select ARM_GIC select AUDIT_ARCH_COMPAT_GENERIC - select ARM_GIC_V2M if PCI_MSI + select ARM_GIC_V2M if PCI select ARM_GIC_V3 - select ARM_GIC_V3_ITS if PCI_MSI + select ARM_GIC_V3_ITS if PCI select ARM_PSCI_FW select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS @@ -53,6 +54,7 @@ config ARM64 select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_BITREVERSE + select HAVE_ARCH_HARDENED_USERCOPY select HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48) @@ -78,6 +80,7 @@ config ARM64 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_GCC_PLUGINS select HAVE_GENERIC_DMA_COHERENT select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IRQ_TIME_ACCOUNTING @@ -101,6 +104,7 @@ config ARM64 select OF_EARLY_FLATTREE select OF_NUMA if NUMA && OF select OF_RESERVED_MEM + select PCI_ECAM if ACPI select PERF_USE_VMALLOC select POWER_RESET select POWER_SUPPLY diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index d59b6908a21a..5b54f8c021d8 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -15,7 +15,7 @@ CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) GZFLAGS :=-9 ifneq ($(CONFIG_RELOCATABLE),) -LDFLAGS_vmlinux += -pie +LDFLAGS_vmlinux += -pie -Bsymbolic endif KBUILD_DEFCONFIG := defconfig diff --git a/arch/arm64/boot/dts/marvell/armada-3720-db.dts b/arch/arm64/boot/dts/marvell/armada-3720-db.dts index 86110a6ae330..1372e9a6aaa4 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-db.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-db.dts @@ -76,3 +76,8 @@ &usb3 { status = "okay"; }; + +/* CON17 (PCIe) / CON12 (mini-PCIe) */ +&pcie0 { + status = "okay"; +}; diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index eb29280962d7..c4762538ec01 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -176,5 +176,30 @@ <0x1d40000 0x40000>; /* GICR */ }; }; + + pcie0: pcie@d0070000 { + compatible = "marvell,armada-3700-pcie"; + device_type = "pci"; + status = "disabled"; + reg = <0 0xd0070000 0 0x20000>; + #address-cells = <3>; + #size-cells = <2>; + bus-range = <0x00 0xff>; + interrupts = <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>; + #interrupt-cells = <1>; + msi-parent = <&pcie0>; + msi-controller; + ranges = <0x82000000 0 0xe8000000 0 0xe8000000 0 0x1000000 /* Port 0 MEM */ + 0x81000000 0 0xe9000000 0 0xe9000000 0 0x10000>; /* Port 0 IO*/ + interrupt-map-mask = <0 0 0 7>; + interrupt-map = <0 0 0 1 &pcie_intc 0>, + <0 0 0 2 &pcie_intc 1>, + <0 0 0 3 &pcie_intc 2>, + <0 0 0 4 &pcie_intc 3>; + pcie_intc: interrupt-controller { + interrupt-controller; + #interrupt-cells = <1>; + }; + }; }; }; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 49dd1bd3ea50..7099f26e3702 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -36,8 +36,9 @@ #define ARM64_HAS_VIRT_HOST_EXTN 11 #define ARM64_WORKAROUND_CAVIUM_27456 12 #define ARM64_HAS_32BIT_EL0 13 +#define ARM64_HYP_OFFSET_LOW 14 -#define ARM64_NCAPS 14 +#define ARM64_NCAPS 15 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 579b6e654f2d..a55384f4a5d7 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -140,6 +140,7 @@ typedef struct user_fpsimd_state elf_fpregset_t; #define SET_PERSONALITY(ex) clear_thread_flag(TIF_32BIT); +/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ #define ARCH_DLINFO \ do { \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 2cdb6b551ac6..4b5c977af465 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -178,7 +178,7 @@ /* Hyp System Trap Register */ #define HSTR_EL2_T(x) (1 << x) -/* Hyp Coproccessor Trap Register Shifts */ +/* Hyp Coprocessor Trap Register Shifts */ #define CPTR_EL2_TFP_SHIFT 10 /* Hyp Coprocessor Trap Register */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 49095fc4b482..3eda975837d0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -47,8 +47,7 @@ int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); -int kvm_arch_dev_ioctl_check_extension(long ext); -unsigned long kvm_hyp_reset_entry(void); +int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext); void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); struct kvm_arch { @@ -348,8 +347,7 @@ int kvm_perf_teardown(void); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); -static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, - phys_addr_t pgd_ptr, +static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) { @@ -357,19 +355,14 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, * Call initialization code, and switch to the full blown * HYP code. */ - __kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr, - hyp_stack_ptr, vector_ptr); + __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr); } -static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr, +void __kvm_hyp_teardown(void); +static inline void __cpu_reset_hyp_mode(unsigned long vector_ptr, phys_addr_t phys_idmap_start) { - /* - * Call reset code, and switch back to stub hyp vectors. - * Uses __kvm_call_hyp() to avoid kaslr's kvm_ksym_ref() translation. - */ - __kvm_call_hyp((void *)kvm_hyp_reset_entry(), - boot_pgd_ptr, phys_idmap_start); + kvm_call_hyp(__kvm_hyp_teardown, phys_idmap_start); } static inline void kvm_arch_hardware_unsetup(void) {} diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 44eaff70da6a..cff510574fae 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -25,29 +25,6 @@ #define __hyp_text __section(.hyp.text) notrace -static inline unsigned long __kern_hyp_va(unsigned long v) -{ - asm volatile(ALTERNATIVE("and %0, %0, %1", - "nop", - ARM64_HAS_VIRT_HOST_EXTN) - : "+r" (v) : "i" (HYP_PAGE_OFFSET_MASK)); - return v; -} - -#define kern_hyp_va(v) (typeof(v))(__kern_hyp_va((unsigned long)(v))) - -static inline unsigned long __hyp_kern_va(unsigned long v) -{ - u64 offset = PAGE_OFFSET - HYP_PAGE_OFFSET; - asm volatile(ALTERNATIVE("add %0, %0, %1", - "nop", - ARM64_HAS_VIRT_HOST_EXTN) - : "+r" (v) : "r" (offset)); - return v; -} - -#define hyp_kern_va(v) (typeof(v))(__hyp_kern_va((unsigned long)(v))) - #define read_sysreg_elx(r,nvh,vh) \ ({ \ u64 reg; \ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index f05ac27d033e..b6bb83400cd8 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -29,21 +29,48 @@ * * Instead, give the HYP mode its own VA region at a fixed offset from * the kernel by just masking the top bits (which are all ones for a - * kernel address). + * kernel address). We need to find out how many bits to mask. * - * ARMv8.1 (using VHE) does have a TTBR1_EL2, and doesn't use these - * macros (the entire kernel runs at EL2). + * We want to build a set of page tables that cover both parts of the + * idmap (the trampoline page used to initialize EL2), and our normal + * runtime VA space, at the same time. + * + * Given that the kernel uses VA_BITS for its entire address space, + * and that half of that space (VA_BITS - 1) is used for the linear + * mapping, we can also limit the EL2 space to (VA_BITS - 1). + * + * The main question is "Within the VA_BITS space, does EL2 use the + * top or the bottom half of that space to shadow the kernel's linear + * mapping?". As we need to idmap the trampoline page, this is + * determined by the range in which this page lives. + * + * If the page is in the bottom half, we have to use the top half. If + * the page is in the top half, we have to use the bottom half: + * + * T = __virt_to_phys(__hyp_idmap_text_start) + * if (T & BIT(VA_BITS - 1)) + * HYP_VA_MIN = 0 //idmap in upper half + * else + * HYP_VA_MIN = 1 << (VA_BITS - 1) + * HYP_VA_MAX = HYP_VA_MIN + (1 << (VA_BITS - 1)) - 1 + * + * This of course assumes that the trampoline page exists within the + * VA_BITS range. If it doesn't, then it means we're in the odd case + * where the kernel idmap (as well as HYP) uses more levels than the + * kernel runtime page tables (as seen when the kernel is configured + * for 4k pages, 39bits VA, and yet memory lives just above that + * limit, forcing the idmap to use 4 levels of page tables while the + * kernel itself only uses 3). In this particular case, it doesn't + * matter which side of VA_BITS we use, as we're guaranteed not to + * conflict with anything. + * + * When using VHE, there are no separate hyp mappings and all KVM + * functionality is already mapped as part of the main kernel + * mappings, and none of this applies in that case. */ -#define HYP_PAGE_OFFSET_SHIFT VA_BITS -#define HYP_PAGE_OFFSET_MASK ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1) -#define HYP_PAGE_OFFSET (PAGE_OFFSET & HYP_PAGE_OFFSET_MASK) -/* - * Our virtual mapping for the idmap-ed MMU-enable code. Must be - * shared across all the page-tables. Conveniently, we use the last - * possible page, where no kernel mapping will ever exist. - */ -#define TRAMPOLINE_VA (HYP_PAGE_OFFSET_MASK & PAGE_MASK) +#define HYP_PAGE_OFFSET_HIGH_MASK ((UL(1) << VA_BITS) - 1) +#define HYP_PAGE_OFFSET_LOW_MASK ((UL(1) << (VA_BITS - 1)) - 1) #ifdef __ASSEMBLY__ @@ -53,13 +80,33 @@ /* * Convert a kernel VA into a HYP VA. * reg: VA to be converted. + * + * This generates the following sequences: + * - High mask: + * and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK + * nop + * - Low mask: + * and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK + * and x0, x0, #HYP_PAGE_OFFSET_LOW_MASK + * - VHE: + * nop + * nop + * + * The "low mask" version works because the mask is a strict subset of + * the "high mask", hence performing the first mask for nothing. + * Should be completely invisible on any viable CPU. */ .macro kern_hyp_va reg -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - and \reg, \reg, #HYP_PAGE_OFFSET_MASK +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN + and \reg, \reg, #HYP_PAGE_OFFSET_HIGH_MASK alternative_else nop alternative_endif +alternative_if_not ARM64_HYP_OFFSET_LOW + nop +alternative_else + and \reg, \reg, #HYP_PAGE_OFFSET_LOW_MASK +alternative_endif .endm #else @@ -70,7 +117,22 @@ alternative_endif #include <asm/mmu_context.h> #include <asm/pgtable.h> -#define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET) +static inline unsigned long __kern_hyp_va(unsigned long v) +{ + asm volatile(ALTERNATIVE("and %0, %0, %1", + "nop", + ARM64_HAS_VIRT_HOST_EXTN) + : "+r" (v) + : "i" (HYP_PAGE_OFFSET_HIGH_MASK)); + asm volatile(ALTERNATIVE("nop", + "and %0, %0, %1", + ARM64_HYP_OFFSET_LOW) + : "+r" (v) + : "i" (HYP_PAGE_OFFSET_LOW_MASK)); + return v; +} + +#define kern_hyp_va(v) (typeof(v))(__kern_hyp_va((unsigned long)(v))) /* * We currently only support a 40bit IPA. @@ -81,9 +143,8 @@ alternative_endif #include <asm/stage2_pgtable.h> -int create_hyp_mappings(void *from, void *to); +int create_hyp_mappings(void *from, void *to, pgprot_t prot); int create_hyp_io_mappings(void *from, void *to, phys_addr_t); -void free_boot_hyp_pgd(void); void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); @@ -97,7 +158,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); -phys_addr_t kvm_mmu_get_boot_httbr(void); phys_addr_t kvm_get_idmap_vector(void); phys_addr_t kvm_get_idmap_start(void); int kvm_mmu_init(void); diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 2813748e2f24..c3ae239db3ee 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -164,6 +164,7 @@ #define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ +#define PTE_HYP_XN (_AT(pteval_t, 1) << 54) /* HYP XN */ /* * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 29fcb33ab401..39f5252673f7 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -55,7 +55,9 @@ #define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) -#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP) +#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) +#define PAGE_HYP_EXEC __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) +#define PAGE_HYP_RO __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) #define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) #define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 46472a91b6df..e20bd431184a 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -224,6 +224,23 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, set_pte(ptep, pte); } +#define __HAVE_ARCH_PTE_SAME +static inline int pte_same(pte_t pte_a, pte_t pte_b) +{ + pteval_t lhs, rhs; + + lhs = pte_val(pte_a); + rhs = pte_val(pte_b); + + if (pte_present(pte_a)) + lhs &= ~PTE_RDONLY; + + if (pte_present(pte_b)) + rhs &= ~PTE_RDONLY; + + return (lhs == rhs); +} + /* * Huge pte definitions. */ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 5e834d10b291..c47257c91b77 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -265,22 +265,25 @@ extern unsigned long __must_check __clear_user(void __user *addr, unsigned long static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) { kasan_check_write(to, n); - return __arch_copy_from_user(to, from, n); + check_object_size(to, n, false); + return __arch_copy_from_user(to, from, n); } static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { kasan_check_read(from, n); - return __arch_copy_to_user(to, from, n); + check_object_size(from, n, true); + return __arch_copy_to_user(to, from, n); } static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { kasan_check_write(to, n); - if (access_ok(VERIFY_READ, from, n)) + if (access_ok(VERIFY_READ, from, n)) { + check_object_size(to, n, false); n = __arch_copy_from_user(to, from, n); - else /* security hole - plug it */ + } else /* security hole - plug it */ memset(to, 0, n); return n; } @@ -289,8 +292,10 @@ static inline unsigned long __must_check copy_to_user(void __user *to, const voi { kasan_check_read(from, n); - if (access_ok(VERIFY_WRITE, to, n)) + if (access_ok(VERIFY_WRITE, to, n)) { + check_object_size(from, n, true); n = __arch_copy_to_user(to, from, n); + } return n; } diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index bbc6a8cf83f1..1788545f25bc 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -87,6 +87,10 @@ extern void verify_cpu_run_el(void); static inline void verify_cpu_run_el(void) {} #endif +/* The section containing the hypervisor idmap text */ +extern char __hyp_idmap_text_start[]; +extern char __hyp_idmap_text_end[]; + /* The section containing the hypervisor text */ extern char __hyp_text_start[]; extern char __hyp_text_end[]; diff --git a/arch/arm64/include/uapi/asm/auxvec.h b/arch/arm64/include/uapi/asm/auxvec.h index 22d6d8885854..4cf0c17787a8 100644 --- a/arch/arm64/include/uapi/asm/auxvec.h +++ b/arch/arm64/include/uapi/asm/auxvec.h @@ -19,4 +19,6 @@ /* vDSO location */ #define AT_SYSINFO_EHDR 33 +#define AT_VECTOR_SIZE_ARCH 1 /* entries in ARCH_DLINFO */ + #endif diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index f209ea151dca..3051f86a9b5f 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -87,9 +87,11 @@ struct kvm_regs { /* Supported VGICv3 address types */ #define KVM_VGIC_V3_ADDR_TYPE_DIST 2 #define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 +#define KVM_VGIC_ITS_ADDR_TYPE 4 #define KVM_VGIC_V3_DIST_SIZE SZ_64K #define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) +#define KVM_VGIC_V3_ITS_SIZE (2 * SZ_64K) #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ #define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 916d27ad79c1..62272eac1352 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -726,6 +726,19 @@ static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused return is_kernel_in_hyp_mode(); } +static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry, + int __unused) +{ + phys_addr_t idmap_addr = virt_to_phys(__hyp_idmap_text_start); + + /* + * Activate the lower HYP offset only if: + * - the idmap doesn't clash with it, + * - the kernel is not running at EL2. + */ + return idmap_addr > GENMASK(VA_BITS - 2, 0) && !is_kernel_in_hyp_mode(); +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -803,6 +816,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64PFR0_EL0_SHIFT, .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT, }, + { + .desc = "Reduced HYP mapping offset", + .capability = ARM64_HYP_OFFSET_LOW, + .def_scope = SCOPE_SYSTEM, + .matches = hyp_offset_low, + }, {}, }; diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 2c6e598a94dc..b77f58355da1 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -781,40 +781,25 @@ __primary_switch: * Iterate over each entry in the relocation table, and apply the * relocations in place. */ - ldr w8, =__dynsym_offset // offset to symbol table ldr w9, =__rela_offset // offset to reloc table ldr w10, =__rela_size // size of reloc table mov_q x11, KIMAGE_VADDR // default virtual offset add x11, x11, x23 // actual virtual offset - add x8, x8, x11 // __va(.dynsym) add x9, x9, x11 // __va(.rela) add x10, x9, x10 // __va(.rela) + sizeof(.rela) 0: cmp x9, x10 - b.hs 2f + b.hs 1f ldp x11, x12, [x9], #24 ldr x13, [x9, #-8] cmp w12, #R_AARCH64_RELATIVE - b.ne 1f + b.ne 0b add x13, x13, x23 // relocate str x13, [x11, x23] b 0b -1: cmp w12, #R_AARCH64_ABS64 - b.ne 0b - add x12, x12, x12, lsl #1 // symtab offset: 24x top word - add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word - ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx - ldr x15, [x12, #8] // Elf64_Sym::st_value - cmp w14, #-0xf // SHN_ABS (0xfff1) ? - add x14, x15, x23 // relocate - csel x15, x14, x15, ne - add x15, x13, x15 - str x15, [x11, x23] - b 0b - -2: +1: #endif ldr x8, =__primary_switched br x8 diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 3c4e308b40a0..acf38722457b 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -17,6 +17,9 @@ #include <linux/mm.h> #include <linux/of_pci.h> #include <linux/of_platform.h> +#include <linux/pci.h> +#include <linux/pci-acpi.h> +#include <linux/pci-ecam.h> #include <linux/slab.h> /* @@ -36,25 +39,17 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, return res->start; } -/** - * pcibios_enable_device - Enable I/O and memory. - * @dev: PCI device to be enabled - * @mask: bitmask of BARs to enable - */ -int pcibios_enable_device(struct pci_dev *dev, int mask) -{ - if (pci_has_flag(PCI_PROBE_ONLY)) - return 0; - - return pci_enable_resources(dev, mask); -} - /* - * Try to assign the IRQ number from DT when adding a new device + * Try to assign the IRQ number when probing a new device */ -int pcibios_add_device(struct pci_dev *dev) +int pcibios_alloc_irq(struct pci_dev *dev) { - dev->irq = of_irq_parse_and_map_pci(dev, 0, 0); + if (acpi_disabled) + dev->irq = of_irq_parse_and_map_pci(dev, 0, 0); +#ifdef CONFIG_ACPI + else + return acpi_pci_irq_enable(dev); +#endif return 0; } @@ -65,13 +60,21 @@ int pcibios_add_device(struct pci_dev *dev) int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 *val) { - return -ENXIO; + struct pci_bus *b = pci_find_bus(domain, bus); + + if (!b) + return PCIBIOS_DEVICE_NOT_FOUND; + return b->ops->read(b, devfn, reg, len, val); } int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 val) { - return -ENXIO; + struct pci_bus *b = pci_find_bus(domain, bus); + + if (!b) + return PCIBIOS_DEVICE_NOT_FOUND; + return b->ops->write(b, devfn, reg, len, val); } #ifdef CONFIG_NUMA @@ -85,10 +88,124 @@ EXPORT_SYMBOL(pcibus_to_node); #endif #ifdef CONFIG_ACPI -/* Root bridge scanning */ + +struct acpi_pci_generic_root_info { + struct acpi_pci_root_info common; + struct pci_config_window *cfg; /* config space mapping */ +}; + +int acpi_pci_bus_find_domain_nr(struct pci_bus *bus) +{ + struct pci_config_window *cfg = bus->sysdata; + struct acpi_device *adev = to_acpi_device(cfg->parent); + struct acpi_pci_root *root = acpi_driver_data(adev); + + return root->segment; +} + +int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) +{ + if (!acpi_disabled) { + struct pci_config_window *cfg = bridge->bus->sysdata; + struct acpi_device *adev = to_acpi_device(cfg->parent); + ACPI_COMPANION_SET(&bridge->dev, adev); + } + + return 0; +} + +/* + * Lookup the bus range for the domain in MCFG, and set up config space + * mapping. + */ +static struct pci_config_window * +pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root) +{ + struct resource *bus_res = &root->secondary; + u16 seg = root->segment; + struct pci_config_window *cfg; + struct resource cfgres; + unsigned int bsz; + + /* Use address from _CBA if present, otherwise lookup MCFG */ + if (!root->mcfg_addr) + root->mcfg_addr = pci_mcfg_lookup(seg, bus_res); + + if (!root->mcfg_addr) { + dev_err(&root->device->dev, "%04x:%pR ECAM region not found\n", + seg, bus_res); + return NULL; + } + + bsz = 1 << pci_generic_ecam_ops.bus_shift; + cfgres.start = root->mcfg_addr + bus_res->start * bsz; + cfgres.end = cfgres.start + resource_size(bus_res) * bsz - 1; + cfgres.flags = IORESOURCE_MEM; + cfg = pci_ecam_create(&root->device->dev, &cfgres, bus_res, + &pci_generic_ecam_ops); + if (IS_ERR(cfg)) { + dev_err(&root->device->dev, "%04x:%pR error %ld mapping ECAM\n", + seg, bus_res, PTR_ERR(cfg)); + return NULL; + } + + return cfg; +} + +/* release_info: free resources allocated by init_info */ +static void pci_acpi_generic_release_info(struct acpi_pci_root_info *ci) +{ + struct acpi_pci_generic_root_info *ri; + + ri = container_of(ci, struct acpi_pci_generic_root_info, common); + pci_ecam_free(ri->cfg); + kfree(ri); +} + +static struct acpi_pci_root_ops acpi_pci_root_ops = { + .release_info = pci_acpi_generic_release_info, +}; + +/* Interface called from ACPI code to setup PCI host controller */ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) { - /* TODO: Should be revisited when implementing PCI on ACPI */ - return NULL; + int node = acpi_get_node(root->device->handle); + struct acpi_pci_generic_root_info *ri; + struct pci_bus *bus, *child; + + ri = kzalloc_node(sizeof(*ri), GFP_KERNEL, node); + if (!ri) + return NULL; + + ri->cfg = pci_acpi_setup_ecam_mapping(root); + if (!ri->cfg) { + kfree(ri); + return NULL; + } + + acpi_pci_root_ops.pci_ops = &ri->cfg->ops->pci_ops; + bus = acpi_pci_root_create(root, &acpi_pci_root_ops, &ri->common, + ri->cfg); + if (!bus) + return NULL; + + pci_bus_size_bridges(bus); + pci_bus_assign_resources(bus); + + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child); + + return bus; } + +void pcibios_add_bus(struct pci_bus *bus) +{ + acpi_pci_add_bus(bus); +} + +void pcibios_remove_bus(struct pci_bus *bus) +{ + acpi_pci_remove_bus(bus); +} + #endif diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 89d6e177ecbd..659963d40bb4 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -103,6 +103,7 @@ SECTIONS *(.discard) *(.discard.*) *(.interp .dynamic) + *(.dynsym .dynstr .hash) } . = KIMAGE_VADDR + TEXT_OFFSET; @@ -174,19 +175,9 @@ SECTIONS .rela : ALIGN(8) { *(.rela .rela*) } - .dynsym : ALIGN(8) { - *(.dynsym) - } - .dynstr : { - *(.dynstr) - } - .hash : { - *(.hash) - } - __rela_offset = ADDR(.rela) - KIMAGE_VADDR; + __rela_offset = ABSOLUTE(ADDR(.rela) - KIMAGE_VADDR); __rela_size = SIZEOF(.rela); - __dynsym_offset = ADDR(.dynsym) - KIMAGE_VADDR; . = ALIGN(SEGMENT_ALIGN); __init_end = .; diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index c4f26ef91e77..9c9edc98d271 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -36,6 +36,9 @@ config KVM select HAVE_KVM_IRQFD select KVM_ARM_VGIC_V3 select KVM_ARM_PMU if HW_PERF_EVENTS + select HAVE_KVM_MSI + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING ---help--- Support hosting virtualized guest machines. We don't support KVM with 16K page tables yet, due to the multiple @@ -54,13 +57,6 @@ config KVM_ARM_PMU Adds support for a virtual Performance Monitoring Unit (PMU) in virtual machines. -config KVM_NEW_VGIC - bool "New VGIC implementation" - depends on KVM - default y - ---help--- - uses the new VGIC implementation - source drivers/vhost/Kconfig endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index a7a958ca29d5..695eb3c7ef41 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -20,7 +20,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o -ifeq ($(CONFIG_KVM_NEW_VGIC),y) kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o @@ -30,12 +29,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o -else -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o -endif +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 32fad75bb9ff..3f9e15722473 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -211,7 +211,7 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) /** * kvm_arm_copy_reg_indices - get indices of all registers. * - * We do core registers right here, then we apppend system regs. + * We do core registers right here, then we append system regs. */ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index a873a6d8be90..6b29d3d9e1f2 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -53,10 +53,9 @@ __invalid: b . /* - * x0: HYP boot pgd - * x1: HYP pgd - * x2: HYP stack - * x3: HYP vectors + * x0: HYP pgd + * x1: HYP stack + * x2: HYP vectors */ __do_hyp_init: @@ -110,71 +109,27 @@ __do_hyp_init: msr sctlr_el2, x4 isb - /* Skip the trampoline dance if we merged the boot and runtime PGDs */ - cmp x0, x1 - b.eq merged - - /* MMU is now enabled. Get ready for the trampoline dance */ - ldr x4, =TRAMPOLINE_VA - adr x5, target - bfi x4, x5, #0, #PAGE_SHIFT - br x4 - -target: /* We're now in the trampoline code, switch page tables */ - msr ttbr0_el2, x1 - isb - - /* Invalidate the old TLBs */ - tlbi alle2 - dsb sy - -merged: /* Set the stack and new vectors */ + kern_hyp_va x1 + mov sp, x1 kern_hyp_va x2 - mov sp, x2 - kern_hyp_va x3 - msr vbar_el2, x3 + msr vbar_el2, x2 /* Hello, World! */ eret ENDPROC(__kvm_hyp_init) /* - * Reset kvm back to the hyp stub. This is the trampoline dance in - * reverse. If kvm used an extended idmap, __extended_idmap_trampoline - * calls this code directly in the idmap. In this case switching to the - * boot tables is a no-op. - * - * x0: HYP boot pgd - * x1: HYP phys_idmap_start + * Reset kvm back to the hyp stub. */ ENTRY(__kvm_hyp_reset) - /* We're in trampoline code in VA, switch back to boot page tables */ - msr ttbr0_el2, x0 - isb - - /* Ensure the PA branch doesn't find a stale tlb entry or stale code. */ - ic iallu - tlbi alle2 - dsb sy - isb - - /* Branch into PA space */ - adr x0, 1f - bfi x1, x0, #0, #PAGE_SHIFT - br x1 - /* We're now in idmap, disable MMU */ -1: mrs x0, sctlr_el2 + mrs x0, sctlr_el2 ldr x1, =SCTLR_ELx_FLAGS bic x0, x0, x1 // Clear SCTL_M and etc msr sctlr_el2, x0 isb - /* Invalidate the old TLBs */ - tlbi alle2 - dsb sy - /* Install stub vectors */ adr_l x0, __hyp_stub_vectors msr vbar_el2, x0 diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 70254a65bd5b..ce9e5e5f28cf 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -164,22 +164,3 @@ alternative_endif eret ENDPROC(__fpsimd_guest_restore) - -/* - * When using the extended idmap, we don't have a trampoline page we can use - * while we switch pages tables during __kvm_hyp_reset. Accessing the idmap - * directly would be ideal, but if we're using the extended idmap then the - * idmap is located above HYP_PAGE_OFFSET, and the address will be masked by - * kvm_call_hyp using kern_hyp_va. - * - * x0: HYP boot pgd - * x1: HYP phys_idmap_start - */ -ENTRY(__extended_idmap_trampoline) - mov x4, x1 - adr_l x3, __kvm_hyp_reset - - /* insert __kvm_hyp_reset()s offset into phys_idmap_start */ - bfi x4, x3, #0, #PAGE_SHIFT - br x4 -ENDPROC(__extended_idmap_trampoline) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 2d87f36d5cb4..f6d9694ae3b1 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -62,6 +62,21 @@ ENTRY(__vhe_hyp_call) isb ret ENDPROC(__vhe_hyp_call) + +/* + * Compute the idmap address of __kvm_hyp_reset based on the idmap + * start passed as a parameter, and jump there. + * + * x0: HYP phys_idmap_start + */ +ENTRY(__kvm_hyp_teardown) + mov x4, x0 + adr_l x3, __kvm_hyp_reset + + /* insert __kvm_hyp_reset()s offset into phys_idmap_start */ + bfi x4, x3, #0, #PAGE_SHIFT + br x4 +ENDPROC(__kvm_hyp_teardown) el1_sync: // Guest trapped into EL2 save_x0_to_x3 diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 4373997d1a70..ae7855f16ec2 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -299,9 +299,16 @@ static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:% static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par) { - unsigned long str_va = (unsigned long)__hyp_panic_string; + unsigned long str_va; - __hyp_do_panic(hyp_kern_va(str_va), + /* + * Force the panic string to be loaded from the literal pool, + * making sure it is a kernel address and not a PC-relative + * reference. + */ + asm volatile("ldr %0, =__hyp_panic_string" : "=r" (str_va)); + + __hyp_do_panic(str_va, spsr, elr, read_sysreg(esr_el2), read_sysreg_el2(far), read_sysreg(hpfar_el2), par, diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index e9e0e6db73f6..898c0e6aedd4 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -132,16 +132,14 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); - bool is_aarch32; + bool is_aarch32 = vcpu_mode_is_32bit(vcpu); u32 esr = 0; - is_aarch32 = vcpu_mode_is_32bit(vcpu); - - *vcpu_spsr(vcpu) = cpsr; *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); - *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); + *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_spsr(vcpu) = cpsr; vcpu_sys_reg(vcpu, FAR_EL1) = addr; @@ -172,11 +170,11 @@ static void inject_undef64(struct kvm_vcpu *vcpu) unsigned long cpsr = *vcpu_cpsr(vcpu); u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); - *vcpu_spsr(vcpu) = cpsr; *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); - *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); + *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_spsr(vcpu) = cpsr; /* * Build an unknown exception, depending on the instruction diff --git a/arch/arm64/kvm/irq.h b/arch/arm64/kvm/irq.h new file mode 100644 index 000000000000..b74099b905fd --- /dev/null +++ b/arch/arm64/kvm/irq.h @@ -0,0 +1,19 @@ +/* + * irq.h: in kernel interrupt controller related definitions + * Copyright (c) 2016 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This header is included by irqchip.c. However, on ARM, interrupt + * controller declarations are located in include/kvm/arm_vgic.h since + * they are mostly shared between arm and arm64. + */ + +#ifndef __IRQ_H +#define __IRQ_H + +#include <kvm/arm_vgic.h> + +#endif diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index b1ad730e1567..5bc460884639 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -65,7 +65,7 @@ static bool cpu_has_32bit_el1(void) * We currently assume that the number of HW registers is uniform * across all CPUs (see cpuinfo_sanity_check). */ -int kvm_arch_dev_ioctl_check_extension(long ext) +int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) { int r; @@ -86,6 +86,12 @@ int kvm_arch_dev_ioctl_check_extension(long ext) case KVM_CAP_VCPU_ATTRIBUTES: r = 1; break; + case KVM_CAP_MSI_DEVID: + if (!kvm) + r = -EINVAL; + else + r = kvm->arch.vgic.msis_require_devid; + break; default: r = 0; } @@ -98,7 +104,7 @@ int kvm_arch_dev_ioctl_check_extension(long ext) * @vcpu: The VCPU pointer * * This function finds the right table above and sets the registers on - * the virtual CPU struct to their architectually defined reset + * the virtual CPU struct to their architecturally defined reset * values. */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) @@ -132,31 +138,3 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset timer */ return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); } - -extern char __hyp_idmap_text_start[]; - -unsigned long kvm_hyp_reset_entry(void) -{ - if (!__kvm_cpu_uses_extended_idmap()) { - unsigned long offset; - - /* - * Find the address of __kvm_hyp_reset() in the trampoline page. - * This is present in the running page tables, and the boot page - * tables, so we call the code here to start the trampoline - * dance in reverse. - */ - offset = (unsigned long)__kvm_hyp_reset - - ((unsigned long)__hyp_idmap_text_start & PAGE_MASK); - - return TRAMPOLINE_VA + offset; - } else { - /* - * KVM is running with merged page tables, which don't have the - * trampoline page mapped. We know the idmap is still mapped, - * but can't be called into directly. Use - * __extended_idmap_trampoline to do the call. - */ - return (unsigned long)kvm_ksym_ref(__extended_idmap_trampoline); - } -} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a57d650f552c..b0b225ceca18 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1546,7 +1546,7 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu, struct sys_reg_params *params) { u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); - int cp; + int cp = -1; switch(hsr_ec) { case ESR_ELx_EC_CP15_32: @@ -1558,7 +1558,7 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu, cp = 14; break; default: - WARN_ON((cp = -1)); + WARN_ON(1); } kvm_err("Unsupported guest CP%d access at: %08lx\n", diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index f6c55afab3e2..c4284c432ae8 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -32,10 +32,10 @@ static int swiotlb __read_mostly; -static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, +static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot, bool coherent) { - if (!coherent || dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs)) + if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE)) return pgprot_writecombine(prot); return prot; } @@ -91,7 +91,7 @@ static int __free_from_pool(void *start, size_t size) static void *__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) + unsigned long attrs) { if (dev == NULL) { WARN_ONCE(1, "Use an actual device structure for DMA allocation\n"); @@ -121,7 +121,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, static void __dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { bool freed; phys_addr_t paddr = dma_to_phys(dev, dma_handle); @@ -140,7 +140,7 @@ static void __dma_free_coherent(struct device *dev, size_t size, static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) + unsigned long attrs) { struct page *page; void *ptr, *coherent_ptr; @@ -188,7 +188,7 @@ no_mem: static void __dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle)); @@ -205,7 +205,7 @@ static void __dma_free(struct device *dev, size_t size, static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { dma_addr_t dev_addr; @@ -219,7 +219,7 @@ static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page, static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { if (!is_device_dma_coherent(dev)) __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); @@ -228,7 +228,7 @@ static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr, static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i, ret; @@ -245,7 +245,7 @@ static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, static void __swiotlb_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; @@ -306,7 +306,7 @@ static void __swiotlb_sync_sg_for_device(struct device *dev, static int __swiotlb_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, - struct dma_attrs *attrs) + unsigned long attrs) { int ret = -ENXIO; unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> @@ -333,7 +333,7 @@ static int __swiotlb_mmap(struct device *dev, static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t handle, size_t size, - struct dma_attrs *attrs) + unsigned long attrs) { int ret = sg_alloc_table(sgt, 1, GFP_KERNEL); @@ -435,21 +435,21 @@ out: static void *__dummy_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) + unsigned long attrs) { return NULL; } static void __dummy_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { } static int __dummy_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, - struct dma_attrs *attrs) + unsigned long attrs) { return -ENXIO; } @@ -457,20 +457,20 @@ static int __dummy_mmap(struct device *dev, static dma_addr_t __dummy_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { return DMA_ERROR_CODE; } static void __dummy_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { } static int __dummy_map_sg(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { return 0; } @@ -478,7 +478,7 @@ static int __dummy_map_sg(struct device *dev, struct scatterlist *sgl, static void __dummy_unmap_sg(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { } @@ -553,7 +553,7 @@ static void flush_page(struct device *dev, const void *virt, phys_addr_t phys) static void *__iommu_alloc_attrs(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, - struct dma_attrs *attrs) + unsigned long attrs) { bool coherent = is_device_dma_coherent(dev); int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent); @@ -613,7 +613,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, } static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, struct dma_attrs *attrs) + dma_addr_t handle, unsigned long attrs) { size_t iosize = size; @@ -629,7 +629,7 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, * Hence how dodgy the below logic looks... */ if (__in_atomic_pool(cpu_addr, size)) { - iommu_dma_unmap_page(dev, handle, iosize, 0, NULL); + iommu_dma_unmap_page(dev, handle, iosize, 0, 0); __free_from_pool(cpu_addr, size); } else if (is_vmalloc_addr(cpu_addr)){ struct vm_struct *area = find_vm_area(cpu_addr); @@ -639,14 +639,14 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, iommu_dma_free(dev, area->pages, iosize, &handle); dma_common_free_remap(cpu_addr, size, VM_USERMAP); } else { - iommu_dma_unmap_page(dev, handle, iosize, 0, NULL); + iommu_dma_unmap_page(dev, handle, iosize, 0, 0); __free_pages(virt_to_page(cpu_addr), get_order(size)); } } static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, - struct dma_attrs *attrs) + unsigned long attrs) { struct vm_struct *area; int ret; @@ -666,7 +666,7 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, - size_t size, struct dma_attrs *attrs) + size_t size, unsigned long attrs) { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; struct vm_struct *area = find_vm_area(cpu_addr); @@ -707,14 +707,14 @@ static void __iommu_sync_single_for_device(struct device *dev, static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { bool coherent = is_device_dma_coherent(dev); int prot = dma_direction_to_prot(dir, coherent); dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot); if (!iommu_dma_mapping_error(dev, dev_addr) && - !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) __iommu_sync_single_for_device(dev, dev_addr, size, dir); return dev_addr; @@ -722,9 +722,9 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { - if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) __iommu_sync_single_for_cpu(dev, dev_addr, size, dir); iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs); @@ -760,11 +760,11 @@ static void __iommu_sync_sg_for_device(struct device *dev, static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { bool coherent = is_device_dma_coherent(dev); - if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) __iommu_sync_sg_for_device(dev, sgl, nelems, dir); return iommu_dma_map_sg(dev, sgl, nelems, @@ -774,9 +774,9 @@ static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl, static void __iommu_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { - if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) __iommu_sync_sg_for_cpu(dev, sgl, nelems, dir); iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 51a558195bb9..4989948d1feb 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -686,9 +686,9 @@ void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) /* * Check whether the physical FDT address is set and meets the minimum * alignment requirement. Since we are relying on MIN_FDT_ALIGN to be - * at least 8 bytes so that we can always access the size field of the - * FDT header after mapping the first chunk, double check here if that - * is indeed the case. + * at least 8 bytes so that we can always access the magic and size + * fields of the FDT header after mapping the first chunk, double check + * here if that is indeed the case. */ BUILD_BUG_ON(MIN_FDT_ALIGN < 8); if (!dt_phys || dt_phys % MIN_FDT_ALIGN) @@ -716,7 +716,7 @@ void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, SWAPPER_BLOCK_SIZE, prot); - if (fdt_check_header(dt_virt) != 0) + if (fdt_magic(dt_virt) != FDT_MAGIC) return NULL; *size = fdt_totalsize(dt_virt); diff --git a/arch/avr32/mm/dma-coherent.c b/arch/avr32/mm/dma-coherent.c index 92cf1fb2b3e6..58610d0df7ed 100644 --- a/arch/avr32/mm/dma-coherent.c +++ b/arch/avr32/mm/dma-coherent.c @@ -99,7 +99,7 @@ static void __dma_free(struct device *dev, size_t size, } static void *avr32_dma_alloc(struct device *dev, size_t size, - dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) + dma_addr_t *handle, gfp_t gfp, unsigned long attrs) { struct page *page; dma_addr_t phys; @@ -109,7 +109,7 @@ static void *avr32_dma_alloc(struct device *dev, size_t size, return NULL; phys = page_to_phys(page); - if (dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs)) { + if (attrs & DMA_ATTR_WRITE_COMBINE) { /* Now, map the page into P3 with write-combining turned on */ *handle = phys; return __ioremap(phys, size, _PAGE_BUFFER); @@ -119,11 +119,11 @@ static void *avr32_dma_alloc(struct device *dev, size_t size, } static void avr32_dma_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs) + void *cpu_addr, dma_addr_t handle, unsigned long attrs) { struct page *page; - if (dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs)) { + if (attrs & DMA_ATTR_WRITE_COMBINE) { iounmap(cpu_addr); page = phys_to_page(handle); @@ -142,7 +142,7 @@ static void avr32_dma_free(struct device *dev, size_t size, static dma_addr_t avr32_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, - enum dma_data_direction direction, struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { void *cpu_addr = page_address(page) + offset; @@ -152,7 +152,7 @@ static dma_addr_t avr32_dma_map_page(struct device *dev, struct page *page, static int avr32_dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { int i; struct scatterlist *sg; diff --git a/arch/blackfin/kernel/dma-mapping.c b/arch/blackfin/kernel/dma-mapping.c index 771afe6e4264..53fbbb61aa86 100644 --- a/arch/blackfin/kernel/dma-mapping.c +++ b/arch/blackfin/kernel/dma-mapping.c @@ -79,7 +79,7 @@ static void __free_dma_pages(unsigned long addr, unsigned int pages) } static void *bfin_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs) + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { void *ret; @@ -94,7 +94,7 @@ static void *bfin_dma_alloc(struct device *dev, size_t size, } static void bfin_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { __free_dma_pages((unsigned long)vaddr, get_pages(size)); } @@ -111,7 +111,7 @@ EXPORT_SYMBOL(__dma_sync); static int bfin_dma_map_sg(struct device *dev, struct scatterlist *sg_list, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; @@ -139,7 +139,7 @@ static void bfin_dma_sync_sg_for_device(struct device *dev, static dma_addr_t bfin_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { dma_addr_t handle = (dma_addr_t)(page_address(page) + offset); diff --git a/arch/blackfin/mm/init.c b/arch/blackfin/mm/init.c index 166842de3dc7..b59cd7c3261a 100644 --- a/arch/blackfin/mm/init.c +++ b/arch/blackfin/mm/init.c @@ -112,7 +112,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) } #endif -void __init_refok free_initmem(void) +void __ref free_initmem(void) { #if defined CONFIG_RAMKERNEL && !defined CONFIG_MPU free_initmem_default(-1); diff --git a/arch/c6x/include/asm/dma-mapping.h b/arch/c6x/include/asm/dma-mapping.h index 6b5cd7b0cf32..5717b1e52d96 100644 --- a/arch/c6x/include/asm/dma-mapping.h +++ b/arch/c6x/include/asm/dma-mapping.h @@ -26,8 +26,8 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) extern void coherent_mem_init(u32 start, u32 size); void *c6x_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t gfp, struct dma_attrs *attrs); + gfp_t gfp, unsigned long attrs); void c6x_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs); + dma_addr_t dma_handle, unsigned long attrs); #endif /* _ASM_C6X_DMA_MAPPING_H */ diff --git a/arch/c6x/kernel/dma.c b/arch/c6x/kernel/dma.c index 8a80f3a250c0..db4a6a301f5e 100644 --- a/arch/c6x/kernel/dma.c +++ b/arch/c6x/kernel/dma.c @@ -38,7 +38,7 @@ static void c6x_dma_sync(dma_addr_t handle, size_t size, static dma_addr_t c6x_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { dma_addr_t handle = virt_to_phys(page_address(page) + offset); @@ -47,13 +47,13 @@ static dma_addr_t c6x_dma_map_page(struct device *dev, struct page *page, } static void c6x_dma_unmap_page(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) + size_t size, enum dma_data_direction dir, unsigned long attrs) { c6x_dma_sync(handle, size, dir); } static int c6x_dma_map_sg(struct device *dev, struct scatterlist *sglist, - int nents, enum dma_data_direction dir, struct dma_attrs *attrs) + int nents, enum dma_data_direction dir, unsigned long attrs) { struct scatterlist *sg; int i; @@ -67,8 +67,7 @@ static int c6x_dma_map_sg(struct device *dev, struct scatterlist *sglist, } static void c6x_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, - int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + int nents, enum dma_data_direction dir, unsigned long attrs) { struct scatterlist *sg; int i; diff --git a/arch/c6x/mm/dma-coherent.c b/arch/c6x/mm/dma-coherent.c index f7ee63af2541..95e38ad27c69 100644 --- a/arch/c6x/mm/dma-coherent.c +++ b/arch/c6x/mm/dma-coherent.c @@ -74,7 +74,7 @@ static void __free_dma_pages(u32 addr, int order) * virtual and DMA address for that space. */ void *c6x_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t gfp, struct dma_attrs *attrs) + gfp_t gfp, unsigned long attrs) { u32 paddr; int order; @@ -99,7 +99,7 @@ void *c6x_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, * Free DMA coherent memory as defined by the above mapping. */ void c6x_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { int order; diff --git a/arch/cris/arch-v10/drivers/axisflashmap.c b/arch/cris/arch-v10/drivers/axisflashmap.c index 60d57c590032..bdc25aa43468 100644 --- a/arch/cris/arch-v10/drivers/axisflashmap.c +++ b/arch/cris/arch-v10/drivers/axisflashmap.c @@ -397,7 +397,7 @@ static int __init init_axis_flash(void) if (!romfs_in_flash) { /* Create an RAM device for the root partition (romfs). */ -#if !defined(CONFIG_MTD_MTDRAM) || (CONFIG_MTDRAM_TOTAL_SIZE != 0) || (CONFIG_MTDRAM_ABS_POS != 0) +#if !defined(CONFIG_MTD_MTDRAM) || (CONFIG_MTDRAM_TOTAL_SIZE != 0) /* No use trying to boot this kernel from RAM. Panic! */ printk(KERN_EMERG "axisflashmap: Cannot create an MTD RAM " "device due to kernel (mis)configuration!\n"); diff --git a/arch/cris/arch-v32/drivers/axisflashmap.c b/arch/cris/arch-v32/drivers/axisflashmap.c index bd10d3ba0949..87656c41fec7 100644 --- a/arch/cris/arch-v32/drivers/axisflashmap.c +++ b/arch/cris/arch-v32/drivers/axisflashmap.c @@ -320,7 +320,7 @@ static int __init init_axis_flash(void) * but its size must be configured as 0 so as not to conflict * with our usage. */ -#if !defined(CONFIG_MTD_MTDRAM) || (CONFIG_MTDRAM_TOTAL_SIZE != 0) || (CONFIG_MTDRAM_ABS_POS != 0) +#if !defined(CONFIG_MTD_MTDRAM) || (CONFIG_MTDRAM_TOTAL_SIZE != 0) if (!romfs_in_flash && !nand_boot) { printk(KERN_EMERG "axisflashmap: Cannot create an MTD RAM " "device; configure CONFIG_MTD_MTDRAM with size = 0!\n"); diff --git a/arch/cris/arch-v32/drivers/pci/dma.c b/arch/cris/arch-v32/drivers/pci/dma.c index 8d5efa58cce1..1f0636793f0c 100644 --- a/arch/cris/arch-v32/drivers/pci/dma.c +++ b/arch/cris/arch-v32/drivers/pci/dma.c @@ -17,7 +17,7 @@ #include <asm/io.h> static void *v32_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs) + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { void *ret; @@ -37,22 +37,21 @@ static void *v32_dma_alloc(struct device *dev, size_t size, } static void v32_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { free_pages((unsigned long)vaddr, get_order(size)); } static inline dma_addr_t v32_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, - enum dma_data_direction direction, - struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { return page_to_phys(page) + offset; } static inline int v32_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { printk("Map sg\n"); return nents; diff --git a/arch/frv/include/asm/mc146818rtc.h b/arch/frv/include/asm/mc146818rtc.h deleted file mode 100644 index 90dfb7a633d1..000000000000 --- a/arch/frv/include/asm/mc146818rtc.h +++ /dev/null @@ -1,16 +0,0 @@ -/* mc146818rtc.h: RTC defs - * - * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _ASM_MC146818RTC_H -#define _ASM_MC146818RTC_H - - -#endif /* _ASM_MC146818RTC_H */ diff --git a/arch/frv/mb93090-mb00/pci-dma-nommu.c b/arch/frv/mb93090-mb00/pci-dma-nommu.c index 082be49b5df0..90f2e4cb33d6 100644 --- a/arch/frv/mb93090-mb00/pci-dma-nommu.c +++ b/arch/frv/mb93090-mb00/pci-dma-nommu.c @@ -35,7 +35,7 @@ static DEFINE_SPINLOCK(dma_alloc_lock); static LIST_HEAD(dma_alloc_list); static void *frv_dma_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, struct dma_attrs *attrs) + gfp_t gfp, unsigned long attrs) { struct dma_alloc_record *new; struct list_head *this = &dma_alloc_list; @@ -86,7 +86,7 @@ static void *frv_dma_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_ha } static void frv_dma_free(struct device *hwdev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { struct dma_alloc_record *rec; unsigned long flags; @@ -107,7 +107,7 @@ static void frv_dma_free(struct device *hwdev, size_t size, void *vaddr, static int frv_dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { int i; struct scatterlist *sg; @@ -124,7 +124,7 @@ static int frv_dma_map_sg(struct device *dev, struct scatterlist *sglist, static dma_addr_t frv_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, - enum dma_data_direction direction, struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { BUG_ON(direction == DMA_NONE); flush_dcache_page(page); diff --git a/arch/frv/mb93090-mb00/pci-dma.c b/arch/frv/mb93090-mb00/pci-dma.c index 316b7b65348d..f585745b1abc 100644 --- a/arch/frv/mb93090-mb00/pci-dma.c +++ b/arch/frv/mb93090-mb00/pci-dma.c @@ -19,8 +19,7 @@ #include <asm/io.h> static void *frv_dma_alloc(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - struct dma_attrs *attrs) + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { void *ret; @@ -32,14 +31,14 @@ static void *frv_dma_alloc(struct device *hwdev, size_t size, } static void frv_dma_free(struct device *hwdev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { consistent_free(vaddr); } static int frv_dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long dampr2; void *vaddr; @@ -69,7 +68,7 @@ static int frv_dma_map_sg(struct device *dev, struct scatterlist *sglist, static dma_addr_t frv_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, - enum dma_data_direction direction, struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { flush_dcache_page(page); return (dma_addr_t) page_to_phys(page) + offset; diff --git a/arch/h8300/include/asm/mc146818rtc.h b/arch/h8300/include/asm/mc146818rtc.h deleted file mode 100644 index ab9d9646d241..000000000000 --- a/arch/h8300/include/asm/mc146818rtc.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Machine dependent access functions for RTC registers. - */ -#ifndef _H8300_MC146818RTC_H -#define _H8300_MC146818RTC_H - -/* empty include file to satisfy the include in genrtc.c/ide-geometry.c */ - -#endif /* _H8300_MC146818RTC_H */ diff --git a/arch/h8300/kernel/dma.c b/arch/h8300/kernel/dma.c index eeb13d3f2424..3651da045806 100644 --- a/arch/h8300/kernel/dma.c +++ b/arch/h8300/kernel/dma.c @@ -12,7 +12,7 @@ static void *dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, - struct dma_attrs *attrs) + unsigned long attrs) { void *ret; @@ -32,7 +32,7 @@ static void *dma_alloc(struct device *dev, size_t size, static void dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { free_pages((unsigned long)vaddr, get_order(size)); @@ -41,14 +41,14 @@ static void dma_free(struct device *dev, size_t size, static dma_addr_t map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { return page_to_phys(page) + offset; } static int map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; diff --git a/arch/hexagon/include/asm/dma-mapping.h b/arch/hexagon/include/asm/dma-mapping.h index aa6203464520..7ef58df909fc 100644 --- a/arch/hexagon/include/asm/dma-mapping.h +++ b/arch/hexagon/include/asm/dma-mapping.h @@ -26,7 +26,6 @@ #include <linux/mm.h> #include <linux/scatterlist.h> #include <linux/dma-debug.h> -#include <linux/dma-attrs.h> #include <asm/io.h> struct device; diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c index 9e3ddf792bd3..b9017785fb71 100644 --- a/arch/hexagon/kernel/dma.c +++ b/arch/hexagon/kernel/dma.c @@ -51,7 +51,7 @@ static struct gen_pool *coherent_pool; static void *hexagon_dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { void *ret; @@ -84,7 +84,7 @@ static void *hexagon_dma_alloc_coherent(struct device *dev, size_t size, } static void hexagon_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, struct dma_attrs *attrs) + dma_addr_t dma_addr, unsigned long attrs) { gen_pool_free(coherent_pool, (unsigned long) vaddr, size); } @@ -105,7 +105,7 @@ static int check_addr(const char *name, struct device *hwdev, static int hexagon_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *s; int i; @@ -172,7 +172,7 @@ static inline void dma_sync(void *addr, size_t size, static dma_addr_t hexagon_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { dma_addr_t bus = page_to_phys(page) + offset; WARN_ON(size == 0); diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c index 88977e42af0a..192584d5ac2f 100644 --- a/arch/hexagon/mm/init.c +++ b/arch/hexagon/mm/init.c @@ -93,7 +93,7 @@ void __init mem_init(void) * Todo: free pages between __init_begin and __init_end; possibly * some devtree related stuff as well. */ -void __init_refok free_initmem(void) +void __ref free_initmem(void) { } diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 6a15083cc366..18ca6a9ce566 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -52,6 +52,7 @@ config IA64 select MODULES_USE_ELF_RELA select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HARDENED_USERCOPY default y help The Itanium Processor Family is Intel's 64-bit successor to diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index a6d6190c9d24..630ee8073899 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -919,7 +919,7 @@ sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt) static dma_addr_t sba_map_page(struct device *dev, struct page *page, unsigned long poff, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct ioc *ioc; void *addr = page_address(page) + poff; @@ -1005,7 +1005,7 @@ static dma_addr_t sba_map_page(struct device *dev, struct page *page, static dma_addr_t sba_map_single_attrs(struct device *dev, void *addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { return sba_map_page(dev, virt_to_page(addr), (unsigned long)addr & ~PAGE_MASK, size, dir, attrs); @@ -1046,7 +1046,7 @@ sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size) * See Documentation/DMA-API-HOWTO.txt */ static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { struct ioc *ioc; #if DELAYED_RESOURCE_CNT > 0 @@ -1115,7 +1115,7 @@ static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size, } void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { sba_unmap_page(dev, iova, size, dir, attrs); } @@ -1130,7 +1130,7 @@ void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size, */ static void * sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t flags, struct dma_attrs *attrs) + gfp_t flags, unsigned long attrs) { struct ioc *ioc; void *addr; @@ -1175,7 +1175,7 @@ sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, * device to map single to get an iova mapping. */ *dma_handle = sba_map_single_attrs(&ioc->sac_only_dev->dev, addr, - size, 0, NULL); + size, 0, 0); return addr; } @@ -1191,9 +1191,9 @@ sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, * See Documentation/DMA-API-HOWTO.txt */ static void sba_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { - sba_unmap_single_attrs(dev, dma_handle, size, 0, NULL); + sba_unmap_single_attrs(dev, dma_handle, size, 0, 0); free_pages((unsigned long) vaddr, get_order(size)); } @@ -1442,7 +1442,7 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev, static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs); + unsigned long attrs); /** * sba_map_sg - map Scatter/Gather list * @dev: instance of PCI owned by the driver that's asking. @@ -1455,7 +1455,7 @@ static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, */ static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct ioc *ioc; int coalesced, filled = 0; @@ -1551,7 +1551,7 @@ static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, */ static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { #ifdef ASSERT_PDIR_SANITY struct ioc *ioc; diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h index 9c39bdfc2da8..ed7f09089f12 100644 --- a/arch/ia64/include/asm/machvec.h +++ b/arch/ia64/include/asm/machvec.h @@ -22,7 +22,6 @@ struct pci_bus; struct task_struct; struct pci_dev; struct msi_desc; -struct dma_attrs; typedef void ia64_mv_setup_t (char **); typedef void ia64_mv_cpu_init_t (void); diff --git a/arch/ia64/include/asm/mc146818rtc.h b/arch/ia64/include/asm/mc146818rtc.h deleted file mode 100644 index 407787a237ba..000000000000 --- a/arch/ia64/include/asm/mc146818rtc.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _ASM_IA64_MC146818RTC_H -#define _ASM_IA64_MC146818RTC_H - -/* - * Machine dependent access functions for RTC registers. - */ - -/* empty include file to satisfy the include in genrtc.c */ - -#endif /* _ASM_IA64_MC146818RTC_H */ diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index d1212b84fb83..29bd59790d6c 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -121,32 +121,4 @@ struct thread_info { /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */ #define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)) -#define TS_RESTORE_SIGMASK 2 /* restore signal mask in do_signal() */ - -#ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags)); -} -static inline void clear_restore_sigmask(void) -{ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; -} -static inline bool test_restore_sigmask(void) -{ - return current_thread_info()->status & TS_RESTORE_SIGMASK; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - if (!(ti->status & TS_RESTORE_SIGMASK)) - return false; - ti->status &= ~TS_RESTORE_SIGMASK; - return true; -} -#endif /* !__ASSEMBLY__ */ - #endif /* _ASM_IA64_THREAD_INFO_H */ diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h index 2189d5ddc1ee..465c70982f40 100644 --- a/arch/ia64/include/asm/uaccess.h +++ b/arch/ia64/include/asm/uaccess.h @@ -241,12 +241,18 @@ extern unsigned long __must_check __copy_user (void __user *to, const void __use static inline unsigned long __copy_to_user (void __user *to, const void *from, unsigned long count) { + if (!__builtin_constant_p(count)) + check_object_size(from, count, true); + return __copy_user(to, (__force void __user *) from, count); } static inline unsigned long __copy_from_user (void *to, const void __user *from, unsigned long count) { + if (!__builtin_constant_p(count)) + check_object_size(to, count, false); + return __copy_user((__force void __user *) to, from, count); } @@ -258,8 +264,11 @@ __copy_from_user (void *to, const void __user *from, unsigned long count) const void *__cu_from = (from); \ long __cu_len = (n); \ \ - if (__access_ok(__cu_to, __cu_len, get_fs())) \ - __cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len); \ + if (__access_ok(__cu_to, __cu_len, get_fs())) { \ + if (!__builtin_constant_p(n)) \ + check_object_size(__cu_from, __cu_len, true); \ + __cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len); \ + } \ __cu_len; \ }) @@ -270,8 +279,11 @@ __copy_from_user (void *to, const void __user *from, unsigned long count) long __cu_len = (n); \ \ __chk_user_ptr(__cu_from); \ - if (__access_ok(__cu_from, __cu_len, get_fs())) \ + if (__access_ok(__cu_from, __cu_len, get_fs())) { \ + if (!__builtin_constant_p(n)) \ + check_object_size(__cu_to, __cu_len, false); \ __cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len); \ + } \ __cu_len; \ }) diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c index b72cd7a07222..599507bcec91 100644 --- a/arch/ia64/kernel/machine_kexec.c +++ b/arch/ia64/kernel/machine_kexec.c @@ -163,7 +163,7 @@ void arch_crash_save_vmcoreinfo(void) #endif } -unsigned long paddr_vmcoreinfo_note(void) +phys_addr_t paddr_vmcoreinfo_note(void) { return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note); } diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 07a4e32ae96a..eb9220cde76c 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1831,7 +1831,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset, } /* Caller prevents this from being called after init */ -static void * __init_refok mca_bootmem(void) +static void * __ref mca_bootmem(void) { return __alloc_bootmem(sizeof(struct ia64_mca_cpu), KERNEL_STACK_SIZE, 0); diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 939260aeac98..2933208c0285 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -16,7 +16,7 @@ EXPORT_SYMBOL(swiotlb); static void *ia64_swiotlb_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, - struct dma_attrs *attrs) + unsigned long attrs) { if (dev->coherent_dma_mask != DMA_BIT_MASK(64)) gfp |= GFP_DMA; @@ -25,7 +25,7 @@ static void *ia64_swiotlb_alloc_coherent(struct device *dev, size_t size, static void ia64_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, - struct dma_attrs *attrs) + unsigned long attrs) { swiotlb_free_coherent(dev, size, vaddr, dma_addr); } diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index 8f59907007cb..74c934a997bb 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -77,7 +77,7 @@ EXPORT_SYMBOL(sn_dma_set_mask); */ static void *sn_dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t * dma_handle, gfp_t flags, - struct dma_attrs *attrs) + unsigned long attrs) { void *cpuaddr; unsigned long phys_addr; @@ -138,7 +138,7 @@ static void *sn_dma_alloc_coherent(struct device *dev, size_t size, * any associated IOMMU mappings. */ static void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { struct pci_dev *pdev = to_pci_dev(dev); struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); @@ -176,21 +176,18 @@ static void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr static dma_addr_t sn_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { void *cpu_addr = page_address(page) + offset; dma_addr_t dma_addr; unsigned long phys_addr; struct pci_dev *pdev = to_pci_dev(dev); struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); - int dmabarr; - - dmabarr = dma_get_attr(DMA_ATTR_WRITE_BARRIER, attrs); BUG_ON(!dev_is_pci(dev)); phys_addr = __pa(cpu_addr); - if (dmabarr) + if (attrs & DMA_ATTR_WRITE_BARRIER) dma_addr = provider->dma_map_consistent(pdev, phys_addr, size, SN_DMA_ADDR_PHYS); else @@ -218,7 +215,7 @@ static dma_addr_t sn_dma_map_page(struct device *dev, struct page *page, */ static void sn_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct pci_dev *pdev = to_pci_dev(dev); struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); @@ -240,7 +237,7 @@ static void sn_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, */ static void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, int nhwentries, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { int i; struct pci_dev *pdev = to_pci_dev(dev); @@ -273,16 +270,13 @@ static void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, */ static int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nhwentries, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long phys_addr; struct scatterlist *saved_sg = sgl, *sg; struct pci_dev *pdev = to_pci_dev(dev); struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); int i; - int dmabarr; - - dmabarr = dma_get_attr(DMA_ATTR_WRITE_BARRIER, attrs); BUG_ON(!dev_is_pci(dev)); @@ -292,7 +286,7 @@ static int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, for_each_sg(sgl, sg, nhwentries, i) { dma_addr_t dma_addr; phys_addr = SG_ENT_PHYS_ADDRESS(sg); - if (dmabarr) + if (attrs & DMA_ATTR_WRITE_BARRIER) dma_addr = provider->dma_map_consistent(pdev, phys_addr, sg->length, diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index 01693df7f2f6..ec9cc1fdd237 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -35,7 +35,6 @@ #include <asm/amigahw.h> #include <asm/amigaints.h> #include <asm/irq.h> -#include <asm/rtc.h> #include <asm/machdep.h> #include <asm/io.h> diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c index 6e62d66c396e..432bc8bacfc2 100644 --- a/arch/m68k/apollo/config.c +++ b/arch/m68k/apollo/config.c @@ -15,7 +15,6 @@ #include <asm/pgtable.h> #include <asm/apollohw.h> #include <asm/irq.h> -#include <asm/rtc.h> #include <asm/machdep.h> u_long sio01_physaddr; diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c index 478623dbb209..611d4d9ea2bd 100644 --- a/arch/m68k/bvme6000/config.c +++ b/arch/m68k/bvme6000/config.c @@ -34,7 +34,6 @@ #include <asm/setup.h> #include <asm/irq.h> #include <asm/traps.h> -#include <asm/rtc.h> #include <asm/machdep.h> #include <asm/bvme6000hw.h> diff --git a/arch/m68k/hp300/config.c b/arch/m68k/hp300/config.c index a9befe65adc4..7cfab158fb61 100644 --- a/arch/m68k/hp300/config.c +++ b/arch/m68k/hp300/config.c @@ -12,6 +12,7 @@ #include <linux/string.h> #include <linux/kernel.h> #include <linux/console.h> +#include <linux/rtc.h> #include <asm/bootinfo.h> #include <asm/bootinfo-hp300.h> @@ -20,7 +21,6 @@ #include <asm/blinken.h> #include <asm/io.h> /* readb() and writeb() */ #include <asm/hp300hw.h> -#include <asm/rtc.h> #include "time.h" diff --git a/arch/m68k/include/asm/flat.h b/arch/m68k/include/asm/flat.h index f9454b89a51b..00c392b0cabd 100644 --- a/arch/m68k/include/asm/flat.h +++ b/arch/m68k/include/asm/flat.h @@ -1,5 +1,5 @@ /* - * include/asm-m68knommu/flat.h -- uClinux flat-format executables + * flat.h -- uClinux flat-format executables */ #ifndef __M68KNOMMU_FLAT_H__ @@ -8,8 +8,9 @@ #define flat_argvp_envp_on_stack() 1 #define flat_old_ram_flag(flags) (flags) #define flat_reloc_valid(reloc, size) ((reloc) <= (size)) -#define flat_get_addr_from_rp(rp, relval, flags, p) get_unaligned(rp) -#define flat_put_addr_at_rp(rp, val, relval) put_unaligned(val,rp) +#define flat_get_addr_from_rp(rp, relval, flags, p) \ + ({ unsigned long __val; __get_user_unaligned(__val, rp); __val; }) +#define flat_put_addr_at_rp(rp, val, relval) __put_user_unaligned(val, rp) #define flat_get_relocate_addr(rel) (rel) static inline int flat_set_persistent(unsigned long relval, @@ -18,4 +19,10 @@ static inline int flat_set_persistent(unsigned long relval, return 0; } +#define FLAT_PLAT_INIT(regs) \ + do { \ + if (current->mm) \ + (regs)->d5 = current->mm->start_data; \ + } while (0) + #endif /* __M68KNOMMU_FLAT_H__ */ diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h index a6ce2ec8d693..c84a2183b3f0 100644 --- a/arch/m68k/include/asm/processor.h +++ b/arch/m68k/include/asm/processor.h @@ -110,7 +110,6 @@ struct thread_struct { #define setframeformat(_regs) do { } while (0) #endif -#ifdef CONFIG_MMU /* * Do necessary setup to start up a newly executed thread. */ @@ -123,26 +122,14 @@ static inline void start_thread(struct pt_regs * regs, unsigned long pc, wrusp(usp); } +#ifdef CONFIG_MMU extern int handle_kernel_fault(struct pt_regs *regs); - #else - -#define start_thread(_regs, _pc, _usp) \ -do { \ - (_regs)->pc = (_pc); \ - setframeformat(_regs); \ - if (current->mm) \ - (_regs)->d5 = current->mm->start_data; \ - (_regs)->sr &= ~0x2000; \ - wrusp(_usp); \ -} while(0) - static inline int handle_kernel_fault(struct pt_regs *regs) { /* Any fault in kernel is fatal on non-mmu */ return 0; } - #endif /* Forward declaration, a strange C thing */ diff --git a/arch/m68k/include/asm/rtc.h b/arch/m68k/include/asm/rtc.h deleted file mode 100644 index a4d08ea122ee..000000000000 --- a/arch/m68k/include/asm/rtc.h +++ /dev/null @@ -1,79 +0,0 @@ -/* include/asm-m68k/rtc.h - * - * Copyright Richard Zidlicky - * implementation details for genrtc/q40rtc driver - */ -/* permission is hereby granted to copy, modify and redistribute this code - * in terms of the GNU Library General Public License, Version 2 or later, - * at your option. - */ - -#ifndef _ASM_RTC_H -#define _ASM_RTC_H - -#ifdef __KERNEL__ - -#include <linux/rtc.h> -#include <asm/errno.h> -#include <asm/machdep.h> - -#define RTC_PIE 0x40 /* periodic interrupt enable */ -#define RTC_AIE 0x20 /* alarm interrupt enable */ -#define RTC_UIE 0x10 /* update-finished interrupt enable */ - -/* some dummy definitions */ -#define RTC_BATT_BAD 0x100 /* battery bad */ -#define RTC_SQWE 0x08 /* enable square-wave output */ -#define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */ -#define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */ -#define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */ - -static inline unsigned int get_rtc_time(struct rtc_time *time) -{ - /* - * Only the values that we read from the RTC are set. We leave - * tm_wday, tm_yday and tm_isdst untouched. Even though the - * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated - * by the RTC when initially set to a non-zero value. - */ - if (mach_hwclk) - mach_hwclk(0, time); - return RTC_24H; -} - -static inline int set_rtc_time(struct rtc_time *time) -{ - if (mach_hwclk) - return mach_hwclk(1, time); - return -EINVAL; -} - -static inline unsigned int get_rtc_ss(void) -{ - if (mach_get_ss) - return mach_get_ss(); - else{ - struct rtc_time h; - - get_rtc_time(&h); - return h.tm_sec; - } -} - -static inline int get_rtc_pll(struct rtc_pll_info *pll) -{ - if (mach_get_rtc_pll) - return mach_get_rtc_pll(pll); - else - return -EINVAL; -} -static inline int set_rtc_pll(struct rtc_pll_info *pll) -{ - if (mach_set_rtc_pll) - return mach_set_rtc_pll(pll); - else - return -EINVAL; -} -#endif /* __KERNEL__ */ - -#endif /* _ASM__RTC_H */ diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c index cbc78b4117b5..8cf97cbadc91 100644 --- a/arch/m68k/kernel/dma.c +++ b/arch/m68k/kernel/dma.c @@ -19,7 +19,7 @@ #if defined(CONFIG_MMU) && !defined(CONFIG_COLDFIRE) static void *m68k_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t flag, struct dma_attrs *attrs) + gfp_t flag, unsigned long attrs) { struct page *page, **map; pgprot_t pgprot; @@ -62,7 +62,7 @@ static void *m68k_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, } static void m68k_dma_free(struct device *dev, size_t size, void *addr, - dma_addr_t handle, struct dma_attrs *attrs) + dma_addr_t handle, unsigned long attrs) { pr_debug("dma_free_coherent: %p, %x\n", addr, handle); vfree(addr); @@ -73,7 +73,7 @@ static void m68k_dma_free(struct device *dev, size_t size, void *addr, #include <asm/cacheflush.h> static void *m68k_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs) + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { void *ret; /* ignore region specifiers */ @@ -91,7 +91,7 @@ static void *m68k_dma_alloc(struct device *dev, size_t size, } static void m68k_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { free_pages((unsigned long)vaddr, get_order(size)); } @@ -130,7 +130,7 @@ static void m68k_dma_sync_sg_for_device(struct device *dev, static dma_addr_t m68k_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { dma_addr_t handle = page_to_phys(page) + offset; @@ -139,7 +139,7 @@ static dma_addr_t m68k_dma_map_page(struct device *dev, struct page *page, } static int m68k_dma_map_sg(struct device *dev, struct scatterlist *sglist, - int nents, enum dma_data_direction dir, struct dma_attrs *attrs) + int nents, enum dma_data_direction dir, unsigned long attrs) { int i; struct scatterlist *sg; diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c index 3857737e3958..4e5aa2f4f522 100644 --- a/arch/m68k/kernel/time.c +++ b/arch/m68k/kernel/time.c @@ -86,7 +86,49 @@ void read_persistent_clock(struct timespec *ts) } } -#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET +#if defined(CONFIG_ARCH_USES_GETTIMEOFFSET) && IS_ENABLED(CONFIG_RTC_DRV_GENERIC) +static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm) +{ + mach_hwclk(0, tm); + return rtc_valid_tm(tm); +} + +static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm) +{ + if (mach_hwclk(1, tm) < 0) + return -EOPNOTSUPP; + return 0; +} + +static int rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) +{ + struct rtc_pll_info pll; + struct rtc_pll_info __user *argp = (void __user *)arg; + + switch (cmd) { + case RTC_PLL_GET: + if (!mach_get_rtc_pll || mach_get_rtc_pll(&pll)) + return -EINVAL; + return copy_to_user(argp, &pll, sizeof pll) ? -EFAULT : 0; + + case RTC_PLL_SET: + if (!mach_set_rtc_pll) + return -EINVAL; + if (!capable(CAP_SYS_TIME)) + return -EACCES; + if (copy_from_user(&pll, argp, sizeof(pll))) + return -EFAULT; + return mach_set_rtc_pll(&pll); + } + + return -ENOIOCTLCMD; +} + +static const struct rtc_class_ops generic_rtc_ops = { + .ioctl = rtc_ioctl, + .read_time = rtc_generic_get_time, + .set_time = rtc_generic_set_time, +}; static int __init rtc_init(void) { @@ -95,7 +137,9 @@ static int __init rtc_init(void) if (!mach_hwclk) return -ENODEV; - pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0); + pdev = platform_device_register_data(NULL, "rtc-generic", -1, + &generic_rtc_ops, + sizeof(generic_rtc_ops)); return PTR_ERR_OR_ZERO(pdev); } diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 689b47d292ac..2f33a33001e5 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -10,6 +10,7 @@ * Miscellaneous linux stuff */ +#include <linux/errno.h> #include <linux/module.h> #include <linux/types.h> #include <linux/mm.h> @@ -25,6 +26,7 @@ #include <linux/platform_device.h> #include <linux/adb.h> #include <linux/cuda.h> +#include <linux/rtc.h> #include <asm/setup.h> #include <asm/bootinfo.h> @@ -34,7 +36,6 @@ #include <asm/io.h> #include <asm/irq.h> #include <asm/pgtable.h> -#include <asm/rtc.h> #include <asm/machdep.h> #include <asm/macintosh.h> diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c index 707b61aea203..0fb54a90eac2 100644 --- a/arch/m68k/mac/misc.c +++ b/arch/m68k/mac/misc.c @@ -18,7 +18,6 @@ #include <asm/uaccess.h> #include <asm/io.h> -#include <asm/rtc.h> #include <asm/segment.h> #include <asm/setup.h> #include <asm/macintosh.h> diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index e6a3b56c6481..c11d38dfad08 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -32,7 +32,6 @@ #include <asm/setup.h> #include <asm/irq.h> #include <asm/traps.h> -#include <asm/rtc.h> #include <asm/machdep.h> #include <asm/mvme147hw.h> diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index a53803cc66cd..58e240939d26 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -35,7 +35,6 @@ #include <asm/setup.h> #include <asm/irq.h> #include <asm/traps.h> -#include <asm/rtc.h> #include <asm/machdep.h> #include <asm/mvme16xhw.h> diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c index e90fe903613e..fcb7f05b60b6 100644 --- a/arch/m68k/q40/config.c +++ b/arch/m68k/q40/config.c @@ -12,6 +12,7 @@ * for more details. */ +#include <linux/errno.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -27,7 +28,6 @@ #include <linux/platform_device.h> #include <asm/io.h> -#include <asm/rtc.h> #include <asm/bootinfo.h> #include <asm/pgtable.h> #include <asm/setup.h> diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c index 71884bf01d72..3af34fa3a344 100644 --- a/arch/m68k/sun3/config.c +++ b/arch/m68k/sun3/config.c @@ -26,7 +26,6 @@ #include <asm/pgalloc.h> #include <asm/sun3-head.h> #include <asm/sun3mmu.h> -#include <asm/rtc.h> #include <asm/machdep.h> #include <asm/machines.h> #include <asm/idprom.h> diff --git a/arch/m68k/sun3/intersil.c b/arch/m68k/sun3/intersil.c index 889829e11f1d..2cd0bcbe6f30 100644 --- a/arch/m68k/sun3/intersil.c +++ b/arch/m68k/sun3/intersil.c @@ -14,8 +14,8 @@ #include <linux/rtc.h> #include <asm/errno.h> -#include <asm/rtc.h> #include <asm/intersil.h> +#include <asm/machdep.h> /* bits to set for start/run of the intersil */ diff --git a/arch/m68k/sun3x/time.c b/arch/m68k/sun3x/time.c index c8eb08add6b0..431d3c4306dd 100644 --- a/arch/m68k/sun3x/time.c +++ b/arch/m68k/sun3x/time.c @@ -15,10 +15,10 @@ #include <asm/irq.h> #include <asm/io.h> +#include <asm/machdep.h> #include <asm/traps.h> #include <asm/sun3x.h> #include <asm/sun3ints.h> -#include <asm/rtc.h> #include "time.h" diff --git a/arch/metag/include/asm/cmpxchg_lnkget.h b/arch/metag/include/asm/cmpxchg_lnkget.h index 0154e2807ebb..2369ad394876 100644 --- a/arch/metag/include/asm/cmpxchg_lnkget.h +++ b/arch/metag/include/asm/cmpxchg_lnkget.h @@ -73,7 +73,7 @@ static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old, " DCACHE [%2], %0\n" #endif "2:\n" - : "=&d" (temp), "=&da" (retval) + : "=&d" (temp), "=&d" (retval) : "da" (m), "bd" (old), "da" (new) : "cc" ); diff --git a/arch/metag/include/asm/metag_mem.h b/arch/metag/include/asm/metag_mem.h index aa5a076df439..7848bc6d3b61 100644 --- a/arch/metag/include/asm/metag_mem.h +++ b/arch/metag/include/asm/metag_mem.h @@ -881,7 +881,7 @@ #define PERFCTRL_DCSTALL 11 /* Dcache+TLB o/p delayed (per-thread) */ #define PERFCTRL_ICSTALL 12 /* Icache+TLB o/p delayed (per-thread) */ -#define PERFCTRL_INT 13 /* Internal core delailed events (see next) */ +#define PERFCTRL_INT 13 /* Internal core detailed events (see next) */ #define PERFCTRL_EXT 15 /* External source in core periphery */ #endif /* METAC_2_1 */ diff --git a/arch/metag/include/asm/metag_regs.h b/arch/metag/include/asm/metag_regs.h index 40c3f679c5b8..60b750971d8a 100644 --- a/arch/metag/include/asm/metag_regs.h +++ b/arch/metag/include/asm/metag_regs.h @@ -179,7 +179,7 @@ ; is best to dump these registers immediately at the start of a routine ; using a MSETL or SETL instruction- ; -; MSETL [A0StP],D0Ar6,D0Ar4,D0Ar2; Only dump argments expected +; MSETL [A0StP],D0Ar6,D0Ar4,D0Ar2; Only dump arguments expected ;or SETL [A0StP+#8++],D0Ar2 ; Up to two 32-bit args expected ; ; For non-leaf routines it is always necessary to save and restore at least diff --git a/arch/metag/kernel/cachepart.c b/arch/metag/kernel/cachepart.c index 04b7d4f8429a..db944c2e7d88 100644 --- a/arch/metag/kernel/cachepart.c +++ b/arch/metag/kernel/cachepart.c @@ -15,7 +15,7 @@ #define SYSC_DCPART(n) (SYSC_DCPART0 + SYSC_xCPARTn_STRIDE * (n)) #define SYSC_ICPART(n) (SYSC_ICPART0 + SYSC_xCPARTn_STRIDE * (n)) -#define CACHE_ASSOCIATIVITY 4 /* 4 way set-assosiative */ +#define CACHE_ASSOCIATIVITY 4 /* 4 way set-associative */ #define ICACHE 0 #define DCACHE 1 diff --git a/arch/metag/kernel/dma.c b/arch/metag/kernel/dma.c index e12368d02155..0db31e24c541 100644 --- a/arch/metag/kernel/dma.c +++ b/arch/metag/kernel/dma.c @@ -172,7 +172,7 @@ out: * virtual and bus address for that space. */ static void *metag_dma_alloc(struct device *dev, size_t size, - dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) + dma_addr_t *handle, gfp_t gfp, unsigned long attrs) { struct page *page; struct metag_vm_region *c; @@ -268,7 +268,7 @@ no_page: * free a page as defined by the above mapping. */ static void metag_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { struct metag_vm_region *c; unsigned long flags, addr; @@ -331,13 +331,13 @@ no_area: static int metag_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long flags, user_size, kern_size; struct metag_vm_region *c; int ret = -ENXIO; - if (dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs)) + if (attrs & DMA_ATTR_WRITE_COMBINE) vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); else vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); @@ -482,7 +482,7 @@ static void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction) static dma_addr_t metag_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, - enum dma_data_direction direction, struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { dma_sync_for_device((void *)(page_to_phys(page) + offset), size, direction); @@ -491,14 +491,14 @@ static dma_addr_t metag_dma_map_page(struct device *dev, struct page *page, static void metag_dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { dma_sync_for_cpu(phys_to_virt(dma_address), size, direction); } static int metag_dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; @@ -516,7 +516,7 @@ static int metag_dma_map_sg(struct device *dev, struct scatterlist *sglist, static void metag_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nhwentries, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; diff --git a/arch/metag/lib/divsi3.S b/arch/metag/lib/divsi3.S index 7c8a8ae9a0a1..11124cc93dee 100644 --- a/arch/metag/lib/divsi3.S +++ b/arch/metag/lib/divsi3.S @@ -50,7 +50,7 @@ $LIDMCQuick: ADDCC D0Re0,D0Re0,#1 ! If yes result += 1 SUBCC D1Ar1,D1Ar1,D1Re0 ! and A -= Bu ORS D0Ar4,D0Ar4,D0Ar4 ! Return neg result? - NEG D0Ar2,D0Re0 ! Calulate neg result + NEG D0Ar2,D0Re0 ! Calculate neg result MOVMI D0Re0,D0Ar2 ! Yes: Take neg result $LIDMCRet: MOV PC,D1RtP @@ -94,7 +94,7 @@ $LIDMCLoop: LSR D1Re0, D1Re0, #1 ! Shift down B BNZ $LIDMCLoop ! Was single bit in curbit lost? ORS D0Ar4,D0Ar4,D0Ar4 ! Return neg result? - NEG D0Ar2,D0Re0 ! Calulate neg result + NEG D0Ar2,D0Re0 ! Calculate neg result MOVMI D0Re0,D0Ar2 ! Yes: Take neg result MOV PC,D1RtP .size ___divsi3,.-___divsi3 diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c index 372783a67dda..c765b3621b9b 100644 --- a/arch/metag/mm/fault.c +++ b/arch/metag/mm/fault.c @@ -187,7 +187,7 @@ bad_area_nosemaphore: if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && printk_ratelimit()) { - pr_info("%s%s[%d]: segfault at %lx pc %08x sp %08x write %d trap %#x (%s)", + printk("%s%s[%d]: segfault at %lx pc %08x sp %08x write %d trap %#x (%s)", task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, tsk->comm, task_pid_nr(tsk), address, regs->ctx.CurrPC, regs->ctx.AX[0].U0, diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c index 11fa51c89617..c0ec116b3993 100644 --- a/arch/metag/mm/init.c +++ b/arch/metag/mm/init.c @@ -390,7 +390,6 @@ void __init mem_init(void) free_all_bootmem(); mem_init_print_info(NULL); - show_mem(0); } void free_initmem(void) diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h index 1884783d15c0..1768d4bdc8d3 100644 --- a/arch/microblaze/include/asm/dma-mapping.h +++ b/arch/microblaze/include/asm/dma-mapping.h @@ -25,7 +25,6 @@ #include <linux/mm.h> #include <linux/scatterlist.h> #include <linux/dma-debug.h> -#include <linux/dma-attrs.h> #include <asm/io.h> #include <asm/cacheflush.h> diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h index fc3ecb55f1b2..2a120bb70e54 100644 --- a/arch/microblaze/include/asm/pci.h +++ b/arch/microblaze/include/asm/pci.h @@ -82,9 +82,6 @@ extern pgprot_t pci_phys_mem_access_prot(struct file *file, pgprot_t prot); #define HAVE_ARCH_PCI_RESOURCE_TO_USER -extern void pci_resource_to_user(const struct pci_dev *dev, int bar, - const struct resource *rsrc, - resource_size_t *start, resource_size_t *end); extern void pcibios_setup_bus_devices(struct pci_bus *bus); extern void pcibios_setup_bus_self(struct pci_bus *bus); diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h index 383f387b4eee..e7e8954e9815 100644 --- a/arch/microblaze/include/asm/thread_info.h +++ b/arch/microblaze/include/asm/thread_info.h @@ -148,33 +148,6 @@ static inline struct thread_info *current_thread_info(void) */ /* FPU was used by this task this quantum (SMP) */ #define TS_USEDFPU 0x0001 -#define TS_RESTORE_SIGMASK 0x0002 - -#ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags)); -} -static inline void clear_restore_sigmask(void) -{ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; -} -static inline bool test_restore_sigmask(void) -{ - return current_thread_info()->status & TS_RESTORE_SIGMASK; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - if (!(ti->status & TS_RESTORE_SIGMASK)) - return false; - ti->status &= ~TS_RESTORE_SIGMASK; - return true; -} -#endif #endif /* __KERNEL__ */ #endif /* _ASM_MICROBLAZE_THREAD_INFO_H */ diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c index bf4dec229437..ec04dc1e2527 100644 --- a/arch/microblaze/kernel/dma.c +++ b/arch/microblaze/kernel/dma.c @@ -17,7 +17,7 @@ static void *dma_direct_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { #ifdef NOT_COHERENT_CACHE return consistent_alloc(flag, size, dma_handle); @@ -42,7 +42,7 @@ static void *dma_direct_alloc_coherent(struct device *dev, size_t size, static void dma_direct_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { #ifdef NOT_COHERENT_CACHE consistent_free(size, vaddr); @@ -53,7 +53,7 @@ static void dma_direct_free_coherent(struct device *dev, size_t size, static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; @@ -78,7 +78,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { __dma_sync(page_to_phys(page) + offset, size, direction); return page_to_phys(page) + offset; @@ -88,7 +88,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { /* There is not necessary to do cache cleanup * @@ -157,7 +157,7 @@ dma_direct_sync_sg_for_device(struct device *dev, static int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t handle, size_t size, - struct dma_attrs *attrs) + unsigned long attrs) { #ifdef CONFIG_MMU unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 77bc7c7e6522..434639f9a3a6 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -414,7 +414,7 @@ void __init *early_get_page(void) #endif /* CONFIG_MMU */ -void * __init_refok alloc_maybe_bootmem(size_t size, gfp_t mask) +void * __ref alloc_maybe_bootmem(size_t size, gfp_t mask) { if (mem_init_done) return kmalloc(size, mask); @@ -422,7 +422,7 @@ void * __init_refok alloc_maybe_bootmem(size_t size, gfp_t mask) return alloc_bootmem(size); } -void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask) +void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) { void *p; diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index eb99fcc76088..cc732fe357ad 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -234,7 +234,7 @@ unsigned long iopa(unsigned long addr) return pa; } -__init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pte_t *pte; diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index 14cba600da7a..81556b843a8e 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -219,33 +219,6 @@ static struct resource *__pci_mmap_make_offset(struct pci_dev *dev, } /* - * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci - * device mapping. - */ -static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, - pgprot_t protection, - enum pci_mmap_state mmap_state, - int write_combine) -{ - pgprot_t prot = protection; - - /* Write combine is always 0 on non-memory space mappings. On - * memory space, if the user didn't pass 1, we check for a - * "prefetchable" resource. This is a bit hackish, but we use - * this to workaround the inability of /sysfs to provide a write - * combine bit - */ - if (mmap_state != pci_mmap_mem) - write_combine = 0; - else if (write_combine == 0) { - if (rp->flags & IORESOURCE_PREFETCH) - write_combine = 1; - } - - return pgprot_noncached(prot); -} - -/* * This one is used by /dev/mem and fbdev who have no clue about the * PCI device, it tries to find the PCI device first and calls the * above routine @@ -317,9 +290,7 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, return -EINVAL; vma->vm_pgoff = offset >> PAGE_SHIFT; - vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp, - vma->vm_page_prot, - mmap_state, write_combine); + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, vma->vm_end - vma->vm_start, vma->vm_page_prot); @@ -473,39 +444,25 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar, const struct resource *rsrc, resource_size_t *start, resource_size_t *end) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - resource_size_t offset = 0; + struct pci_bus_region region; - if (hose == NULL) + if (rsrc->flags & IORESOURCE_IO) { + pcibios_resource_to_bus(dev->bus, ®ion, + (struct resource *) rsrc); + *start = region.start; + *end = region.end; return; + } - if (rsrc->flags & IORESOURCE_IO) - offset = (unsigned long)hose->io_base_virt - _IO_BASE; - - /* We pass a fully fixed up address to userland for MMIO instead of - * a BAR value because X is lame and expects to be able to use that - * to pass to /dev/mem ! + /* We pass a CPU physical address to userland for MMIO instead of a + * BAR value because X is lame and expects to be able to use that + * to pass to /dev/mem! * - * That means that we'll have potentially 64 bits values where some - * userland apps only expect 32 (like X itself since it thinks only - * Sparc has 64 bits MMIO) but if we don't do that, we break it on - * 32 bits CHRPs :-( - * - * Hopefully, the sysfs insterface is immune to that gunk. Once X - * has been fixed (and the fix spread enough), we can re-enable the - * 2 lines below and pass down a BAR value to userland. In that case - * we'll also have to re-enable the matching code in - * __pci_mmap_make_offset(). - * - * BenH. + * That means we may have 64-bit values where some apps only expect + * 32 (like X itself since it thinks only Sparc has 64-bit MMIO). */ -#if 0 - else if (rsrc->flags & IORESOURCE_MEM) - offset = hose->pci_mem_offset; -#endif - - *start = rsrc->start - offset; - *end = rsrc->end - offset; + *start = rsrc->start; + *end = rsrc->end; } /** diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index ac91939b9b75..26388562e300 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -64,6 +64,7 @@ config MIPS select GENERIC_TIME_VSYSCALL select ARCH_CLOCKSOURCE_DATA select HANDLE_DOMAIN_IRQ + select HAVE_EXIT_THREAD menu "Machine selection" @@ -384,7 +385,7 @@ config MACH_PISTACHIO select CLKSRC_MIPS_GIC select COMMON_CLK select CSRC_R4K - select DMA_MAYBE_COHERENT + select DMA_NONCOHERENT select GPIOLIB select IRQ_MIPS_CPU select LIBFDT @@ -880,7 +881,6 @@ config CAVIUM_OCTEON_SOC select SYS_SUPPORTS_HOTPLUG_CPU if CPU_BIG_ENDIAN select SYS_HAS_EARLY_PRINTK select SYS_HAS_CPU_CAVIUM_OCTEON - select SWAP_IO_SPACE select HW_HAS_PCI select ZONE_DMA32 select HOLES_IN_ZONE @@ -1111,16 +1111,6 @@ config NEED_DMA_MAP_STATE config SYS_HAS_EARLY_PRINTK bool -config HOTPLUG_CPU - bool "Support for hot-pluggable CPUs" - depends on SMP && SYS_SUPPORTS_HOTPLUG_CPU - help - Say Y here to allow turning CPUs off and on. CPUs can be - controlled through /sys/devices/system/cpu. - (Note: power management support will enable this option - automatically on SMP systems. ) - Say N if you want to disable CPU hotplug. - config SYS_SUPPORTS_HOTPLUG_CPU bool @@ -1406,7 +1396,6 @@ config CPU_LOONGSON1B bool "Loongson 1B" depends on SYS_HAS_CPU_LOONGSON1B select CPU_LOONGSON1 - select ARCH_WANT_OPTIONAL_GPIOLIB select LEDS_GPIO_REGISTER help The Loongson 1B is a 32-bit SoC, which implements the MIPS32 @@ -1488,6 +1477,7 @@ config CPU_MIPS64_R2 select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_HUGEPAGES select CPU_SUPPORTS_MSA + select HAVE_KVM help Choose this option to build a kernel for release 2 or later of the MIPS64 architecture. Many modern embedded systems with a 64-bit @@ -1505,6 +1495,7 @@ config CPU_MIPS64_R6 select CPU_SUPPORTS_MSA select GENERIC_CSUM select MIPS_O32_FP64_SUPPORT if MIPS32_O32 + select HAVE_KVM help Choose this option to build a kernel for release 6 or later of the MIPS64 architecture. New MIPS processors, starting with the Warrior @@ -2634,6 +2625,16 @@ config SMP If you don't know what to do here, say N. +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs" + depends on SMP && SYS_SUPPORTS_HOTPLUG_CPU + help + Say Y here to allow turning CPUs off and on. CPUs can be + controlled through /sys/devices/system/cpu. + (Note: power management support will enable this option + automatically on SMP systems. ) + Say N if you want to disable CPU hotplug. + config SMP_UP bool @@ -2885,10 +2886,10 @@ choice the documented boot protocol using a device tree. config MIPS_RAW_APPENDED_DTB - bool "vmlinux.bin" + bool "vmlinux.bin or vmlinuz.bin" help With this option, the boot code will look for a device tree binary - DTB) appended to raw vmlinux.bin (without decompressor). + DTB) appended to raw vmlinux.bin or vmlinuz.bin. (e.g. cat vmlinux.bin <filename>.dtb > vmlinux_w_dtb). This is meant as a backward compatibility convenience for those @@ -2900,24 +2901,6 @@ choice look like a DTB header after a reboot if no actual DTB is appended to vmlinux.bin. Do not leave this option active in a production kernel if you don't intend to always append a DTB. - - config MIPS_ZBOOT_APPENDED_DTB - bool "vmlinuz.bin" - depends on SYS_SUPPORTS_ZBOOT - help - With this option, the boot code will look for a device tree binary - DTB) appended to raw vmlinuz.bin (with decompressor). - (e.g. cat vmlinuz.bin <filename>.dtb > vmlinuz_w_dtb). - - This is meant as a backward compatibility convenience for those - systems with a bootloader that can't be upgraded to accommodate - the documented boot protocol using a device tree. - - Beware that there is very little in terms of protection against - this option being confused by leftover garbage in memory that might - look like a DTB header after a reboot if no actual DTB is appended - to vmlinuz.bin. Do not leave this option active in a production kernel - if you don't intend to always append a DTB. endchoice choice diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 3a0019deb7f7..f206dafbb0a3 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -203,8 +203,8 @@ void __init plat_mem_setup(void) fdt_start = fw_getenvl("fdt_start"); if (fdt_start) __dt_setup_arch((void *)KSEG0ADDR(fdt_start)); - else if (fw_arg0 == -2) - __dt_setup_arch((void *)KSEG0ADDR(fw_arg1)); + else if (fw_passed_dtb) + __dt_setup_arch((void *)KSEG0ADDR(fw_passed_dtb)); if (mips_machtype != ATH79_MACH_GENERIC_OF) { ath79_reset_base = ioremap_nocache(AR71XX_RESET_BASE, diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c index f146d1219bde..6776042679dd 100644 --- a/arch/mips/bmips/setup.c +++ b/arch/mips/bmips/setup.c @@ -162,8 +162,8 @@ void __init plat_mem_setup(void) /* intended to somewhat resemble ARM; see Documentation/arm/Booting */ if (fw_arg0 == 0 && fw_arg1 == 0xffffffff) dtb = phys_to_virt(fw_arg2); - else if (fw_arg0 == -2) /* UHI interface */ - dtb = (void *)fw_arg1; + else if (fw_passed_dtb) /* UHI interface */ + dtb = (void *)fw_passed_dtb; else if (__dtb_start != __dtb_end) dtb = (void *)__dtb_start; else diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index 080cd53bac36..fdf99e9dd4c3 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -14,6 +14,7 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> +#include <linux/libfdt.h> #include <asm/addrspace.h> @@ -36,6 +37,8 @@ extern void puthex(unsigned long long val); #define puthex(val) do {} while (0) #endif +extern char __appended_dtb[]; + void error(char *x) { puts("\n\n"); @@ -114,6 +117,20 @@ void decompress_kernel(unsigned long boot_heap_start) __decompress((char *)zimage_start, zimage_size, 0, 0, (void *)VMLINUX_LOAD_ADDRESS_ULL, 0, 0, error); + if (IS_ENABLED(CONFIG_MIPS_RAW_APPENDED_DTB) && + fdt_magic((void *)&__appended_dtb) == FDT_MAGIC) { + unsigned int image_size, dtb_size; + + dtb_size = fdt_totalsize((void *)&__appended_dtb); + + /* last four bytes is always image size in little endian */ + image_size = le32_to_cpup((void *)&__image_end - 4); + + /* copy dtb to where the booted kernel will expect it */ + memcpy((void *)VMLINUX_LOAD_ADDRESS_ULL + image_size, + __appended_dtb, dtb_size); + } + /* FIXME: should we flush cache here? */ puts("Now, booting the kernel...\n"); } diff --git a/arch/mips/boot/compressed/head.S b/arch/mips/boot/compressed/head.S index c580e853b9fb..409cb483a9ff 100644 --- a/arch/mips/boot/compressed/head.S +++ b/arch/mips/boot/compressed/head.S @@ -25,22 +25,6 @@ start: move s2, a2 move s3, a3 -#ifdef CONFIG_MIPS_ZBOOT_APPENDED_DTB - PTR_LA t0, __appended_dtb -#ifdef CONFIG_CPU_BIG_ENDIAN - li t1, 0xd00dfeed -#else - li t1, 0xedfe0dd0 -#endif - lw t2, (t0) - bne t1, t2, not_found - nop - - move s1, t0 - PTR_LI s0, -2 -not_found: -#endif - /* Clear BSS */ PTR_LA a0, _edata PTR_LA a2, _end diff --git a/arch/mips/boot/dts/cavium-octeon/dlink_dsr-1000n.dts b/arch/mips/boot/dts/cavium-octeon/dlink_dsr-1000n.dts index d6bc994f736f..b134798a0fd7 100644 --- a/arch/mips/boot/dts/cavium-octeon/dlink_dsr-1000n.dts +++ b/arch/mips/boot/dts/cavium-octeon/dlink_dsr-1000n.dts @@ -9,6 +9,7 @@ */ /include/ "octeon_3xxx.dtsi" +#include <dt-bindings/gpio/gpio.h> / { model = "dlink,dsr-1000n"; @@ -63,12 +64,27 @@ usb1 { label = "usb1"; - gpios = <&gpio 9 1>; /* Active low */ + gpios = <&gpio 9 GPIO_ACTIVE_LOW>; }; usb2 { label = "usb2"; - gpios = <&gpio 10 1>; /* Active low */ + gpios = <&gpio 10 GPIO_ACTIVE_LOW>; + }; + + wps { + label = "wps"; + gpios = <&gpio 11 GPIO_ACTIVE_LOW>; + }; + + wireless1 { + label = "5g"; + gpios = <&gpio 17 GPIO_ACTIVE_LOW>; + }; + + wireless2 { + label = "2.4g"; + gpios = <&gpio 18 GPIO_ACTIVE_LOW>; }; }; diff --git a/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts b/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts index de61f02d3ef6..ca6b4467bcd3 100644 --- a/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts +++ b/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts @@ -388,16 +388,4 @@ usbn = &usbn; led0 = &led0; }; - - dsr1000n-leds { - compatible = "gpio-leds"; - usb1 { - label = "usb1"; - gpios = <&gpio 9 1>; /* Active low */ - }; - usb2 { - label = "usb2"; - gpios = <&gpio 10 1>; /* Active low */ - }; - }; }; diff --git a/arch/mips/boot/tools/relocs_64.c b/arch/mips/boot/tools/relocs_64.c index b671b5e2dcd8..06066e6ac2f9 100644 --- a/arch/mips/boot/tools/relocs_64.c +++ b/arch/mips/boot/tools/relocs_64.c @@ -9,17 +9,20 @@ typedef uint8_t Elf64_Byte; -typedef struct { - Elf64_Word r_sym; /* Symbol index. */ - Elf64_Byte r_ssym; /* Special symbol. */ - Elf64_Byte r_type3; /* Third relocation. */ - Elf64_Byte r_type2; /* Second relocation. */ - Elf64_Byte r_type; /* First relocation. */ +typedef union { + struct { + Elf64_Word r_sym; /* Symbol index. */ + Elf64_Byte r_ssym; /* Special symbol. */ + Elf64_Byte r_type3; /* Third relocation. */ + Elf64_Byte r_type2; /* Second relocation. */ + Elf64_Byte r_type; /* First relocation. */ + } fields; + Elf64_Xword unused; } Elf64_Mips_Rela; #define ELF_CLASS ELFCLASS64 -#define ELF_R_SYM(val) (((Elf64_Mips_Rela *)(&val))->r_sym) -#define ELF_R_TYPE(val) (((Elf64_Mips_Rela *)(&val))->r_type) +#define ELF_R_SYM(val) (((Elf64_Mips_Rela *)(&val))->fields.r_sym) +#define ELF_R_TYPE(val) (((Elf64_Mips_Rela *)(&val))->fields.r_type) #define ELF_ST_TYPE(o) ELF64_ST_TYPE(o) #define ELF_ST_BIND(o) ELF64_ST_BIND(o) #define ELF_ST_VISIBILITY(o) ELF64_ST_VISIBILITY(o) diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c index 2cd45f5f9481..fd69528b24fb 100644 --- a/arch/mips/cavium-octeon/dma-octeon.c +++ b/arch/mips/cavium-octeon/dma-octeon.c @@ -125,7 +125,7 @@ static phys_addr_t octeon_small_dma_to_phys(struct device *dev, static dma_addr_t octeon_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { dma_addr_t daddr = swiotlb_map_page(dev, page, offset, size, direction, attrs); @@ -135,7 +135,7 @@ static dma_addr_t octeon_dma_map_page(struct device *dev, struct page *page, } static int octeon_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction, struct dma_attrs *attrs) + int nents, enum dma_data_direction direction, unsigned long attrs) { int r = swiotlb_map_sg_attrs(dev, sg, nents, direction, attrs); mb(); @@ -157,7 +157,7 @@ static void octeon_dma_sync_sg_for_device(struct device *dev, } static void *octeon_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs) + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { void *ret; @@ -189,7 +189,7 @@ static void *octeon_dma_alloc_coherent(struct device *dev, size_t size, } static void octeon_dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs) + void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { swiotlb_free_coherent(dev, size, vaddr, dma_handle); } diff --git a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c index 504ed61a47cd..b65a6c1ac016 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c +++ b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c @@ -668,7 +668,7 @@ int64_t cvmx_bootmem_phy_named_block_alloc(uint64_t size, uint64_t min_addr, /* * Round size up to mult of minimum alignment bytes We need * the actual size allocated to allow for blocks to be - * coallesced when they are freed. The alloc routine does the + * coalesced when they are freed. The alloc routine does the * same rounding up on all allocations. */ size = ALIGN(size, CVMX_BOOTMEM_ALIGNMENT_SIZE); diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c index 36e30d65ba05..ff49fc04500c 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c +++ b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c @@ -186,15 +186,6 @@ int cvmx_helper_board_get_mii_address(int ipd_port) return 7 - ipd_port; else return -1; - case CVMX_BOARD_TYPE_CUST_DSR1000N: - /* - * Port 2 connects to Broadcom PHY (B5081). Other ports (0-1) - * connect to a switch (BCM53115). - */ - if (ipd_port == 2) - return 8; - else - return -1; case CVMX_BOARD_TYPE_KONTRON_S1901: if (ipd_port == CVMX_HELPER_BOARD_MGMT_IPD_PORT) return 1; @@ -289,18 +280,6 @@ cvmx_helper_link_info_t __cvmx_helper_board_link_get(int ipd_port) return result; } break; - case CVMX_BOARD_TYPE_CUST_DSR1000N: - if (ipd_port == 0 || ipd_port == 1) { - /* Ports 0 and 1 connect to a switch (BCM53115). */ - result.s.link_up = 1; - result.s.full_duplex = 1; - result.s.speed = 1000; - return result; - } else { - /* Port 2 uses a Broadcom PHY (B5081). */ - is_broadcom_phy = 1; - } - break; } phy_addr = cvmx_helper_board_get_mii_address(ipd_port); @@ -765,7 +744,6 @@ enum cvmx_helper_board_usb_clock_types __cvmx_helper_board_usb_get_clock_type(vo case CVMX_BOARD_TYPE_LANAI2_G: case CVMX_BOARD_TYPE_NIC10E_66: case CVMX_BOARD_TYPE_UBNT_E100: - case CVMX_BOARD_TYPE_CUST_DSR1000N: return USB_CLOCK_TYPE_CRYSTAL_12; case CVMX_BOARD_TYPE_NIC10E: return USB_CLOCK_TYPE_REF_12; diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 368eb490354c..5a9b87b7993e 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -1260,7 +1260,7 @@ static int octeon_irq_gpio_map(struct irq_domain *d, line = (hw + gpiod->base_hwirq) >> 6; bit = (hw + gpiod->base_hwirq) & 63; - if (line > ARRAY_SIZE(octeon_irq_ciu_to_irq) || + if (line >= ARRAY_SIZE(octeon_irq_ciu_to_irq) || octeon_irq_ciu_to_irq[line][bit] != 0) return -EINVAL; @@ -1542,10 +1542,6 @@ static int __init octeon_irq_init_ciu( goto err; } - r = octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_USB0, 0, 56); - if (r) - goto err; - r = octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_TWSI2, 0, 59); if (r) goto err; @@ -1559,10 +1555,6 @@ static int __init octeon_irq_init_ciu( goto err; } - r = octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_USB1, 1, 17); - if (r) - goto err; - /* Enable the CIU lines */ set_c0_status(STATUSF_IP3 | STATUSF_IP2); if (octeon_irq_use_ip4) @@ -2077,10 +2069,6 @@ static int __init octeon_irq_init_ciu2( goto err; } - r = octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_USB0, 3, 44); - if (r) - goto err; - for (i = 0; i < 4; i++) { r = octeon_irq_force_ciu_mapping( ciu_domain, i + OCTEON_IRQ_PCI_INT0, 4, i); diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c index 7aeafedff94e..b31fbc9d6eae 100644 --- a/arch/mips/cavium-octeon/octeon-platform.c +++ b/arch/mips/cavium-octeon/octeon-platform.c @@ -3,33 +3,27 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2004-2011 Cavium Networks + * Copyright (C) 2004-2016 Cavium Networks * Copyright (C) 2008 Wind River Systems */ -#include <linux/delay.h> #include <linux/init.h> -#include <linux/irq.h> -#include <linux/i2c.h> -#include <linux/usb.h> -#include <linux/dma-mapping.h> +#include <linux/delay.h> #include <linux/etherdevice.h> -#include <linux/module.h> -#include <linux/mutex.h> -#include <linux/slab.h> -#include <linux/platform_device.h> #include <linux/of_platform.h> #include <linux/of_fdt.h> #include <linux/libfdt.h> +#include <linux/usb/ehci_def.h> #include <linux/usb/ehci_pdriver.h> #include <linux/usb/ohci_pdriver.h> #include <asm/octeon/octeon.h> -#include <asm/octeon/cvmx-rnm-defs.h> -#include <asm/octeon/cvmx-helper.h> #include <asm/octeon/cvmx-helper-board.h> #include <asm/octeon/cvmx-uctlx-defs.h> +#define CVMX_UAHCX_EHCI_USBCMD (CVMX_ADD_IO_SEG(0x00016F0000000010ull)) +#define CVMX_UAHCX_OHCI_USBCMD (CVMX_ADD_IO_SEG(0x00016F0000000408ull)) + /* Octeon Random Number Generator. */ static int __init octeon_rng_device_init(void) { @@ -78,12 +72,36 @@ static DEFINE_MUTEX(octeon2_usb_clocks_mutex); static int octeon2_usb_clock_start_cnt; +static int __init octeon2_usb_reset(void) +{ + union cvmx_uctlx_clk_rst_ctl clk_rst_ctl; + u32 ucmd; + + if (!OCTEON_IS_OCTEON2()) + return 0; + + clk_rst_ctl.u64 = cvmx_read_csr(CVMX_UCTLX_CLK_RST_CTL(0)); + if (clk_rst_ctl.s.hrst) { + ucmd = cvmx_read64_uint32(CVMX_UAHCX_EHCI_USBCMD); + ucmd &= ~CMD_RUN; + cvmx_write64_uint32(CVMX_UAHCX_EHCI_USBCMD, ucmd); + mdelay(2); + ucmd |= CMD_RESET; + cvmx_write64_uint32(CVMX_UAHCX_EHCI_USBCMD, ucmd); + ucmd = cvmx_read64_uint32(CVMX_UAHCX_OHCI_USBCMD); + ucmd |= CMD_RUN; + cvmx_write64_uint32(CVMX_UAHCX_OHCI_USBCMD, ucmd); + } + + return 0; +} +arch_initcall(octeon2_usb_reset); + static void octeon2_usb_clocks_start(struct device *dev) { u64 div; union cvmx_uctlx_if_ena if_ena; union cvmx_uctlx_clk_rst_ctl clk_rst_ctl; - union cvmx_uctlx_uphy_ctl_status uphy_ctl_status; union cvmx_uctlx_uphy_portx_ctl_status port_ctl_status; int i; unsigned long io_clk_64_to_ns; @@ -131,6 +149,17 @@ static void octeon2_usb_clocks_start(struct device *dev) if_ena.s.en = 1; cvmx_write_csr(CVMX_UCTLX_IF_ENA(0), if_ena.u64); + for (i = 0; i <= 1; i++) { + port_ctl_status.u64 = + cvmx_read_csr(CVMX_UCTLX_UPHY_PORTX_CTL_STATUS(i, 0)); + /* Set txvreftune to 15 to obtain compliant 'eye' diagram. */ + port_ctl_status.s.txvreftune = 15; + port_ctl_status.s.txrisetune = 1; + port_ctl_status.s.txpreemphasistune = 1; + cvmx_write_csr(CVMX_UCTLX_UPHY_PORTX_CTL_STATUS(i, 0), + port_ctl_status.u64); + } + /* Step 3: Configure the reference clock, PHY, and HCLK */ clk_rst_ctl.u64 = cvmx_read_csr(CVMX_UCTLX_CLK_RST_CTL(0)); @@ -218,29 +247,10 @@ static void octeon2_usb_clocks_start(struct device *dev) clk_rst_ctl.s.p_por = 0; cvmx_write_csr(CVMX_UCTLX_CLK_RST_CTL(0), clk_rst_ctl.u64); - /* Step 5: Wait 1 ms for the PHY clock to start. */ - mdelay(1); + /* Step 5: Wait 3 ms for the PHY clock to start. */ + mdelay(3); - /* - * Step 6: Program the reset input from automatic test - * equipment field in the UPHY CSR - */ - uphy_ctl_status.u64 = cvmx_read_csr(CVMX_UCTLX_UPHY_CTL_STATUS(0)); - uphy_ctl_status.s.ate_reset = 1; - cvmx_write_csr(CVMX_UCTLX_UPHY_CTL_STATUS(0), uphy_ctl_status.u64); - - /* Step 7: Wait for at least 10ns. */ - ndelay(10); - - /* Step 8: Clear the ATE_RESET field in the UPHY CSR. */ - uphy_ctl_status.s.ate_reset = 0; - cvmx_write_csr(CVMX_UCTLX_UPHY_CTL_STATUS(0), uphy_ctl_status.u64); - - /* - * Step 9: Wait for at least 20ns for UPHY to output PHY clock - * signals and OHCI_CLK48 - */ - ndelay(20); + /* Steps 6..9 for ATE only, are skipped. */ /* Step 10: Configure the OHCI_CLK48 and OHCI_CLK12 clocks. */ /* 10a */ @@ -261,6 +271,20 @@ static void octeon2_usb_clocks_start(struct device *dev) clk_rst_ctl.s.p_prst = 1; cvmx_write_csr(CVMX_UCTLX_CLK_RST_CTL(0), clk_rst_ctl.u64); + /* Step 11b */ + udelay(1); + + /* Step 11c */ + clk_rst_ctl.s.p_prst = 0; + cvmx_write_csr(CVMX_UCTLX_CLK_RST_CTL(0), clk_rst_ctl.u64); + + /* Step 11d */ + mdelay(1); + + /* Step 11e */ + clk_rst_ctl.s.p_prst = 1; + cvmx_write_csr(CVMX_UCTLX_CLK_RST_CTL(0), clk_rst_ctl.u64); + /* Step 12: Wait 1 uS. */ udelay(1); @@ -269,21 +293,9 @@ static void octeon2_usb_clocks_start(struct device *dev) cvmx_write_csr(CVMX_UCTLX_CLK_RST_CTL(0), clk_rst_ctl.u64); end_clock: - /* Now we can set some other registers. */ - - for (i = 0; i <= 1; i++) { - port_ctl_status.u64 = - cvmx_read_csr(CVMX_UCTLX_UPHY_PORTX_CTL_STATUS(i, 0)); - /* Set txvreftune to 15 to obtain compliant 'eye' diagram. */ - port_ctl_status.s.txvreftune = 15; - port_ctl_status.s.txrisetune = 1; - port_ctl_status.s.txpreemphasistune = 1; - cvmx_write_csr(CVMX_UCTLX_UPHY_PORTX_CTL_STATUS(i, 0), - port_ctl_status.u64); - } - /* Set uSOF cycle period to 60,000 bits. */ cvmx_write_csr(CVMX_UCTLX_EHCI_FLA(0), 0x20ull); + exit: mutex_unlock(&octeon2_usb_clocks_mutex); } @@ -311,7 +323,11 @@ static struct usb_ehci_pdata octeon_ehci_pdata = { #ifdef __BIG_ENDIAN .big_endian_mmio = 1, #endif - .dma_mask_64 = 1, + /* + * We can DMA from anywhere. But the descriptors must be in + * the lower 4GB. + */ + .dma_mask_64 = 0, .power_on = octeon_ehci_power_on, .power_off = octeon_ehci_power_off, }; @@ -689,6 +705,10 @@ int __init octeon_prune_device_tree(void) if (fdt_check_header(initial_boot_params)) panic("Corrupt Device Tree."); + WARN(octeon_bootinfo->board_type == CVMX_BOARD_TYPE_CUST_DSR1000N, + "Built-in DTB booting is deprecated on %s. Please switch to use appended DTB.", + cvmx_board_type_to_string(octeon_bootinfo->board_type)); + aliases = fdt_path_offset(initial_boot_params, "/aliases"); if (aliases < 0) { pr_err("Error: No /aliases node in device tree."); @@ -1032,13 +1052,6 @@ end_led: } } - if (octeon_bootinfo->board_type != CVMX_BOARD_TYPE_CUST_DSR1000N) { - int dsr1000n_leds = fdt_path_offset(initial_boot_params, - "/dsr1000n-leds"); - if (dsr1000n_leds >= 0) - fdt_nop_node(initial_boot_params, dsr1000n_leds); - } - return 0; } diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 64f852b063a8..cb16fcc5f8f0 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -40,9 +40,27 @@ #include <asm/octeon/octeon.h> #include <asm/octeon/pci-octeon.h> -#include <asm/octeon/cvmx-mio-defs.h> #include <asm/octeon/cvmx-rst-defs.h> +/* + * TRUE for devices having registers with little-endian byte + * order, FALSE for registers with native-endian byte order. + * PCI mandates little-endian, USB and SATA are configuraable, + * but we chose little-endian for these. + */ +const bool octeon_should_swizzle_table[256] = { + [0x00] = true, /* bootbus/CF */ + [0x1b] = true, /* PCI mmio window */ + [0x1c] = true, /* PCI mmio window */ + [0x1d] = true, /* PCI mmio window */ + [0x1e] = true, /* PCI mmio window */ + [0x68] = true, /* OCTEON III USB */ + [0x69] = true, /* OCTEON III USB */ + [0x6c] = true, /* OCTEON III SATA */ + [0x6f] = true, /* OCTEON II USB */ +}; +EXPORT_SYMBOL(octeon_should_swizzle_table); + #ifdef CONFIG_PCI extern void pci_console_init(const char *arg); #endif diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 33aab89259f3..4d457d602d3b 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -271,6 +271,7 @@ static int octeon_cpu_disable(void) return -ENOTSUPP; set_cpu_online(cpu, false); + calculate_cpu_foreign_map(); cpumask_clear_cpu(cpu, &cpu_callin_map); octeon_fixup_irqs(); diff --git a/arch/mips/cobalt/setup.c b/arch/mips/cobalt/setup.c index 9a8c2fe8d334..c136a18c7221 100644 --- a/arch/mips/cobalt/setup.c +++ b/arch/mips/cobalt/setup.c @@ -42,8 +42,8 @@ const char *get_system_type(void) /* * Cobalt doesn't have PS/2 keyboard/mouse interfaces, - * keyboard conntroller is never used. - * Also PCI-ISA bridge DMA contoroller is never used. + * keyboard controller is never used. + * Also PCI-ISA bridge DMA controller is never used. */ static struct resource cobalt_reserved_resources[] = { { /* dma1 */ diff --git a/arch/mips/configs/ath25_defconfig b/arch/mips/configs/ath25_defconfig new file mode 100644 index 000000000000..2c829950be17 --- /dev/null +++ b/arch/mips/configs/ath25_defconfig @@ -0,0 +1,119 @@ +CONFIG_ATH25=y +# CONFIG_COMPACTION is not set +CONFIG_HZ_100=y +# CONFIG_SECCOMP is not set +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SYSVIPC=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +# CONFIG_FHANDLE is not set +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_RD_GZIP is not set +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_XZ is not set +# CONFIG_RD_LZO is not set +# CONFIG_RD_LZ4 is not set +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +# CONFIG_AIO is not set +CONFIG_EMBEDDED=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_CFQ is not set +# CONFIG_SUSPEND is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_IPV6 is not set +CONFIG_CFG80211=m +CONFIG_MAC80211=m +CONFIG_MAC80211_DEBUGFS=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_MTD=y +CONFIG_MTD_REDBOOT_PARTS=y +CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-2 +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_CFI=y +CONFIG_MTD_CFI_ADV_OPTIONS=y +CONFIG_MTD_CFI_GEOMETRY=y +# CONFIG_MTD_MAP_BANK_WIDTH_1 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_4 is not set +# CONFIG_MTD_CFI_I2 is not set +CONFIG_MTD_CFI_AMDSTD=y +CONFIG_MTD_COMPLEX_MAPPINGS=y +CONFIG_MTD_PHYSMAP=y +CONFIG_NETDEVICES=y +# CONFIG_ETHERNET is not set +# CONFIG_WLAN_VENDOR_ADMTEK is not set +CONFIG_ATH5K=m +# CONFIG_WLAN_VENDOR_ATMEL is not set +# CONFIG_WLAN_VENDOR_BROADCOM is not set +# CONFIG_WLAN_VENDOR_CISCO is not set +# CONFIG_WLAN_VENDOR_INTEL is not set +# CONFIG_WLAN_VENDOR_INTERSIL is not set +# CONFIG_WLAN_VENDOR_MARVELL is not set +# CONFIG_WLAN_VENDOR_MEDIATEK is not set +# CONFIG_WLAN_VENDOR_RALINK is not set +# CONFIG_WLAN_VENDOR_REALTEK is not set +# CONFIG_WLAN_VENDOR_RSI is not set +# CONFIG_WLAN_VENDOR_ST is not set +# CONFIG_WLAN_VENDOR_TI is not set +# CONFIG_WLAN_VENDOR_ZYDAS is not set +CONFIG_INPUT=m +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +# CONFIG_SERIAL_8250_PCI is not set +CONFIG_SERIAL_8250_NR_UARTS=1 +CONFIG_SERIAL_8250_RUNTIME_UARTS=1 +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +# CONFIG_VGA_ARB is not set +CONFIG_USB=m +CONFIG_USB_EHCI_HCD=m +CONFIG_LEDS_CLASS=y +# CONFIG_IOMMU_SUPPORT is not set +# CONFIG_DNOTIFY is not set +# CONFIG_PROC_PAGE_MONITOR is not set +CONFIG_TMPFS=y +CONFIG_TMPFS_XATTR=y +CONFIG_JFFS2_FS=y +CONFIG_JFFS2_SUMMARY=y +CONFIG_JFFS2_FS_XATTR=y +# CONFIG_JFFS2_FS_POSIX_ACL is not set +# CONFIG_JFFS2_FS_SECURITY is not set +CONFIG_JFFS2_COMPRESSION_OPTIONS=y +# CONFIG_JFFS2_ZLIB is not set +CONFIG_SQUASHFS=y +CONFIG_SQUASHFS_FILE_DIRECT=y +CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU=y +# CONFIG_SQUASHFS_ZLIB is not set +CONFIG_SQUASHFS_XZ=y +CONFIG_PRINTK_TIME=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_STRIP_ASM_SYMS=y +CONFIG_DEBUG_FS=y +# CONFIG_SCHED_DEBUG is not set +# CONFIG_FTRACE is not set +# CONFIG_XZ_DEC_X86 is not set +# CONFIG_XZ_DEC_POWERPC is not set +# CONFIG_XZ_DEC_IA64 is not set +# CONFIG_XZ_DEC_ARM is not set +# CONFIG_XZ_DEC_ARMTHUMB is not set +# CONFIG_XZ_DEC_SPARC is not set diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig index dcac308cec39..d470d08362c0 100644 --- a/arch/mips/configs/cavium_octeon_defconfig +++ b/arch/mips/configs/cavium_octeon_defconfig @@ -59,6 +59,8 @@ CONFIG_EEPROM_AT25=y CONFIG_BLK_DEV_SD=y CONFIG_ATA=y CONFIG_SATA_AHCI=y +CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_AHCI_OCTEON=y CONFIG_PATA_OCTEON_CF=y CONFIG_SATA_SIL=y CONFIG_NETDEVICES=y diff --git a/arch/mips/include/asm/addrspace.h b/arch/mips/include/asm/addrspace.h index 3b0e51d5a613..c5b04e752e97 100644 --- a/arch/mips/include/asm/addrspace.h +++ b/arch/mips/include/asm/addrspace.h @@ -45,7 +45,7 @@ /* * Returns the kernel segment base of a given address */ -#define KSEGX(a) ((_ACAST32_ (a)) & 0xe0000000) +#define KSEGX(a) ((_ACAST32_(a)) & _ACAST32_(0xe0000000)) /* * Returns the physical address of a CKSEGx / XKPHYS address diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h index 9f67033961a6..ee9f5f2d18fc 100644 --- a/arch/mips/include/asm/bootinfo.h +++ b/arch/mips/include/asm/bootinfo.h @@ -127,6 +127,10 @@ extern char arcs_cmdline[COMMAND_LINE_SIZE]; */ extern unsigned long fw_arg0, fw_arg1, fw_arg2, fw_arg3; +#ifdef CONFIG_USE_OF +extern unsigned long fw_passed_dtb; +#endif + /* * Platform memory detection hook called by setup_arch */ diff --git a/arch/mips/include/asm/dsemul.h b/arch/mips/include/asm/dsemul.h new file mode 100644 index 000000000000..a6e067801f23 --- /dev/null +++ b/arch/mips/include/asm/dsemul.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2016 Imagination Technologies + * Author: Paul Burton <paul.burton@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __MIPS_ASM_DSEMUL_H__ +#define __MIPS_ASM_DSEMUL_H__ + +#include <asm/break.h> +#include <asm/inst.h> + +/* Break instruction with special math emu break code set */ +#define BREAK_MATH(micromips) (((micromips) ? 0x7 : 0xd) | (BRK_MEMU << 16)) + +/* When used as a frame index, indicates the lack of a frame */ +#define BD_EMUFRAME_NONE ((int)BIT(31)) + +struct mm_struct; +struct pt_regs; +struct task_struct; + +/** + * mips_dsemul() - 'Emulate' an instruction from a branch delay slot + * @regs: User thread register context. + * @ir: The instruction to be 'emulated'. + * @branch_pc: The PC of the branch instruction. + * @cont_pc: The PC to continue at following 'emulation'. + * + * Emulate or execute an arbitrary MIPS instruction within the context of + * the current user thread. This is used primarily to handle instructions + * in the delay slots of emulated branch instructions, for example FP + * branch instructions on systems without an FPU. + * + * Return: Zero on success, negative if ir is a NOP, signal number on failure. + */ +extern int mips_dsemul(struct pt_regs *regs, mips_instruction ir, + unsigned long branch_pc, unsigned long cont_pc); + +/** + * do_dsemulret() - Return from a delay slot 'emulation' frame + * @xcp: User thread register context. + * + * Call in response to the BRK_MEMU break instruction used to return to + * the kernel from branch delay slot 'emulation' frames following a call + * to mips_dsemul(). Restores the user thread PC to the value that was + * passed as the cpc parameter to mips_dsemul(). + * + * Return: True if an emulation frame was returned from, else false. + */ +extern bool do_dsemulret(struct pt_regs *xcp); + +/** + * dsemul_thread_cleanup() - Cleanup thread 'emulation' frame + * @tsk: The task structure associated with the thread + * + * If the thread @tsk has a branch delay slot 'emulation' frame + * allocated to it then free that frame. + * + * Return: True if a frame was freed, else false. + */ +extern bool dsemul_thread_cleanup(struct task_struct *tsk); + +/** + * dsemul_thread_rollback() - Rollback from an 'emulation' frame + * @regs: User thread register context. + * + * If the current thread, whose register context is represented by @regs, + * is executing within a delay slot 'emulation' frame then exit that + * frame. The PC will be rolled back to the branch if the instruction + * that was being 'emulated' has not yet executed, or advanced to the + * continuation PC if it has. + * + * Return: True if a frame was exited, else false. + */ +extern bool dsemul_thread_rollback(struct pt_regs *regs); + +/** + * dsemul_mm_cleanup() - Cleanup per-mm delay slot 'emulation' state + * @mm: The struct mm_struct to cleanup state for. + * + * Cleanup state for the given @mm, ensuring that any memory allocated + * for delay slot 'emulation' book-keeping is freed. This is to be called + * before @mm is freed in order to avoid memory leaks. + */ +extern void dsemul_mm_cleanup(struct mm_struct *mm); + +#endif /* __MIPS_ASM_DSEMUL_H__ */ diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h index f5f45717968e..2b3dc2973670 100644 --- a/arch/mips/include/asm/elf.h +++ b/arch/mips/include/asm/elf.h @@ -458,6 +458,7 @@ extern const char *__elf_platform; #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) #endif +/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ #define ARCH_DLINFO \ do { \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ @@ -498,4 +499,7 @@ extern int arch_check_elf(void *ehdr, bool has_interpreter, void *interp_ehdr, extern void mips_set_personality_nan(struct arch_elf_state *state); extern void mips_set_personality_fp(struct arch_elf_state *state); +#define elf_read_implies_exec(ex, stk) mips_elf_read_implies_exec(&(ex), stk) +extern int mips_elf_read_implies_exec(void *elf_ex, int exstack); + #endif /* _ASM_ELF_H */ diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h index 3225c3c0724b..355dc25172e7 100644 --- a/arch/mips/include/asm/fpu_emulator.h +++ b/arch/mips/include/asm/fpu_emulator.h @@ -24,7 +24,7 @@ #define _ASM_FPU_EMULATOR_H #include <linux/sched.h> -#include <asm/break.h> +#include <asm/dsemul.h> #include <asm/thread_info.h> #include <asm/inst.h> #include <asm/local.h> @@ -60,27 +60,16 @@ do { \ #define MIPS_FPU_EMU_INC_STATS(M) do { } while (0) #endif /* CONFIG_DEBUG_FS */ -extern int mips_dsemul(struct pt_regs *regs, mips_instruction ir, - unsigned long cpc); -extern int do_dsemulret(struct pt_regs *xcp); extern int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, int has_fpu, void *__user *fault_addr); int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31); +int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, + unsigned long *contpc); int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, unsigned long *contpc); -/* - * Instruction inserted following the badinst to further tag the sequence - */ -#define BD_COOKIE 0x0000bd36 /* tne $0, $0 with baggage */ - -/* - * Break instruction with special math emu break code set - */ -#define BREAK_MATH(micromips) (((micromips) ? 0x7 : 0xd) | (BRK_MEMU << 16)) - #define SIGNALLING_NAN 0x7ff800007ff80000LL static inline void fpu_emulator_init_fpu(void) diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 36a391d289aa..b54bcadd8aec 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -19,6 +19,9 @@ #include <linux/threads.h> #include <linux/spinlock.h> +#include <asm/inst.h> +#include <asm/mipsregs.h> + /* MIPS KVM register ids */ #define MIPS_CP0_32(_R, _S) \ (KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U32 | (8 * (_R) + (_S))) @@ -53,6 +56,12 @@ #define KVM_REG_MIPS_CP0_CONFIG7 MIPS_CP0_32(16, 7) #define KVM_REG_MIPS_CP0_XCONTEXT MIPS_CP0_64(20, 0) #define KVM_REG_MIPS_CP0_ERROREPC MIPS_CP0_64(30, 0) +#define KVM_REG_MIPS_CP0_KSCRATCH1 MIPS_CP0_64(31, 2) +#define KVM_REG_MIPS_CP0_KSCRATCH2 MIPS_CP0_64(31, 3) +#define KVM_REG_MIPS_CP0_KSCRATCH3 MIPS_CP0_64(31, 4) +#define KVM_REG_MIPS_CP0_KSCRATCH4 MIPS_CP0_64(31, 5) +#define KVM_REG_MIPS_CP0_KSCRATCH5 MIPS_CP0_64(31, 6) +#define KVM_REG_MIPS_CP0_KSCRATCH6 MIPS_CP0_64(31, 7) #define KVM_MAX_VCPUS 1 @@ -65,8 +74,14 @@ -/* Special address that contains the comm page, used for reducing # of traps */ -#define KVM_GUEST_COMMPAGE_ADDR 0x0 +/* + * Special address that contains the comm page, used for reducing # of traps + * This needs to be within 32Kb of 0x0 (so the zero register can be used), but + * preferably not at 0x0 so that most kernel NULL pointer dereferences can be + * caught. + */ +#define KVM_GUEST_COMMPAGE_ADDR ((PAGE_SIZE > 0x8000) ? 0 : \ + (0x8000 - PAGE_SIZE)) #define KVM_GUEST_KERNEL_MODE(vcpu) ((kvm_read_c0_guest_status(vcpu->arch.cop0) & (ST0_EXL | ST0_ERL)) || \ ((kvm_read_c0_guest_status(vcpu->arch.cop0) & KSU_USER) == 0)) @@ -93,9 +108,6 @@ #define KVM_INVALID_ADDR 0xdeadbeef extern atomic_t kvm_mips_instance; -extern kvm_pfn_t (*kvm_mips_gfn_to_pfn)(struct kvm *kvm, gfn_t gfn); -extern void (*kvm_mips_release_pfn_clean)(kvm_pfn_t pfn); -extern bool (*kvm_mips_is_error_pfn)(kvm_pfn_t pfn); struct kvm_vm_stat { u32 remote_tlb_flush; @@ -126,28 +138,6 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -enum kvm_mips_exit_types { - WAIT_EXITS, - CACHE_EXITS, - SIGNAL_EXITS, - INT_EXITS, - COP_UNUSABLE_EXITS, - TLBMOD_EXITS, - TLBMISS_LD_EXITS, - TLBMISS_ST_EXITS, - ADDRERR_ST_EXITS, - ADDRERR_LD_EXITS, - SYSCALL_EXITS, - RESVD_INST_EXITS, - BREAK_INST_EXITS, - TRAP_INST_EXITS, - MSA_FPE_EXITS, - FPE_EXITS, - MSA_DISABLED_EXITS, - FLUSH_DCACHE_EXITS, - MAX_KVM_MIPS_EXIT_TYPES -}; - struct kvm_arch_memory_slot { }; @@ -215,73 +205,6 @@ struct mips_coproc { #define MIPS_CP0_CONFIG4_SEL 4 #define MIPS_CP0_CONFIG5_SEL 5 -/* Config0 register bits */ -#define CP0C0_M 31 -#define CP0C0_K23 28 -#define CP0C0_KU 25 -#define CP0C0_MDU 20 -#define CP0C0_MM 17 -#define CP0C0_BM 16 -#define CP0C0_BE 15 -#define CP0C0_AT 13 -#define CP0C0_AR 10 -#define CP0C0_MT 7 -#define CP0C0_VI 3 -#define CP0C0_K0 0 - -/* Config1 register bits */ -#define CP0C1_M 31 -#define CP0C1_MMU 25 -#define CP0C1_IS 22 -#define CP0C1_IL 19 -#define CP0C1_IA 16 -#define CP0C1_DS 13 -#define CP0C1_DL 10 -#define CP0C1_DA 7 -#define CP0C1_C2 6 -#define CP0C1_MD 5 -#define CP0C1_PC 4 -#define CP0C1_WR 3 -#define CP0C1_CA 2 -#define CP0C1_EP 1 -#define CP0C1_FP 0 - -/* Config2 Register bits */ -#define CP0C2_M 31 -#define CP0C2_TU 28 -#define CP0C2_TS 24 -#define CP0C2_TL 20 -#define CP0C2_TA 16 -#define CP0C2_SU 12 -#define CP0C2_SS 8 -#define CP0C2_SL 4 -#define CP0C2_SA 0 - -/* Config3 Register bits */ -#define CP0C3_M 31 -#define CP0C3_ISA_ON_EXC 16 -#define CP0C3_ULRI 13 -#define CP0C3_DSPP 10 -#define CP0C3_LPA 7 -#define CP0C3_VEIC 6 -#define CP0C3_VInt 5 -#define CP0C3_SP 4 -#define CP0C3_MT 2 -#define CP0C3_SM 1 -#define CP0C3_TL 0 - -/* MMU types, the first four entries have the same layout as the - CP0C0_MT field. */ -enum mips_mmu_types { - MMU_TYPE_NONE, - MMU_TYPE_R4000, - MMU_TYPE_RESERVED, - MMU_TYPE_FMT, - MMU_TYPE_R3000, - MMU_TYPE_R6000, - MMU_TYPE_R8000 -}; - /* Resume Flags */ #define RESUME_FLAG_DR (1<<0) /* Reload guest nonvolatile state? */ #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ @@ -298,11 +221,6 @@ enum emulation_result { EMULATE_PRIV_FAIL, }; -#define MIPS3_PG_G 0x00000001 /* Global; ignore ASID if in lo0 & lo1 */ -#define MIPS3_PG_V 0x00000002 /* Valid */ -#define MIPS3_PG_NV 0x00000000 -#define MIPS3_PG_D 0x00000004 /* Dirty */ - #define mips3_paddr_to_tlbpfn(x) \ (((unsigned long)(x) >> MIPS3_PG_SHIFT) & MIPS3_PG_FRAME) #define mips3_tlbpfn_to_paddr(x) \ @@ -313,13 +231,11 @@ enum emulation_result { #define VPN2_MASK 0xffffe000 #define KVM_ENTRYHI_ASID MIPS_ENTRYHI_ASID -#define TLB_IS_GLOBAL(x) (((x).tlb_lo0 & MIPS3_PG_G) && \ - ((x).tlb_lo1 & MIPS3_PG_G)) +#define TLB_IS_GLOBAL(x) ((x).tlb_lo[0] & (x).tlb_lo[1] & ENTRYLO_G) #define TLB_VPN2(x) ((x).tlb_hi & VPN2_MASK) #define TLB_ASID(x) ((x).tlb_hi & KVM_ENTRYHI_ASID) -#define TLB_IS_VALID(x, va) (((va) & (1 << PAGE_SHIFT)) \ - ? ((x).tlb_lo1 & MIPS3_PG_V) \ - : ((x).tlb_lo0 & MIPS3_PG_V)) +#define TLB_LO_IDX(x, va) (((va) >> PAGE_SHIFT) & 1) +#define TLB_IS_VALID(x, va) ((x).tlb_lo[TLB_LO_IDX(x, va)] & ENTRYLO_V) #define TLB_HI_VPN2_HIT(x, y) ((TLB_VPN2(x) & ~(x).tlb_mask) == \ ((y) & VPN2_MASK & ~(x).tlb_mask)) #define TLB_HI_ASID_HIT(x, y) (TLB_IS_GLOBAL(x) || \ @@ -328,26 +244,23 @@ enum emulation_result { struct kvm_mips_tlb { long tlb_mask; long tlb_hi; - long tlb_lo0; - long tlb_lo1; + long tlb_lo[2]; }; -#define KVM_MIPS_FPU_FPU 0x1 -#define KVM_MIPS_FPU_MSA 0x2 +#define KVM_MIPS_AUX_FPU 0x1 +#define KVM_MIPS_AUX_MSA 0x2 #define KVM_MIPS_GUEST_TLB_SIZE 64 struct kvm_vcpu_arch { - void *host_ebase, *guest_ebase; + void *guest_ebase; int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); unsigned long host_stack; unsigned long host_gp; /* Host CP0 registers used when handling exits from guest */ unsigned long host_cp0_badvaddr; - unsigned long host_cp0_cause; unsigned long host_cp0_epc; - unsigned long host_cp0_entryhi; - uint32_t guest_inst; + u32 host_cp0_cause; /* GPRS */ unsigned long gprs[32]; @@ -357,8 +270,8 @@ struct kvm_vcpu_arch { /* FPU State */ struct mips_fpu_struct fpu; - /* Which FPU state is loaded (KVM_MIPS_FPU_*) */ - unsigned int fpu_inuse; + /* Which auxiliary state is loaded (KVM_MIPS_AUX_*) */ + unsigned int aux_inuse; /* COP0 State */ struct mips_coproc *cop0; @@ -370,11 +283,11 @@ struct kvm_vcpu_arch { struct hrtimer comparecount_timer; /* Count timer control KVM register */ - uint32_t count_ctl; + u32 count_ctl; /* Count bias from the raw time */ - uint32_t count_bias; + u32 count_bias; /* Frequency of timer in Hz */ - uint32_t count_hz; + u32 count_hz; /* Dynamic nanosecond bias (multiple of count_period) to avoid overflow */ s64 count_dyn_bias; /* Resume time */ @@ -388,7 +301,7 @@ struct kvm_vcpu_arch { /* Bitmask of pending exceptions to be cleared */ unsigned long pending_exceptions_clr; - unsigned long pending_load_cause; + u32 pending_load_cause; /* Save/Restore the entryhi register when are are preempted/scheduled back in */ unsigned long preempt_entryhi; @@ -397,8 +310,8 @@ struct kvm_vcpu_arch { struct kvm_mips_tlb guest_tlb[KVM_MIPS_GUEST_TLB_SIZE]; /* Cached guest kernel/user ASIDs */ - uint32_t guest_user_asid[NR_CPUS]; - uint32_t guest_kernel_asid[NR_CPUS]; + u32 guest_user_asid[NR_CPUS]; + u32 guest_kernel_asid[NR_CPUS]; struct mm_struct guest_kernel_mm, guest_user_mm; int last_sched_cpu; @@ -408,6 +321,7 @@ struct kvm_vcpu_arch { u8 fpu_enabled; u8 msa_enabled; + u8 kscratch_enabled; }; @@ -461,6 +375,18 @@ struct kvm_vcpu_arch { #define kvm_write_c0_guest_config7(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][7] = (val)) #define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0]) #define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val)) +#define kvm_read_c0_guest_kscratch1(cop0) (cop0->reg[MIPS_CP0_DESAVE][2]) +#define kvm_read_c0_guest_kscratch2(cop0) (cop0->reg[MIPS_CP0_DESAVE][3]) +#define kvm_read_c0_guest_kscratch3(cop0) (cop0->reg[MIPS_CP0_DESAVE][4]) +#define kvm_read_c0_guest_kscratch4(cop0) (cop0->reg[MIPS_CP0_DESAVE][5]) +#define kvm_read_c0_guest_kscratch5(cop0) (cop0->reg[MIPS_CP0_DESAVE][6]) +#define kvm_read_c0_guest_kscratch6(cop0) (cop0->reg[MIPS_CP0_DESAVE][7]) +#define kvm_write_c0_guest_kscratch1(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][2] = (val)) +#define kvm_write_c0_guest_kscratch2(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][3] = (val)) +#define kvm_write_c0_guest_kscratch3(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][4] = (val)) +#define kvm_write_c0_guest_kscratch4(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][5] = (val)) +#define kvm_write_c0_guest_kscratch5(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][6] = (val)) +#define kvm_write_c0_guest_kscratch6(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][7] = (val)) /* * Some of the guest registers may be modified asynchronously (e.g. from a @@ -474,7 +400,7 @@ static inline void _kvm_atomic_set_c0_guest_reg(unsigned long *reg, unsigned long temp; do { __asm__ __volatile__( - " .set mips3 \n" + " .set "MIPS_ISA_ARCH_LEVEL" \n" " " __LL "%0, %1 \n" " or %0, %2 \n" " " __SC "%0, %1 \n" @@ -490,7 +416,7 @@ static inline void _kvm_atomic_clear_c0_guest_reg(unsigned long *reg, unsigned long temp; do { __asm__ __volatile__( - " .set mips3 \n" + " .set "MIPS_ISA_ARCH_LEVEL" \n" " " __LL "%0, %1 \n" " and %0, %2 \n" " " __SC "%0, %1 \n" @@ -507,7 +433,7 @@ static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg, unsigned long temp; do { __asm__ __volatile__( - " .set mips3 \n" + " .set "MIPS_ISA_ARCH_LEVEL" \n" " " __LL "%0, %1 \n" " and %0, %2 \n" " or %0, %3 \n" @@ -542,7 +468,7 @@ static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg, static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu) { - return (!__builtin_constant_p(cpu_has_fpu) || cpu_has_fpu) && + return (!__builtin_constant_p(raw_cpu_has_fpu) || raw_cpu_has_fpu) && vcpu->fpu_enabled; } @@ -589,9 +515,11 @@ struct kvm_mips_callbacks { void (*dequeue_io_int)(struct kvm_vcpu *vcpu, struct kvm_mips_interrupt *irq); int (*irq_deliver)(struct kvm_vcpu *vcpu, unsigned int priority, - uint32_t cause); + u32 cause); int (*irq_clear)(struct kvm_vcpu *vcpu, unsigned int priority, - uint32_t cause); + u32 cause); + unsigned long (*num_regs)(struct kvm_vcpu *vcpu); + int (*copy_reg_indices)(struct kvm_vcpu *vcpu, u64 __user *indices); int (*get_one_reg)(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, s64 *v); int (*set_one_reg)(struct kvm_vcpu *vcpu, @@ -605,8 +533,13 @@ int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks); /* Debug: dump vcpu state */ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu); -/* Trampoline ASM routine to start running in "Guest" context */ -extern int __kvm_mips_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu); + +/* Building of entry/exception code */ +int kvm_mips_entry_setup(void); +void *kvm_mips_build_vcpu_run(void *addr); +void *kvm_mips_build_exception(void *addr, void *handler); +void *kvm_mips_build_exit(void *addr); /* FPU/MSA context management */ void __kvm_save_fpu(struct kvm_vcpu_arch *vcpu); @@ -622,11 +555,11 @@ void kvm_drop_fpu(struct kvm_vcpu *vcpu); void kvm_lose_fpu(struct kvm_vcpu *vcpu); /* TLB handling */ -uint32_t kvm_get_kernel_asid(struct kvm_vcpu *vcpu); +u32 kvm_get_kernel_asid(struct kvm_vcpu *vcpu); -uint32_t kvm_get_user_asid(struct kvm_vcpu *vcpu); +u32 kvm_get_user_asid(struct kvm_vcpu *vcpu); -uint32_t kvm_get_commpage_asid (struct kvm_vcpu *vcpu); +u32 kvm_get_commpage_asid (struct kvm_vcpu *vcpu); extern int kvm_mips_handle_kseg0_tlb_fault(unsigned long badbaddr, struct kvm_vcpu *vcpu); @@ -635,22 +568,24 @@ extern int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, struct kvm_vcpu *vcpu); extern int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, - struct kvm_mips_tlb *tlb, - unsigned long *hpa0, - unsigned long *hpa1); + struct kvm_mips_tlb *tlb); -extern enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause, - uint32_t *opc, +extern enum emulation_result kvm_mips_handle_tlbmiss(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); -extern enum emulation_result kvm_mips_handle_tlbmod(unsigned long cause, - uint32_t *opc, +extern enum emulation_result kvm_mips_handle_tlbmod(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvm_mips_dump_host_tlbs(void); extern void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu); +extern int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, + unsigned long entrylo0, + unsigned long entrylo1, + int flush_dcache_mask); extern void kvm_mips_flush_host_tlb(int skip_kseg0); extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi); @@ -667,90 +602,90 @@ extern void kvm_mips_vcpu_load(struct kvm_vcpu *vcpu, int cpu); extern void kvm_mips_vcpu_put(struct kvm_vcpu *vcpu); /* Emulation */ -uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu); -enum emulation_result update_pc(struct kvm_vcpu *vcpu, uint32_t cause); +u32 kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu); +enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause); -extern enum emulation_result kvm_mips_emulate_inst(unsigned long cause, - uint32_t *opc, +extern enum emulation_result kvm_mips_emulate_inst(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); -extern enum emulation_result kvm_mips_emulate_syscall(unsigned long cause, - uint32_t *opc, +extern enum emulation_result kvm_mips_emulate_syscall(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); -extern enum emulation_result kvm_mips_emulate_tlbmiss_ld(unsigned long cause, - uint32_t *opc, +extern enum emulation_result kvm_mips_emulate_tlbmiss_ld(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); -extern enum emulation_result kvm_mips_emulate_tlbinv_ld(unsigned long cause, - uint32_t *opc, +extern enum emulation_result kvm_mips_emulate_tlbinv_ld(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); -extern enum emulation_result kvm_mips_emulate_tlbmiss_st(unsigned long cause, - uint32_t *opc, +extern enum emulation_result kvm_mips_emulate_tlbmiss_st(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); -extern enum emulation_result kvm_mips_emulate_tlbinv_st(unsigned long cause, - uint32_t *opc, +extern enum emulation_result kvm_mips_emulate_tlbinv_st(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); -extern enum emulation_result kvm_mips_emulate_tlbmod(unsigned long cause, - uint32_t *opc, +extern enum emulation_result kvm_mips_emulate_tlbmod(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); -extern enum emulation_result kvm_mips_emulate_fpu_exc(unsigned long cause, - uint32_t *opc, +extern enum emulation_result kvm_mips_emulate_fpu_exc(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); -extern enum emulation_result kvm_mips_handle_ri(unsigned long cause, - uint32_t *opc, +extern enum emulation_result kvm_mips_handle_ri(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); -extern enum emulation_result kvm_mips_emulate_ri_exc(unsigned long cause, - uint32_t *opc, +extern enum emulation_result kvm_mips_emulate_ri_exc(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); -extern enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause, - uint32_t *opc, +extern enum emulation_result kvm_mips_emulate_bp_exc(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); -extern enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause, - uint32_t *opc, +extern enum emulation_result kvm_mips_emulate_trap_exc(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); -extern enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause, - uint32_t *opc, +extern enum emulation_result kvm_mips_emulate_msafpe_exc(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); -extern enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause, - uint32_t *opc, +extern enum emulation_result kvm_mips_emulate_fpe_exc(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); -extern enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause, - uint32_t *opc, +extern enum emulation_result kvm_mips_emulate_msadis_exc(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run); -uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu); -void kvm_mips_write_count(struct kvm_vcpu *vcpu, uint32_t count); -void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare, bool ack); +u32 kvm_mips_read_count(struct kvm_vcpu *vcpu); +void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count); +void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack); void kvm_mips_init_count(struct kvm_vcpu *vcpu); int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl); int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume); @@ -759,27 +694,27 @@ void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu); void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu); -enum emulation_result kvm_mips_check_privilege(unsigned long cause, - uint32_t *opc, +enum emulation_result kvm_mips_check_privilege(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); -enum emulation_result kvm_mips_emulate_cache(uint32_t inst, - uint32_t *opc, - uint32_t cause, +enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst, + u32 *opc, + u32 cause, struct kvm_run *run, struct kvm_vcpu *vcpu); -enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, - uint32_t *opc, - uint32_t cause, +enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst, + u32 *opc, + u32 cause, struct kvm_run *run, struct kvm_vcpu *vcpu); -enum emulation_result kvm_mips_emulate_store(uint32_t inst, - uint32_t cause, +enum emulation_result kvm_mips_emulate_store(union mips_instruction inst, + u32 cause, struct kvm_run *run, struct kvm_vcpu *vcpu); -enum emulation_result kvm_mips_emulate_load(uint32_t inst, - uint32_t cause, +enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, + u32 cause, struct kvm_run *run, struct kvm_vcpu *vcpu); @@ -789,13 +724,13 @@ unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu); unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu); /* Dynamic binary translation */ -extern int kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc, - struct kvm_vcpu *vcpu); -extern int kvm_mips_trans_cache_va(uint32_t inst, uint32_t *opc, +extern int kvm_mips_trans_cache_index(union mips_instruction inst, + u32 *opc, struct kvm_vcpu *vcpu); +extern int kvm_mips_trans_cache_va(union mips_instruction inst, u32 *opc, struct kvm_vcpu *vcpu); -extern int kvm_mips_trans_mfc0(uint32_t inst, uint32_t *opc, +extern int kvm_mips_trans_mfc0(union mips_instruction inst, u32 *opc, struct kvm_vcpu *vcpu); -extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, +extern int kvm_mips_trans_mtc0(union mips_instruction inst, u32 *opc, struct kvm_vcpu *vcpu); /* Misc */ diff --git a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h index d68e685cde60..bd8b9bbe1771 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h @@ -55,7 +55,7 @@ #define cpu_has_mipsmt 0 #define cpu_has_vint 0 #define cpu_has_veic 0 -#define cpu_hwrena_impl_bits 0xc0000000 +#define cpu_hwrena_impl_bits (MIPS_HWRENA_IMPL1 | MIPS_HWRENA_IMPL2) #define cpu_has_wsbh 1 #define cpu_has_rixi (cpu_data[0].cputype != CPU_CAVIUM_OCTEON) diff --git a/arch/mips/include/asm/mach-cavium-octeon/irq.h b/arch/mips/include/asm/mach-cavium-octeon/irq.h index cceae32a0732..64b86b9d30fe 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/irq.h +++ b/arch/mips/include/asm/mach-cavium-octeon/irq.h @@ -42,8 +42,6 @@ enum octeon_irq { OCTEON_IRQ_TIMER1, OCTEON_IRQ_TIMER2, OCTEON_IRQ_TIMER3, - OCTEON_IRQ_USB0, - OCTEON_IRQ_USB1, #ifndef CONFIG_PCI_MSI OCTEON_IRQ_LAST = 127 #endif diff --git a/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h b/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h index 374eefafb320..0cf5ac1f7245 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h +++ b/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h @@ -12,6 +12,14 @@ #ifdef __BIG_ENDIAN +static inline bool __should_swizzle_bits(volatile void *a) +{ + extern const bool octeon_should_swizzle_table[]; + + unsigned long did = ((unsigned long)a >> 40) & 0xff; + return octeon_should_swizzle_table[did]; +} + # define __swizzle_addr_b(port) (port) # define __swizzle_addr_w(port) (port) # define __swizzle_addr_l(port) (port) @@ -19,6 +27,8 @@ #else /* __LITTLE_ENDIAN */ +#define __should_swizzle_bits(a) false + static inline bool __should_swizzle_addr(unsigned long p) { /* boot bus? */ @@ -35,40 +45,14 @@ static inline bool __should_swizzle_addr(unsigned long p) #endif /* __BIG_ENDIAN */ -/* - * Sane hardware offers swapping of PCI/ISA I/O space accesses in hardware; - * less sane hardware forces software to fiddle with this... - * - * Regardless, if the host bus endianness mismatches that of PCI/ISA, then - * you can't have the numerical value of data and byte addresses within - * multibyte quantities both preserved at the same time. Hence two - * variations of functions: non-prefixed ones that preserve the value - * and prefixed ones that preserve byte addresses. The latters are - * typically used for moving raw data between a peripheral and memory (cf. - * string I/O functions), hence the "__mem_" prefix. - */ -#if defined(CONFIG_SWAP_IO_SPACE) # define ioswabb(a, x) (x) # define __mem_ioswabb(a, x) (x) -# define ioswabw(a, x) le16_to_cpu(x) +# define ioswabw(a, x) (__should_swizzle_bits(a) ? le16_to_cpu(x) : x) # define __mem_ioswabw(a, x) (x) -# define ioswabl(a, x) le32_to_cpu(x) +# define ioswabl(a, x) (__should_swizzle_bits(a) ? le32_to_cpu(x) : x) # define __mem_ioswabl(a, x) (x) -# define ioswabq(a, x) le64_to_cpu(x) +# define ioswabq(a, x) (__should_swizzle_bits(a) ? le64_to_cpu(x) : x) # define __mem_ioswabq(a, x) (x) -#else - -# define ioswabb(a, x) (x) -# define __mem_ioswabb(a, x) (x) -# define ioswabw(a, x) (x) -# define __mem_ioswabw(a, x) cpu_to_le16(x) -# define ioswabl(a, x) (x) -# define __mem_ioswabl(a, x) cpu_to_le32(x) -# define ioswabq(a, x) (x) -# define __mem_ioswabq(a, x) cpu_to_le32(x) - -#endif - #endif /* __ASM_MACH_GENERIC_MANGLE_PORT_H */ diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h index 9411a4c0bdad..58e7874e9347 100644 --- a/arch/mips/include/asm/mips-cm.h +++ b/arch/mips/include/asm/mips-cm.h @@ -462,7 +462,7 @@ static inline unsigned int mips_cm_max_vp_width(void) if (mips_cm_revision() >= CM_REV_CM3) return read_gcr_sys_config2() & CM_GCR_SYS_CONFIG2_MAXVPW_MSK; - if (config_enabled(CONFIG_SMP)) + if (IS_ENABLED(CONFIG_SMP)) return smp_num_siblings; return 1; diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index e1ca65c62f6a..def9d8d13f6e 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -53,7 +53,7 @@ #define CP0_SEGCTL2 $5, 4 #define CP0_WIRED $6 #define CP0_INFO $7 -#define CP0_HWRENA $7, 0 +#define CP0_HWRENA $7 #define CP0_BADVADDR $8 #define CP0_BADINSTR $8, 1 #define CP0_COUNT $9 @@ -533,6 +533,7 @@ #define TX49_CONF_CWFON (_ULCAST_(1) << 27) /* Bits specific to the MIPS32/64 PRA. */ +#define MIPS_CONF_VI (_ULCAST_(1) << 3) #define MIPS_CONF_MT (_ULCAST_(7) << 7) #define MIPS_CONF_MT_TLB (_ULCAST_(1) << 7) #define MIPS_CONF_MT_FTLB (_ULCAST_(4) << 7) @@ -853,6 +854,24 @@ #define MIPS_CDMMBASE_ADDR_SHIFT 11 #define MIPS_CDMMBASE_ADDR_START 15 +/* RDHWR register numbers */ +#define MIPS_HWR_CPUNUM 0 /* CPU number */ +#define MIPS_HWR_SYNCISTEP 1 /* SYNCI step size */ +#define MIPS_HWR_CC 2 /* Cycle counter */ +#define MIPS_HWR_CCRES 3 /* Cycle counter resolution */ +#define MIPS_HWR_ULR 29 /* UserLocal */ +#define MIPS_HWR_IMPL1 30 /* Implementation dependent */ +#define MIPS_HWR_IMPL2 31 /* Implementation dependent */ + +/* Bits in HWREna register */ +#define MIPS_HWRENA_CPUNUM (_ULCAST_(1) << MIPS_HWR_CPUNUM) +#define MIPS_HWRENA_SYNCISTEP (_ULCAST_(1) << MIPS_HWR_SYNCISTEP) +#define MIPS_HWRENA_CC (_ULCAST_(1) << MIPS_HWR_CC) +#define MIPS_HWRENA_CCRES (_ULCAST_(1) << MIPS_HWR_CCRES) +#define MIPS_HWRENA_ULR (_ULCAST_(1) << MIPS_HWR_ULR) +#define MIPS_HWRENA_IMPL1 (_ULCAST_(1) << MIPS_HWR_IMPL1) +#define MIPS_HWRENA_IMPL2 (_ULCAST_(1) << MIPS_HWR_IMPL2) + /* * Bitfields in the TX39 family CP0 Configuration Register 3 */ diff --git a/arch/mips/include/asm/mmu.h b/arch/mips/include/asm/mmu.h index 1afa1f986df8..f6ba08d77931 100644 --- a/arch/mips/include/asm/mmu.h +++ b/arch/mips/include/asm/mmu.h @@ -2,11 +2,20 @@ #define __ASM_MMU_H #include <linux/atomic.h> +#include <linux/spinlock.h> +#include <linux/wait.h> typedef struct { unsigned long asid[NR_CPUS]; void *vdso; atomic_t fp_mode_switching; + + /* lock to be held whilst modifying fp_bd_emupage_allocmap */ + spinlock_t bd_emupage_lock; + /* bitmap tracking allocation of fp_bd_emupage */ + unsigned long *bd_emupage_allocmap; + /* wait queue for threads requiring an emuframe */ + wait_queue_head_t bd_emupage_queue; } mm_context_t; #endif /* __ASM_MMU_H */ diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h index fc57e135cb0a..ddd57ade1aa8 100644 --- a/arch/mips/include/asm/mmu_context.h +++ b/arch/mips/include/asm/mmu_context.h @@ -16,6 +16,7 @@ #include <linux/smp.h> #include <linux/slab.h> #include <asm/cacheflush.h> +#include <asm/dsemul.h> #include <asm/hazards.h> #include <asm/tlbflush.h> #include <asm-generic/mm_hooks.h> @@ -128,6 +129,10 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) atomic_set(&mm->context.fp_mode_switching, 0); + mm->context.bd_emupage_allocmap = NULL; + spin_lock_init(&mm->context.bd_emupage_lock); + init_waitqueue_head(&mm->context.bd_emupage_queue); + return 0; } @@ -162,6 +167,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ static inline void destroy_context(struct mm_struct *mm) { + dsemul_mm_cleanup(mm); } #define deactivate_mm(tsk, mm) do { } while (0) diff --git a/arch/mips/include/asm/msa.h b/arch/mips/include/asm/msa.h index ddf496cb2a2a..8967b475ab10 100644 --- a/arch/mips/include/asm/msa.h +++ b/arch/mips/include/asm/msa.h @@ -168,6 +168,7 @@ static inline unsigned int read_msa_##name(void) \ unsigned int reg; \ __asm__ __volatile__( \ " .set push\n" \ + " .set fp=64\n" \ " .set msa\n" \ " cfcmsa %0, $" #cs "\n" \ " .set pop\n" \ @@ -179,6 +180,7 @@ static inline void write_msa_##name(unsigned int val) \ { \ __asm__ __volatile__( \ " .set push\n" \ + " .set fp=64\n" \ " .set msa\n" \ " ctcmsa $" #cs ", %0\n" \ " .set pop\n" \ diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index 21ed7150fec3..ea0cd9773914 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -162,16 +162,34 @@ typedef struct { unsigned long pgprot; } pgprot_t; /* * __pa()/__va() should be used only during mem init. */ -#ifdef CONFIG_64BIT -#define __pa(x) \ -({ \ - unsigned long __x = (unsigned long)(x); \ - __x < CKSEG0 ? XPHYSADDR(__x) : CPHYSADDR(__x); \ -}) -#else -#define __pa(x) \ - ((unsigned long)(x) - PAGE_OFFSET + PHYS_OFFSET) -#endif +static inline unsigned long ___pa(unsigned long x) +{ + if (config_enabled(CONFIG_64BIT)) { + /* + * For MIPS64 the virtual address may either be in one of + * the compatibility segements ckseg0 or ckseg1, or it may + * be in xkphys. + */ + return x < CKSEG0 ? XPHYSADDR(x) : CPHYSADDR(x); + } + + if (!config_enabled(CONFIG_EVA)) { + /* + * We're using the standard MIPS32 legacy memory map, ie. + * the address x is going to be in kseg0 or kseg1. We can + * handle either case by masking out the desired bits using + * CPHYSADDR. + */ + return CPHYSADDR(x); + } + + /* + * EVA is in use so the memory map could be anything, making it not + * safe to just mask out bits. + */ + return x - PAGE_OFFSET + PHYS_OFFSET; +} +#define __pa(x) ___pa((unsigned long)(x)) #define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET - PHYS_OFFSET)) #include <asm/io.h> @@ -229,8 +247,10 @@ extern int __virt_addr_valid(const volatile void *kaddr); #define virt_addr_valid(kaddr) \ __virt_addr_valid((const volatile void *) (kaddr)) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS \ + (VM_READ | VM_WRITE | \ + ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #define UNCAC_ADDR(addr) ((addr) - PAGE_OFFSET + UNCAC_BASE) #define CAC_ADDR(addr) ((addr) - UNCAC_BASE + PAGE_OFFSET) diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index 86b239d9d75d..9b63cd41213d 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -80,16 +80,6 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, #define HAVE_ARCH_PCI_RESOURCE_TO_USER -static inline void pci_resource_to_user(const struct pci_dev *dev, int bar, - const struct resource *rsrc, resource_size_t *start, - resource_size_t *end) -{ - phys_addr_t size = resource_size(rsrc); - - *start = fixup_bigphys_addr(rsrc->start, size); - *end = rsrc->start + size; -} - /* * Dynamic DMA mapping stuff. * MIPS has everything mapped statically. diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 7d44e888134f..70128d3f770a 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -159,7 +159,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte) * it better already be global) */ if (pte_none(*buddy)) { - if (!config_enabled(CONFIG_XPA)) + if (!IS_ENABLED(CONFIG_XPA)) buddy->pte_low |= _PAGE_GLOBAL; buddy->pte_high |= _PAGE_GLOBAL; } @@ -172,7 +172,7 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt htw_stop(); /* Preserve global status for the pair */ - if (config_enabled(CONFIG_XPA)) { + if (IS_ENABLED(CONFIG_XPA)) { if (ptep_buddy(ptep)->pte_high & _PAGE_GLOBAL) null.pte_high = _PAGE_GLOBAL; } else { @@ -319,7 +319,7 @@ static inline int pte_young(pte_t pte) { return pte.pte_low & _PAGE_ACCESSED; } static inline pte_t pte_wrprotect(pte_t pte) { pte.pte_low &= ~_PAGE_WRITE; - if (!config_enabled(CONFIG_XPA)) + if (!IS_ENABLED(CONFIG_XPA)) pte.pte_low &= ~_PAGE_SILENT_WRITE; pte.pte_high &= ~_PAGE_SILENT_WRITE; return pte; @@ -328,7 +328,7 @@ static inline pte_t pte_wrprotect(pte_t pte) static inline pte_t pte_mkclean(pte_t pte) { pte.pte_low &= ~_PAGE_MODIFIED; - if (!config_enabled(CONFIG_XPA)) + if (!IS_ENABLED(CONFIG_XPA)) pte.pte_low &= ~_PAGE_SILENT_WRITE; pte.pte_high &= ~_PAGE_SILENT_WRITE; return pte; @@ -337,7 +337,7 @@ static inline pte_t pte_mkclean(pte_t pte) static inline pte_t pte_mkold(pte_t pte) { pte.pte_low &= ~_PAGE_ACCESSED; - if (!config_enabled(CONFIG_XPA)) + if (!IS_ENABLED(CONFIG_XPA)) pte.pte_low &= ~_PAGE_SILENT_READ; pte.pte_high &= ~_PAGE_SILENT_READ; return pte; @@ -347,7 +347,7 @@ static inline pte_t pte_mkwrite(pte_t pte) { pte.pte_low |= _PAGE_WRITE; if (pte.pte_low & _PAGE_MODIFIED) { - if (!config_enabled(CONFIG_XPA)) + if (!IS_ENABLED(CONFIG_XPA)) pte.pte_low |= _PAGE_SILENT_WRITE; pte.pte_high |= _PAGE_SILENT_WRITE; } @@ -358,7 +358,7 @@ static inline pte_t pte_mkdirty(pte_t pte) { pte.pte_low |= _PAGE_MODIFIED; if (pte.pte_low & _PAGE_WRITE) { - if (!config_enabled(CONFIG_XPA)) + if (!IS_ENABLED(CONFIG_XPA)) pte.pte_low |= _PAGE_SILENT_WRITE; pte.pte_high |= _PAGE_SILENT_WRITE; } @@ -369,7 +369,7 @@ static inline pte_t pte_mkyoung(pte_t pte) { pte.pte_low |= _PAGE_ACCESSED; if (!(pte.pte_low & _PAGE_NO_READ)) { - if (!config_enabled(CONFIG_XPA)) + if (!IS_ENABLED(CONFIG_XPA)) pte.pte_low |= _PAGE_SILENT_READ; pte.pte_high |= _PAGE_SILENT_READ; } diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index 7e78b6208d7d..0d36c87acbe2 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -11,12 +11,14 @@ #ifndef _ASM_PROCESSOR_H #define _ASM_PROCESSOR_H +#include <linux/atomic.h> #include <linux/cpumask.h> #include <linux/threads.h> #include <asm/cachectl.h> #include <asm/cpu.h> #include <asm/cpu-info.h> +#include <asm/dsemul.h> #include <asm/mipsregs.h> #include <asm/prefetch.h> @@ -78,7 +80,11 @@ extern unsigned int vced_count, vcei_count; #endif -#define STACK_TOP (TASK_SIZE & PAGE_MASK) +/* + * One page above the stack is used for branch delay slot "emulation". + * See dsemul.c for details. + */ +#define STACK_TOP ((TASK_SIZE & PAGE_MASK) - PAGE_SIZE) /* * This decides where the kernel will search for a free chunk of vm @@ -256,6 +262,12 @@ struct thread_struct { /* Saved fpu/fpu emulator stuff. */ struct mips_fpu_struct fpu FPU_ALIGN; + /* Assigned branch delay slot 'emulation' frame */ + atomic_t bd_emu_frame; + /* PC of the branch from a branch delay slot 'emulation' */ + unsigned long bd_emu_branch_pc; + /* PC to continue from following a branch delay slot 'emulation' */ + unsigned long bd_emu_cont_pc; #ifdef CONFIG_MIPS_MT_FPAFF /* Emulated instruction count */ unsigned long emulated_fp; @@ -323,6 +335,10 @@ struct thread_struct { * FPU affinity state (null if not FPAFF) \ */ \ FPAFF_INIT \ + /* Delay slot emulation */ \ + .bd_emu_frame = ATOMIC_INIT(BD_EMUFRAME_NONE), \ + .bd_emu_branch_pc = 0, \ + .bd_emu_cont_pc = 0, \ /* \ * Saved DSP stuff \ */ \ diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h index 38902bf97adc..667ca3c467b7 100644 --- a/arch/mips/include/asm/r4kcache.h +++ b/arch/mips/include/asm/r4kcache.h @@ -210,7 +210,11 @@ static inline void protected_writeback_dcache_line(unsigned long addr) static inline void protected_writeback_scache_line(unsigned long addr) { +#ifdef CONFIG_EVA + protected_cachee_op(Hit_Writeback_Inv_SD, addr); +#else protected_cache_op(Hit_Writeback_Inv_SD, addr); +#endif } /* diff --git a/arch/mips/include/asm/seccomp.h b/arch/mips/include/asm/seccomp.h index 684fb3a12ed3..d886d6f7687a 100644 --- a/arch/mips/include/asm/seccomp.h +++ b/arch/mips/include/asm/seccomp.h @@ -16,10 +16,10 @@ static inline const int *get_compat_mode1_syscalls(void) 0, /* null terminated */ }; - if (config_enabled(CONFIG_MIPS32_O32) && test_thread_flag(TIF_32BIT_REGS)) + if (IS_ENABLED(CONFIG_MIPS32_O32) && test_thread_flag(TIF_32BIT_REGS)) return syscalls_O32; - if (config_enabled(CONFIG_MIPS32_N32)) + if (IS_ENABLED(CONFIG_MIPS32_N32)) return syscalls_N32; BUG(); diff --git a/arch/mips/include/asm/setup.h b/arch/mips/include/asm/setup.h index d7bfdeba9e84..4f5279a8308d 100644 --- a/arch/mips/include/asm/setup.h +++ b/arch/mips/include/asm/setup.h @@ -21,6 +21,7 @@ extern void *set_vi_handler(int n, vi_handler_t addr); extern void *set_except_vector(int n, void *addr); extern unsigned long ebase; +extern unsigned int hwrena; extern void per_cpu_trap_init(bool); extern void cpu_cache_init(void); diff --git a/arch/mips/include/asm/signal.h b/arch/mips/include/asm/signal.h index 2292373ff11a..23d6b8015c79 100644 --- a/arch/mips/include/asm/signal.h +++ b/arch/mips/include/asm/signal.h @@ -11,7 +11,7 @@ #include <uapi/asm/signal.h> -#ifdef CONFIG_MIPS32_COMPAT +#ifdef CONFIG_MIPS32_O32 extern struct mips_abi mips_abi_32; #define sig_uses_siginfo(ka, abi) \ @@ -19,8 +19,8 @@ extern struct mips_abi mips_abi_32; ((ka)->sa.sa_flags & SA_SIGINFO)) #else #define sig_uses_siginfo(ka, abi) \ - (config_enabled(CONFIG_64BIT) ? 1 : \ - (config_enabled(CONFIG_TRAD_SIGNALS) ? \ + (IS_ENABLED(CONFIG_64BIT) ? 1 : \ + (IS_ENABLED(CONFIG_TRAD_SIGNALS) ? \ ((ka)->sa.sa_flags & SA_SIGINFO) : 1) ) #endif diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index 03722d4326a1..8bc6c70a4030 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -23,7 +23,7 @@ extern int smp_num_siblings; extern cpumask_t cpu_sibling_map[]; extern cpumask_t cpu_core_map[]; -extern cpumask_t cpu_foreign_map; +extern cpumask_t cpu_foreign_map[]; #define raw_smp_processor_id() (current_thread_info()->cpu) @@ -53,6 +53,8 @@ extern cpumask_t cpu_coherent_mask; extern void asmlinkage smp_bootstrap(void); +extern void calculate_cpu_foreign_map(void); + /* * this function sends a 'reschedule' IPI to another CPU. * it goes straight through and wastes no time serializing diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index 47bc45a67e9b..d87882513ee3 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -99,7 +99,7 @@ static inline void syscall_get_arguments(struct task_struct *task, { int ret; /* O32 ABI syscall() - Either 64-bit with O32 or 32-bit */ - if ((config_enabled(CONFIG_32BIT) || + if ((IS_ENABLED(CONFIG_32BIT) || test_tsk_thread_flag(task, TIF_32BIT_REGS)) && (regs->regs[2] == __NR_syscall)) i++; diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index 7f109d4f64a4..11b965f98d95 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -88,7 +88,7 @@ extern u64 __ua_limit; */ static inline bool eva_kernel_access(void) { - if (!config_enabled(CONFIG_EVA)) + if (!IS_ENABLED(CONFIG_EVA)) return false; return segment_eq(get_fs(), get_ds()); diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h index b6ecfeee4dbe..f7929f65f7ca 100644 --- a/arch/mips/include/asm/uasm.h +++ b/arch/mips/include/asm/uasm.h @@ -104,8 +104,13 @@ Ip_u1s2(_bltz); Ip_u1s2(_bltzl); Ip_u1u2s3(_bne); Ip_u2s3u1(_cache); +Ip_u1u2(_cfc1); +Ip_u2u1(_cfcmsa); +Ip_u1u2(_ctc1); +Ip_u2u1(_ctcmsa); Ip_u2u1s3(_daddiu); Ip_u3u1u2(_daddu); +Ip_u1(_di); Ip_u2u1msbu3(_dins); Ip_u2u1msbu3(_dinsm); Ip_u1u2(_divu); @@ -141,6 +146,8 @@ Ip_u1(_mfhi); Ip_u1(_mflo); Ip_u1u2u3(_mtc0); Ip_u1u2u3(_mthc0); +Ip_u1(_mthi); +Ip_u1(_mtlo); Ip_u3u1u2(_mul); Ip_u3u1u2(_or); Ip_u2u1u3(_ori); diff --git a/arch/mips/include/uapi/asm/auxvec.h b/arch/mips/include/uapi/asm/auxvec.h index c9c7195272c4..45ba259a3618 100644 --- a/arch/mips/include/uapi/asm/auxvec.h +++ b/arch/mips/include/uapi/asm/auxvec.h @@ -14,4 +14,6 @@ /* Location of VDSO image. */ #define AT_SYSINFO_EHDR 33 +#define AT_VECTOR_SIZE_ARCH 1 /* entries in ARCH_DLINFO */ + #endif /* __ASM_AUXVEC_H */ diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h index 8051f9aa1379..77429d1622b3 100644 --- a/arch/mips/include/uapi/asm/inst.h +++ b/arch/mips/include/uapi/asm/inst.h @@ -21,20 +21,20 @@ enum major_op { spec_op, bcond_op, j_op, jal_op, beq_op, bne_op, blez_op, bgtz_op, - addi_op, cbcond0_op = addi_op, addiu_op, slti_op, sltiu_op, + addi_op, pop10_op = addi_op, addiu_op, slti_op, sltiu_op, andi_op, ori_op, xori_op, lui_op, cop0_op, cop1_op, cop2_op, cop1x_op, beql_op, bnel_op, blezl_op, bgtzl_op, - daddi_op, cbcond1_op = daddi_op, daddiu_op, ldl_op, ldr_op, + daddi_op, pop30_op = daddi_op, daddiu_op, ldl_op, ldr_op, spec2_op, jalx_op, mdmx_op, msa_op = mdmx_op, spec3_op, lb_op, lh_op, lwl_op, lw_op, lbu_op, lhu_op, lwr_op, lwu_op, sb_op, sh_op, swl_op, sw_op, sdl_op, sdr_op, swr_op, cache_op, ll_op, lwc1_op, lwc2_op, bc6_op = lwc2_op, pref_op, - lld_op, ldc1_op, ldc2_op, beqzcjic_op = ldc2_op, ld_op, + lld_op, ldc1_op, ldc2_op, pop66_op = ldc2_op, ld_op, sc_op, swc1_op, swc2_op, balc6_op = swc2_op, major_3b_op, - scd_op, sdc1_op, sdc2_op, bnezcjialc_op = sdc2_op, sd_op + scd_op, sdc1_op, sdc2_op, pop76_op = sdc2_op, sd_op }; /* @@ -93,6 +93,50 @@ enum spec3_op { }; /* + * Bits 10-6 minor opcode for r6 spec mult/div encodings + */ +enum mult_op { + mult_mult_op = 0x0, + mult_mul_op = 0x2, + mult_muh_op = 0x3, +}; +enum multu_op { + multu_multu_op = 0x0, + multu_mulu_op = 0x2, + multu_muhu_op = 0x3, +}; +enum div_op { + div_div_op = 0x0, + div_div6_op = 0x2, + div_mod_op = 0x3, +}; +enum divu_op { + divu_divu_op = 0x0, + divu_divu6_op = 0x2, + divu_modu_op = 0x3, +}; +enum dmult_op { + dmult_dmult_op = 0x0, + dmult_dmul_op = 0x2, + dmult_dmuh_op = 0x3, +}; +enum dmultu_op { + dmultu_dmultu_op = 0x0, + dmultu_dmulu_op = 0x2, + dmultu_dmuhu_op = 0x3, +}; +enum ddiv_op { + ddiv_ddiv_op = 0x0, + ddiv_ddiv6_op = 0x2, + ddiv_dmod_op = 0x3, +}; +enum ddivu_op { + ddivu_ddivu_op = 0x0, + ddivu_ddivu6_op = 0x2, + ddivu_dmodu_op = 0x3, +}; + +/* * rt field of bcond opcodes. */ enum rt_op { @@ -103,7 +147,7 @@ enum rt_op { bltzal_op, bgezal_op, bltzall_op, bgezall_op, rt_op_0x14, rt_op_0x15, rt_op_0x16, rt_op_0x17, rt_op_0x18, rt_op_0x19, rt_op_0x1a, rt_op_0x1b, - bposge32_op, rt_op_0x1d, rt_op_0x1e, rt_op_0x1f + bposge32_op, rt_op_0x1d, rt_op_0x1e, synci_op }; /* @@ -238,6 +282,21 @@ enum bshfl_func { }; /* + * MSA minor opcodes. + */ +enum msa_func { + msa_elm_op = 0x19, +}; + +/* + * MSA ELM opcodes. + */ +enum msa_elm { + msa_ctc_op = 0x3e, + msa_cfc_op = 0x7e, +}; + +/* * func field for MSA MI10 format. */ enum msa_mi10_func { @@ -264,7 +323,7 @@ enum mm_major_op { mm_pool32b_op, mm_pool16b_op, mm_lhu16_op, mm_andi16_op, mm_addiu32_op, mm_lhu32_op, mm_sh32_op, mm_lh32_op, mm_pool32i_op, mm_pool16c_op, mm_lwsp16_op, mm_pool16d_op, - mm_ori32_op, mm_pool32f_op, mm_reserved1_op, mm_reserved2_op, + mm_ori32_op, mm_pool32f_op, mm_pool32s_op, mm_reserved2_op, mm_pool32c_op, mm_lwgp16_op, mm_lw16_op, mm_pool16e_op, mm_xori32_op, mm_jals32_op, mm_addiupc_op, mm_reserved3_op, mm_reserved4_op, mm_pool16f_op, mm_sb16_op, mm_beqz16_op, @@ -360,7 +419,10 @@ enum mm_32axf_minor_op { mm_mflo32_op = 0x075, mm_jalrhb_op = 0x07c, mm_tlbwi_op = 0x08d, + mm_mthi32_op = 0x0b5, mm_tlbwr_op = 0x0cd, + mm_mtlo32_op = 0x0f5, + mm_di_op = 0x11d, mm_jalrs_op = 0x13c, mm_jalrshb_op = 0x17c, mm_sync_op = 0x1ad, @@ -479,6 +541,13 @@ enum mm_32f_73_minor_op { }; /* + * (microMIPS) POOL32S minor opcodes. + */ +enum mm_32s_minor_op { + mm_32s_elm_op = 0x16, +}; + +/* * (microMIPS) POOL16C minor opcodes. */ enum mm_16c_minor_op { @@ -586,6 +655,36 @@ struct r_format { /* Register format */ ;)))))) }; +struct c0r_format { /* C0 register format */ + __BITFIELD_FIELD(unsigned int opcode : 6, + __BITFIELD_FIELD(unsigned int rs : 5, + __BITFIELD_FIELD(unsigned int rt : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + __BITFIELD_FIELD(unsigned int z: 8, + __BITFIELD_FIELD(unsigned int sel : 3, + ;)))))) +}; + +struct mfmc0_format { /* MFMC0 register format */ + __BITFIELD_FIELD(unsigned int opcode : 6, + __BITFIELD_FIELD(unsigned int rs : 5, + __BITFIELD_FIELD(unsigned int rt : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + __BITFIELD_FIELD(unsigned int re : 5, + __BITFIELD_FIELD(unsigned int sc : 1, + __BITFIELD_FIELD(unsigned int : 2, + __BITFIELD_FIELD(unsigned int sel : 3, + ;)))))))) +}; + +struct co_format { /* C0 CO format */ + __BITFIELD_FIELD(unsigned int opcode : 6, + __BITFIELD_FIELD(unsigned int co : 1, + __BITFIELD_FIELD(unsigned int code : 19, + __BITFIELD_FIELD(unsigned int func : 6, + ;)))) +}; + struct p_format { /* Performance counter format (R10000) */ __BITFIELD_FIELD(unsigned int opcode : 6, __BITFIELD_FIELD(unsigned int rs : 5, @@ -937,6 +1036,9 @@ union mips_instruction { struct u_format u_format; struct c_format c_format; struct r_format r_format; + struct c0r_format c0r_format; + struct mfmc0_format mfmc0_format; + struct co_format co_format; struct p_format p_format; struct f_format f_format; struct ma_format ma_format; diff --git a/arch/mips/jz4740/setup.c b/arch/mips/jz4740/setup.c index 0914ef775b5f..6d0152321819 100644 --- a/arch/mips/jz4740/setup.c +++ b/arch/mips/jz4740/setup.c @@ -75,7 +75,7 @@ void __init device_tree_init(void) const char *get_system_type(void) { - if (config_enabled(CONFIG_MACH_JZ4780)) + if (IS_ENABLED(CONFIG_MACH_JZ4780)) return "JZ4780"; return "JZ4740"; diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index e6053d07072f..4a603a3ea657 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -71,7 +71,7 @@ obj-$(CONFIG_32BIT) += scall32-o32.o obj-$(CONFIG_64BIT) += scall64-64.o obj-$(CONFIG_MIPS32_COMPAT) += linux32.o ptrace32.o signal32.o obj-$(CONFIG_MIPS32_N32) += binfmt_elfn32.o scall64-n32.o signal_n32.o -obj-$(CONFIG_MIPS32_O32) += binfmt_elfo32.o scall64-o32.o +obj-$(CONFIG_MIPS32_O32) += binfmt_elfo32.o scall64-o32.o signal_o32.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_PROC_FS) += proc.o diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index 1ea973b2abb1..fae2f9447792 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -339,71 +339,9 @@ void output_pm_defines(void) } #endif -void output_cpuinfo_defines(void) -{ - COMMENT(" MIPS cpuinfo offsets. "); - DEFINE(CPUINFO_SIZE, sizeof(struct cpuinfo_mips)); -#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE - OFFSET(CPUINFO_ASID_MASK, cpuinfo_mips, asid_mask); -#endif -} - void output_kvm_defines(void) { COMMENT(" KVM/MIPS Specfic offsets. "); - DEFINE(VCPU_ARCH_SIZE, sizeof(struct kvm_vcpu_arch)); - OFFSET(VCPU_RUN, kvm_vcpu, run); - OFFSET(VCPU_HOST_ARCH, kvm_vcpu, arch); - - OFFSET(VCPU_HOST_EBASE, kvm_vcpu_arch, host_ebase); - OFFSET(VCPU_GUEST_EBASE, kvm_vcpu_arch, guest_ebase); - - OFFSET(VCPU_HOST_STACK, kvm_vcpu_arch, host_stack); - OFFSET(VCPU_HOST_GP, kvm_vcpu_arch, host_gp); - - OFFSET(VCPU_HOST_CP0_BADVADDR, kvm_vcpu_arch, host_cp0_badvaddr); - OFFSET(VCPU_HOST_CP0_CAUSE, kvm_vcpu_arch, host_cp0_cause); - OFFSET(VCPU_HOST_EPC, kvm_vcpu_arch, host_cp0_epc); - OFFSET(VCPU_HOST_ENTRYHI, kvm_vcpu_arch, host_cp0_entryhi); - - OFFSET(VCPU_GUEST_INST, kvm_vcpu_arch, guest_inst); - - OFFSET(VCPU_R0, kvm_vcpu_arch, gprs[0]); - OFFSET(VCPU_R1, kvm_vcpu_arch, gprs[1]); - OFFSET(VCPU_R2, kvm_vcpu_arch, gprs[2]); - OFFSET(VCPU_R3, kvm_vcpu_arch, gprs[3]); - OFFSET(VCPU_R4, kvm_vcpu_arch, gprs[4]); - OFFSET(VCPU_R5, kvm_vcpu_arch, gprs[5]); - OFFSET(VCPU_R6, kvm_vcpu_arch, gprs[6]); - OFFSET(VCPU_R7, kvm_vcpu_arch, gprs[7]); - OFFSET(VCPU_R8, kvm_vcpu_arch, gprs[8]); - OFFSET(VCPU_R9, kvm_vcpu_arch, gprs[9]); - OFFSET(VCPU_R10, kvm_vcpu_arch, gprs[10]); - OFFSET(VCPU_R11, kvm_vcpu_arch, gprs[11]); - OFFSET(VCPU_R12, kvm_vcpu_arch, gprs[12]); - OFFSET(VCPU_R13, kvm_vcpu_arch, gprs[13]); - OFFSET(VCPU_R14, kvm_vcpu_arch, gprs[14]); - OFFSET(VCPU_R15, kvm_vcpu_arch, gprs[15]); - OFFSET(VCPU_R16, kvm_vcpu_arch, gprs[16]); - OFFSET(VCPU_R17, kvm_vcpu_arch, gprs[17]); - OFFSET(VCPU_R18, kvm_vcpu_arch, gprs[18]); - OFFSET(VCPU_R19, kvm_vcpu_arch, gprs[19]); - OFFSET(VCPU_R20, kvm_vcpu_arch, gprs[20]); - OFFSET(VCPU_R21, kvm_vcpu_arch, gprs[21]); - OFFSET(VCPU_R22, kvm_vcpu_arch, gprs[22]); - OFFSET(VCPU_R23, kvm_vcpu_arch, gprs[23]); - OFFSET(VCPU_R24, kvm_vcpu_arch, gprs[24]); - OFFSET(VCPU_R25, kvm_vcpu_arch, gprs[25]); - OFFSET(VCPU_R26, kvm_vcpu_arch, gprs[26]); - OFFSET(VCPU_R27, kvm_vcpu_arch, gprs[27]); - OFFSET(VCPU_R28, kvm_vcpu_arch, gprs[28]); - OFFSET(VCPU_R29, kvm_vcpu_arch, gprs[29]); - OFFSET(VCPU_R30, kvm_vcpu_arch, gprs[30]); - OFFSET(VCPU_R31, kvm_vcpu_arch, gprs[31]); - OFFSET(VCPU_LO, kvm_vcpu_arch, lo); - OFFSET(VCPU_HI, kvm_vcpu_arch, hi); - OFFSET(VCPU_PC, kvm_vcpu_arch, pc); - BLANK(); OFFSET(VCPU_FPR0, kvm_vcpu_arch, fpu.fpr[0]); OFFSET(VCPU_FPR1, kvm_vcpu_arch, fpu.fpr[1]); @@ -441,14 +379,6 @@ void output_kvm_defines(void) OFFSET(VCPU_FCR31, kvm_vcpu_arch, fpu.fcr31); OFFSET(VCPU_MSA_CSR, kvm_vcpu_arch, fpu.msacsr); BLANK(); - - OFFSET(VCPU_COP0, kvm_vcpu_arch, cop0); - OFFSET(VCPU_GUEST_KERNEL_ASID, kvm_vcpu_arch, guest_kernel_asid); - OFFSET(VCPU_GUEST_USER_ASID, kvm_vcpu_arch, guest_user_asid); - - OFFSET(COP0_TLB_HI, mips_coproc, reg[MIPS_CP0_TLB_HI][0]); - OFFSET(COP0_STATUS, mips_coproc, reg[MIPS_CP0_STATUS][0]); - BLANK(); } #ifdef CONFIG_MIPS_CPS diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index 6dc3f1fdaccc..46c227fc98f5 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -790,7 +790,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, epc += 4 + (insn.i_format.simmediate << 2); regs->cp0_epc = epc; break; - case beqzcjic_op: + case pop66_op: if (!cpu_has_mips_r6) { ret = -SIGILL; break; @@ -798,7 +798,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, /* Compact branch: BEQZC || JIC */ regs->cp0_epc += 8; break; - case bnezcjialc_op: + case pop76_op: if (!cpu_has_mips_r6) { ret = -SIGILL; break; @@ -809,8 +809,8 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, regs->cp0_epc += 8; break; #endif - case cbcond0_op: - case cbcond1_op: + case pop10_op: + case pop30_op: /* Only valid for MIPS R6 */ if (!cpu_has_mips_r6) { ret = -SIGILL; diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c index e4c21bbf9422..804d2a2a19fe 100644 --- a/arch/mips/kernel/cevt-r4k.c +++ b/arch/mips/kernel/cevt-r4k.c @@ -276,12 +276,7 @@ int r4k_clockevent_init(void) CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_PERCPU; - clockevent_set_clock(cd, mips_hpt_frequency); - - /* Calculate the min / max delta */ - cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd); min_delta = calculate_min_delta(); - cd->min_delta_ns = clockevent_delta2ns(min_delta, cd); cd->rating = 300; cd->irq = irq; @@ -289,7 +284,7 @@ int r4k_clockevent_init(void) cd->set_next_event = mips_next_event; cd->event_handler = mips_event_handler; - clockevents_register_device(cd); + clockevents_config_and_register(cd, mips_hpt_frequency, min_delta, 0x7fffffff); if (cp0_timer_irq_installed) return 0; diff --git a/arch/mips/kernel/cpu-bugs64.c b/arch/mips/kernel/cpu-bugs64.c index 6392dbe504fb..a378e44688f5 100644 --- a/arch/mips/kernel/cpu-bugs64.c +++ b/arch/mips/kernel/cpu-bugs64.c @@ -244,7 +244,7 @@ static inline void check_daddi(void) panic(bug64hit, !DADDI_WAR ? daddiwar : nowar); } -int daddiu_bug = config_enabled(CONFIG_CPU_MIPSR6) ? 0 : -1; +int daddiu_bug = IS_ENABLED(CONFIG_CPU_MIPSR6) ? 0 : -1; static inline void check_daddiu(void) { @@ -314,7 +314,7 @@ static inline void check_daddiu(void) void __init check_bugs64_early(void) { - if (!config_enabled(CONFIG_CPU_MIPSR6)) { + if (!IS_ENABLED(CONFIG_CPU_MIPSR6)) { check_mult_sh(); check_daddiu(); } @@ -322,6 +322,6 @@ void __init check_bugs64_early(void) void __init check_bugs64(void) { - if (!config_enabled(CONFIG_CPU_MIPSR6)) + if (!IS_ENABLED(CONFIG_CPU_MIPSR6)) check_daddi(); } diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c index 1f910563fdf6..d76275da54cb 100644 --- a/arch/mips/kernel/csrc-r4k.c +++ b/arch/mips/kernel/csrc-r4k.c @@ -23,7 +23,7 @@ static struct clocksource clocksource_mips = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static u64 notrace r4k_read_sched_clock(void) +static u64 __maybe_unused notrace r4k_read_sched_clock(void) { return read_c0_count(); } @@ -82,7 +82,9 @@ int __init init_r4k_clocksource(void) clocksource_register_hz(&clocksource_mips, mips_hpt_frequency); +#ifndef CONFIG_CPU_FREQ sched_clock_register(r4k_read_sched_clock, 32, mips_hpt_frequency); +#endif return 0; } diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c index 891f5ee63983..6430bff21fff 100644 --- a/arch/mips/kernel/elf.c +++ b/arch/mips/kernel/elf.c @@ -8,9 +8,12 @@ * option) any later version. */ +#include <linux/binfmts.h> #include <linux/elf.h> +#include <linux/export.h> #include <linux/sched.h> +#include <asm/cpu-features.h> #include <asm/cpu-info.h> /* Whether to accept legacy-NaN and 2008-NaN user binaries. */ @@ -179,7 +182,7 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr, return -ELIBBAD; } - if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT)) + if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT)) return 0; fp_abi = state->fp_abi; @@ -285,7 +288,7 @@ void mips_set_personality_fp(struct arch_elf_state *state) * not be worried about N32/N64 binaries. */ - if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT)) + if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT)) return; switch (state->overall_fp_mode) { @@ -326,3 +329,19 @@ void mips_set_personality_nan(struct arch_elf_state *state) BUG(); } } + +int mips_elf_read_implies_exec(void *elf_ex, int exstack) +{ + if (exstack != EXSTACK_DISABLE_X) { + /* The binary doesn't request a non-executable stack */ + return 1; + } + + if (!cpu_has_rixi) { + /* The CPU doesn't support non-executable memory */ + return 1; + } + + return 0; +} +EXPORT_SYMBOL(mips_elf_read_implies_exec); diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S index 56e8fede3fd8..cf052204eb0a 100644 --- a/arch/mips/kernel/head.S +++ b/arch/mips/kernel/head.S @@ -93,21 +93,24 @@ NESTED(kernel_entry, 16, sp) # kernel entry point jr t0 0: +#ifdef CONFIG_USE_OF #ifdef CONFIG_MIPS_RAW_APPENDED_DTB - PTR_LA t0, __appended_dtb + PTR_LA t2, __appended_dtb #ifdef CONFIG_CPU_BIG_ENDIAN li t1, 0xd00dfeed #else li t1, 0xedfe0dd0 #endif - lw t2, (t0) - bne t1, t2, not_found - nop + lw t0, (t2) + beq t0, t1, dtb_found +#endif + li t1, -2 + beq a0, t1, dtb_found + move t2, a1 - move a1, t0 - PTR_LI a0, -2 -not_found: + li t2, 0 +dtb_found: #endif PTR_LA t0, __bss_start # clear .bss LONG_S zero, (t0) @@ -122,6 +125,10 @@ not_found: LONG_S a2, fw_arg2 LONG_S a3, fw_arg3 +#ifdef CONFIG_USE_OF + LONG_S t2, fw_passed_dtb +#endif + MTC0 zero, CP0_CONTEXT # clear context register PTR_LA $28, init_thread_union /* Set the SP after an empty pt_regs. */ diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index 760217bbb2fa..659e6d3ae335 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -251,7 +251,7 @@ int mips_cm_probe(void) mips_cm_probe_l2sync(); /* determine register width for this CM */ - mips_cm_is64 = config_enabled(CONFIG_64BIT) && (mips_cm_revision() >= CM_REV_CM3); + mips_cm_is64 = IS_ENABLED(CONFIG_64BIT) && (mips_cm_revision() >= CM_REV_CM3); for_each_possible_cpu(cpu) spin_lock_init(&per_cpu(cm_core_lock, cpu)); diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c index 7ff2a557f4aa..c3372cac6db2 100644 --- a/arch/mips/kernel/mips-r2-to-r6-emul.c +++ b/arch/mips/kernel/mips-r2-to-r6-emul.c @@ -84,7 +84,7 @@ static inline int mipsr6_emul(struct pt_regs *regs, u32 ir) (s32)MIPSInst_SIMM(ir); return 0; case daddiu_op: - if (config_enabled(CONFIG_32BIT)) + if (IS_ENABLED(CONFIG_32BIT)) break; if (MIPSInst_RT(ir)) @@ -143,7 +143,7 @@ static inline int mipsr6_emul(struct pt_regs *regs, u32 ir) (u32)regs->regs[MIPSInst_RT(ir)]); return 0; case dsll_op: - if (config_enabled(CONFIG_32BIT) || MIPSInst_RS(ir)) + if (IS_ENABLED(CONFIG_32BIT) || MIPSInst_RS(ir)) break; if (MIPSInst_RD(ir)) @@ -152,7 +152,7 @@ static inline int mipsr6_emul(struct pt_regs *regs, u32 ir) MIPSInst_FD(ir)); return 0; case dsrl_op: - if (config_enabled(CONFIG_32BIT) || MIPSInst_RS(ir)) + if (IS_ENABLED(CONFIG_32BIT) || MIPSInst_RS(ir)) break; if (MIPSInst_RD(ir)) @@ -161,7 +161,7 @@ static inline int mipsr6_emul(struct pt_regs *regs, u32 ir) MIPSInst_FD(ir)); return 0; case daddu_op: - if (config_enabled(CONFIG_32BIT) || MIPSInst_FD(ir)) + if (IS_ENABLED(CONFIG_32BIT) || MIPSInst_FD(ir)) break; if (MIPSInst_RD(ir)) @@ -170,7 +170,7 @@ static inline int mipsr6_emul(struct pt_regs *regs, u32 ir) (u64)regs->regs[MIPSInst_RT(ir)]; return 0; case dsubu_op: - if (config_enabled(CONFIG_32BIT) || MIPSInst_FD(ir)) + if (IS_ENABLED(CONFIG_32BIT) || MIPSInst_FD(ir)) break; if (MIPSInst_RD(ir)) @@ -283,7 +283,7 @@ static int jr_func(struct pt_regs *regs, u32 ir) err = mipsr6_emul(regs, nir); if (err > 0) { regs->cp0_epc = nepc; - err = mips_dsemul(regs, nir, cepc); + err = mips_dsemul(regs, nir, epc, cepc); if (err == SIGILL) err = SIGEMT; MIPS_R2_STATS(dsemul); @@ -498,7 +498,7 @@ static int dmult_func(struct pt_regs *regs, u32 ir) s64 res; s64 rt, rs; - if (config_enabled(CONFIG_32BIT)) + if (IS_ENABLED(CONFIG_32BIT)) return SIGILL; rt = regs->regs[MIPSInst_RT(ir)]; @@ -530,7 +530,7 @@ static int dmultu_func(struct pt_regs *regs, u32 ir) u64 res; u64 rt, rs; - if (config_enabled(CONFIG_32BIT)) + if (IS_ENABLED(CONFIG_32BIT)) return SIGILL; rt = regs->regs[MIPSInst_RT(ir)]; @@ -561,7 +561,7 @@ static int ddiv_func(struct pt_regs *regs, u32 ir) { s64 rt, rs; - if (config_enabled(CONFIG_32BIT)) + if (IS_ENABLED(CONFIG_32BIT)) return SIGILL; rt = regs->regs[MIPSInst_RT(ir)]; @@ -586,7 +586,7 @@ static int ddivu_func(struct pt_regs *regs, u32 ir) { u64 rt, rs; - if (config_enabled(CONFIG_32BIT)) + if (IS_ENABLED(CONFIG_32BIT)) return SIGILL; rt = regs->regs[MIPSInst_RT(ir)]; @@ -825,7 +825,7 @@ static int dclz_func(struct pt_regs *regs, u32 ir) u64 res; u64 rs; - if (config_enabled(CONFIG_32BIT)) + if (IS_ENABLED(CONFIG_32BIT)) return SIGILL; if (!MIPSInst_RD(ir)) @@ -852,7 +852,7 @@ static int dclo_func(struct pt_regs *regs, u32 ir) u64 res; u64 rs; - if (config_enabled(CONFIG_32BIT)) + if (IS_ENABLED(CONFIG_32BIT)) return SIGILL; if (!MIPSInst_RD(ir)) @@ -1033,7 +1033,7 @@ repeat: if (nir) { err = mipsr6_emul(regs, nir); if (err > 0) { - err = mips_dsemul(regs, nir, cpc); + err = mips_dsemul(regs, nir, epc, cpc); if (err == SIGILL) err = SIGEMT; MIPS_R2_STATS(dsemul); @@ -1082,7 +1082,7 @@ repeat: if (nir) { err = mipsr6_emul(regs, nir); if (err > 0) { - err = mips_dsemul(regs, nir, cpc); + err = mips_dsemul(regs, nir, epc, cpc); if (err == SIGILL) err = SIGEMT; MIPS_R2_STATS(dsemul); @@ -1149,7 +1149,7 @@ repeat: if (nir) { err = mipsr6_emul(regs, nir); if (err > 0) { - err = mips_dsemul(regs, nir, cpc); + err = mips_dsemul(regs, nir, epc, cpc); if (err == SIGILL) err = SIGEMT; MIPS_R2_STATS(dsemul); @@ -1484,7 +1484,7 @@ fpu_emul: break; case ldl_op: - if (config_enabled(CONFIG_32BIT)) { + if (IS_ENABLED(CONFIG_32BIT)) { err = SIGILL; break; } @@ -1603,7 +1603,7 @@ fpu_emul: break; case ldr_op: - if (config_enabled(CONFIG_32BIT)) { + if (IS_ENABLED(CONFIG_32BIT)) { err = SIGILL; break; } @@ -1722,7 +1722,7 @@ fpu_emul: break; case sdl_op: - if (config_enabled(CONFIG_32BIT)) { + if (IS_ENABLED(CONFIG_32BIT)) { err = SIGILL; break; } @@ -1840,7 +1840,7 @@ fpu_emul: break; case sdr_op: - if (config_enabled(CONFIG_32BIT)) { + if (IS_ENABLED(CONFIG_32BIT)) { err = SIGILL; break; } @@ -2072,7 +2072,7 @@ fpu_emul: break; case lld_op: - if (config_enabled(CONFIG_32BIT)) { + if (IS_ENABLED(CONFIG_32BIT)) { err = SIGILL; break; } @@ -2133,7 +2133,7 @@ fpu_emul: break; case scd_op: - if (config_enabled(CONFIG_32BIT)) { + if (IS_ENABLED(CONFIG_32BIT)) { err = SIGILL; break; } diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c index adda3ffb9b78..5b31a9405ebc 100644 --- a/arch/mips/kernel/pm-cps.c +++ b/arch/mips/kernel/pm-cps.c @@ -148,7 +148,7 @@ int cps_pm_enter_state(enum cps_pm_state state) } /* Setup the VPE to run mips_cps_pm_restore when started again */ - if (config_enabled(CONFIG_CPU_PM) && state == CPS_PM_POWER_GATED) { + if (IS_ENABLED(CONFIG_CPU_PM) && state == CPS_PM_POWER_GATED) { /* Power gating relies upon CPS SMP */ if (!mips_cps_smp_in_use()) return -EINVAL; @@ -387,7 +387,7 @@ static void * __init cps_gen_entry_code(unsigned cpu, enum cps_pm_state state) memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); - if (config_enabled(CONFIG_CPU_PM) && state == CPS_PM_POWER_GATED) { + if (IS_ENABLED(CONFIG_CPU_PM) && state == CPS_PM_POWER_GATED) { /* Power gating relies upon CPS SMP */ if (!mips_cps_smp_in_use()) goto out_err; diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 813ed7829c61..7429ad09fbe3 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -30,6 +30,7 @@ #include <asm/asm.h> #include <asm/bootinfo.h> #include <asm/cpu.h> +#include <asm/dsemul.h> #include <asm/dsp.h> #include <asm/fpu.h> #include <asm/msa.h> @@ -68,11 +69,22 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp) lose_fpu(0); clear_thread_flag(TIF_MSA_CTX_LIVE); clear_used_math(); + atomic_set(¤t->thread.bd_emu_frame, BD_EMUFRAME_NONE); init_dsp(); regs->cp0_epc = pc; regs->regs[29] = sp; } +void exit_thread(struct task_struct *tsk) +{ + /* + * User threads may have allocated a delay slot emulation frame. + * If so, clean up that allocation. + */ + if (!(current->flags & PF_KTHREAD)) + dsemul_thread_cleanup(tsk); +} + int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { /* @@ -159,6 +171,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, clear_tsk_thread_flag(p, TIF_FPUBOUND); #endif /* CONFIG_MIPS_MT_FPAFF */ + atomic_set(&p->thread.bd_emu_frame, BD_EMUFRAME_NONE); + if (clone_flags & CLONE_SETTLS) ti->tp_value = regs->regs[7]; diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 9c0b387d6427..51d3988933f8 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -348,7 +348,7 @@ EXPORT(sysn32_call_table) PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key PTR sys_request_key - PTR sys_keyctl /* 6245 */ + PTR compat_sys_keyctl /* 6245 */ PTR sys_set_thread_area PTR sys_inotify_init PTR sys_inotify_add_watch diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index f4f28b1580de..6efa7136748f 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -504,7 +504,7 @@ EXPORT(sys32_call_table) PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key /* 4280 */ PTR sys_request_key - PTR sys_keyctl + PTR compat_sys_keyctl PTR sys_set_thread_area PTR sys_inotify_init PTR sys_inotify_add_watch /* 4285 */ diff --git a/arch/mips/kernel/segment.c b/arch/mips/kernel/segment.c index 87bc74a5a518..2703f218202e 100644 --- a/arch/mips/kernel/segment.c +++ b/arch/mips/kernel/segment.c @@ -26,17 +26,20 @@ static void build_segment_config(char *str, unsigned int cfg) /* * Access modes MK, MSK and MUSK are mapped segments. Therefore - * there is no direct physical address mapping. + * there is no direct physical address mapping unless it becomes + * unmapped uncached at error level due to EU. */ - if ((am == 0) || (am > 3)) { + if ((am == 0) || (am > 3) || (cfg & MIPS_SEGCFG_EU)) str += sprintf(str, " %03lx", ((cfg & MIPS_SEGCFG_PA) >> MIPS_SEGCFG_PA_SHIFT)); + else + str += sprintf(str, " UND"); + + if ((am == 0) || (am > 3)) str += sprintf(str, " %01ld", ((cfg & MIPS_SEGCFG_C) >> MIPS_SEGCFG_C_SHIFT)); - } else { - str += sprintf(str, " UND"); + else str += sprintf(str, " U"); - } /* Exception configuration. */ str += sprintf(str, " %01ld\n", diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index ef408a03e818..36cf8d65c47d 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -875,6 +875,10 @@ void __init setup_arch(char **cmdline_p) unsigned long kernelsp[NR_CPUS]; unsigned long fw_arg0, fw_arg1, fw_arg2, fw_arg3; +#ifdef CONFIG_USE_OF +unsigned long fw_passed_dtb; +#endif + #ifdef CONFIG_DEBUG_FS struct dentry *mips_debugfs_dir; static int __init debugfs_mips(void) diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index ae4231452115..9e224469c788 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -165,7 +165,7 @@ static int save_msa_extcontext(void __user *buf) * should already have been done when handling scalar FP * context. */ - BUG_ON(config_enabled(CONFIG_EVA)); + BUG_ON(IS_ENABLED(CONFIG_EVA)); err = __put_user(read_msa_csr(), &msa->csr); err |= _save_msa_all_upper(&msa->wr); @@ -195,7 +195,7 @@ static int restore_msa_extcontext(void __user *buf, unsigned int size) unsigned int csr; int i, err; - if (!config_enabled(CONFIG_CPU_HAS_MSA)) + if (!IS_ENABLED(CONFIG_CPU_HAS_MSA)) return SIGSYS; if (size != sizeof(*msa)) @@ -215,7 +215,7 @@ static int restore_msa_extcontext(void __user *buf, unsigned int size) * scalar FP context, so FPU & MSA should have already been * disabled whilst handling scalar FP context. */ - BUG_ON(config_enabled(CONFIG_EVA)); + BUG_ON(IS_ENABLED(CONFIG_EVA)); write_msa_csr(csr); err |= _restore_msa_all_upper(&msa->wr); @@ -315,7 +315,7 @@ int protected_save_fp_context(void __user *sc) * EVA does not have userland equivalents of ldc1 or sdc1, so * save to the kernel FP context & copy that to userland below. */ - if (config_enabled(CONFIG_EVA)) + if (IS_ENABLED(CONFIG_EVA)) lose_fpu(1); while (1) { @@ -378,7 +378,7 @@ int protected_restore_fp_context(void __user *sc) * disable the FPU here such that the code below simply copies to * the kernel FP context. */ - if (config_enabled(CONFIG_EVA)) + if (IS_ENABLED(CONFIG_EVA)) lose_fpu(0); while (1) { @@ -772,6 +772,14 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) struct mips_abi *abi = current->thread.abi; void *vdso = current->mm->context.vdso; + /* + * If we were emulating a delay slot instruction, exit that frame such + * that addresses in the sigframe are as expected for userland and we + * don't have a problem if we reuse the thread's frame for an + * instruction within the signal handler. + */ + dsemul_thread_rollback(regs); + if (regs->regs[0]) { switch(regs->regs[2]) { case ERESTART_RESTARTBLOCK: diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c index 78c8349d151c..97b7c51b8251 100644 --- a/arch/mips/kernel/signal32.c +++ b/arch/mips/kernel/signal32.c @@ -6,129 +6,26 @@ * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1994 - 2000, 2006 Ralf Baechle * Copyright (C) 1999, 2000 Silicon Graphics, Inc. + * Copyright (C) 2016, Imagination Technologies Ltd. */ -#include <linux/cache.h> -#include <linux/compat.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/smp.h> +#include <linux/compiler.h> +#include <linux/errno.h> #include <linux/kernel.h> #include <linux/signal.h> #include <linux/syscalls.h> -#include <linux/errno.h> -#include <linux/wait.h> -#include <linux/ptrace.h> -#include <linux/suspend.h> -#include <linux/compiler.h> -#include <linux/uaccess.h> -#include <asm/abi.h> -#include <asm/asm.h> +#include <asm/compat.h> #include <asm/compat-signal.h> -#include <linux/bitops.h> -#include <asm/cacheflush.h> -#include <asm/sim.h> -#include <asm/ucontext.h> -#include <asm/fpu.h> -#include <asm/war.h> -#include <asm/dsp.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> #include "signal-common.h" -/* - * Including <asm/unistd.h> would give use the 64-bit syscall numbers ... - */ -#define __NR_O32_restart_syscall 4253 - /* 32-bit compatibility types */ typedef unsigned int __sighandler32_t; typedef void (*vfptr_t)(void); -struct ucontext32 { - u32 uc_flags; - s32 uc_link; - compat_stack_t uc_stack; - struct sigcontext32 uc_mcontext; - compat_sigset_t uc_sigmask; /* mask last for extensibility */ -}; - -struct sigframe32 { - u32 sf_ass[4]; /* argument save space for o32 */ - u32 sf_pad[2]; /* Was: signal trampoline */ - struct sigcontext32 sf_sc; - compat_sigset_t sf_mask; -}; - -struct rt_sigframe32 { - u32 rs_ass[4]; /* argument save space for o32 */ - u32 rs_pad[2]; /* Was: signal trampoline */ - compat_siginfo_t rs_info; - struct ucontext32 rs_uc; -}; - -static int setup_sigcontext32(struct pt_regs *regs, - struct sigcontext32 __user *sc) -{ - int err = 0; - int i; - - err |= __put_user(regs->cp0_epc, &sc->sc_pc); - - err |= __put_user(0, &sc->sc_regs[0]); - for (i = 1; i < 32; i++) - err |= __put_user(regs->regs[i], &sc->sc_regs[i]); - - err |= __put_user(regs->hi, &sc->sc_mdhi); - err |= __put_user(regs->lo, &sc->sc_mdlo); - if (cpu_has_dsp) { - err |= __put_user(rddsp(DSP_MASK), &sc->sc_dsp); - err |= __put_user(mfhi1(), &sc->sc_hi1); - err |= __put_user(mflo1(), &sc->sc_lo1); - err |= __put_user(mfhi2(), &sc->sc_hi2); - err |= __put_user(mflo2(), &sc->sc_lo2); - err |= __put_user(mfhi3(), &sc->sc_hi3); - err |= __put_user(mflo3(), &sc->sc_lo3); - } - - /* - * Save FPU state to signal context. Signal handler - * will "inherit" current FPU state. - */ - err |= protected_save_fp_context(sc); - - return err; -} - -static int restore_sigcontext32(struct pt_regs *regs, - struct sigcontext32 __user *sc) -{ - int err = 0; - s32 treg; - int i; - - /* Always make any pending restarted system calls return -EINTR */ - current->restart_block.fn = do_no_restart_syscall; - - err |= __get_user(regs->cp0_epc, &sc->sc_pc); - err |= __get_user(regs->hi, &sc->sc_mdhi); - err |= __get_user(regs->lo, &sc->sc_mdlo); - if (cpu_has_dsp) { - err |= __get_user(treg, &sc->sc_hi1); mthi1(treg); - err |= __get_user(treg, &sc->sc_lo1); mtlo1(treg); - err |= __get_user(treg, &sc->sc_hi2); mthi2(treg); - err |= __get_user(treg, &sc->sc_lo2); mtlo2(treg); - err |= __get_user(treg, &sc->sc_hi3); mthi3(treg); - err |= __get_user(treg, &sc->sc_lo3); mtlo3(treg); - err |= __get_user(treg, &sc->sc_dsp); wrdsp(treg, DSP_MASK); - } - - for (i = 1; i < 32; i++) - err |= __get_user(regs->regs[i], &sc->sc_regs[i]); - - return err ?: protected_restore_fp_context(sc); -} - /* * Atomically swap in the new signal mask, and wait for a signal. */ @@ -247,176 +144,3 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) return 0; } - -asmlinkage void sys32_sigreturn(nabi_no_regargs struct pt_regs regs) -{ - struct sigframe32 __user *frame; - sigset_t blocked; - int sig; - - frame = (struct sigframe32 __user *) regs.regs[29]; - if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; - if (__copy_conv_sigset_from_user(&blocked, &frame->sf_mask)) - goto badframe; - - set_current_blocked(&blocked); - - sig = restore_sigcontext32(®s, &frame->sf_sc); - if (sig < 0) - goto badframe; - else if (sig) - force_sig(sig, current); - - /* - * Don't let your children do this ... - */ - __asm__ __volatile__( - "move\t$29, %0\n\t" - "j\tsyscall_exit" - :/* no outputs */ - :"r" (®s)); - /* Unreached */ - -badframe: - force_sig(SIGSEGV, current); -} - -asmlinkage void sys32_rt_sigreturn(nabi_no_regargs struct pt_regs regs) -{ - struct rt_sigframe32 __user *frame; - sigset_t set; - int sig; - - frame = (struct rt_sigframe32 __user *) regs.regs[29]; - if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; - if (__copy_conv_sigset_from_user(&set, &frame->rs_uc.uc_sigmask)) - goto badframe; - - set_current_blocked(&set); - - sig = restore_sigcontext32(®s, &frame->rs_uc.uc_mcontext); - if (sig < 0) - goto badframe; - else if (sig) - force_sig(sig, current); - - if (compat_restore_altstack(&frame->rs_uc.uc_stack)) - goto badframe; - - /* - * Don't let your children do this ... - */ - __asm__ __volatile__( - "move\t$29, %0\n\t" - "j\tsyscall_exit" - :/* no outputs */ - :"r" (®s)); - /* Unreached */ - -badframe: - force_sig(SIGSEGV, current); -} - -static int setup_frame_32(void *sig_return, struct ksignal *ksig, - struct pt_regs *regs, sigset_t *set) -{ - struct sigframe32 __user *frame; - int err = 0; - - frame = get_sigframe(ksig, regs, sizeof(*frame)); - if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) - return -EFAULT; - - err |= setup_sigcontext32(regs, &frame->sf_sc); - err |= __copy_conv_sigset_to_user(&frame->sf_mask, set); - - if (err) - return -EFAULT; - - /* - * Arguments to signal handler: - * - * a0 = signal number - * a1 = 0 (should be cause) - * a2 = pointer to struct sigcontext - * - * $25 and c0_epc point to the signal handler, $29 points to the - * struct sigframe. - */ - regs->regs[ 4] = ksig->sig; - regs->regs[ 5] = 0; - regs->regs[ 6] = (unsigned long) &frame->sf_sc; - regs->regs[29] = (unsigned long) frame; - regs->regs[31] = (unsigned long) sig_return; - regs->cp0_epc = regs->regs[25] = (unsigned long) ksig->ka.sa.sa_handler; - - DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n", - current->comm, current->pid, - frame, regs->cp0_epc, regs->regs[31]); - - return 0; -} - -static int setup_rt_frame_32(void *sig_return, struct ksignal *ksig, - struct pt_regs *regs, sigset_t *set) -{ - struct rt_sigframe32 __user *frame; - int err = 0; - - frame = get_sigframe(ksig, regs, sizeof(*frame)); - if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) - return -EFAULT; - - /* Convert (siginfo_t -> compat_siginfo_t) and copy to user. */ - err |= copy_siginfo_to_user32(&frame->rs_info, &ksig->info); - - /* Create the ucontext. */ - err |= __put_user(0, &frame->rs_uc.uc_flags); - err |= __put_user(0, &frame->rs_uc.uc_link); - err |= __compat_save_altstack(&frame->rs_uc.uc_stack, regs->regs[29]); - err |= setup_sigcontext32(regs, &frame->rs_uc.uc_mcontext); - err |= __copy_conv_sigset_to_user(&frame->rs_uc.uc_sigmask, set); - - if (err) - return -EFAULT; - - /* - * Arguments to signal handler: - * - * a0 = signal number - * a1 = 0 (should be cause) - * a2 = pointer to ucontext - * - * $25 and c0_epc point to the signal handler, $29 points to - * the struct rt_sigframe32. - */ - regs->regs[ 4] = ksig->sig; - regs->regs[ 5] = (unsigned long) &frame->rs_info; - regs->regs[ 6] = (unsigned long) &frame->rs_uc; - regs->regs[29] = (unsigned long) frame; - regs->regs[31] = (unsigned long) sig_return; - regs->cp0_epc = regs->regs[25] = (unsigned long) ksig->ka.sa.sa_handler; - - DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n", - current->comm, current->pid, - frame, regs->cp0_epc, regs->regs[31]); - - return 0; -} - -/* - * o32 compatibility on 64-bit kernels, without DSP ASE - */ -struct mips_abi mips_abi_32 = { - .setup_frame = setup_frame_32, - .setup_rt_frame = setup_rt_frame_32, - .restart = __NR_O32_restart_syscall, - - .off_sc_fpregs = offsetof(struct sigcontext32, sc_fpregs), - .off_sc_fpc_csr = offsetof(struct sigcontext32, sc_fpc_csr), - .off_sc_used_math = offsetof(struct sigcontext32, sc_used_math), - - .vdso = &vdso_image_o32, -}; diff --git a/arch/mips/kernel/signal_o32.c b/arch/mips/kernel/signal_o32.c new file mode 100644 index 000000000000..5e169fc5ca5c --- /dev/null +++ b/arch/mips/kernel/signal_o32.c @@ -0,0 +1,285 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1994 - 2000, 2006 Ralf Baechle + * Copyright (C) 1999, 2000 Silicon Graphics, Inc. + * Copyright (C) 2016, Imagination Technologies Ltd. + */ +#include <linux/compiler.h> +#include <linux/errno.h> +#include <linux/signal.h> +#include <linux/uaccess.h> + +#include <asm/abi.h> +#include <asm/compat-signal.h> +#include <asm/dsp.h> +#include <asm/sim.h> +#include <asm/unistd.h> + +#include "signal-common.h" + +/* + * Including <asm/unistd.h> would give use the 64-bit syscall numbers ... + */ +#define __NR_O32_restart_syscall 4253 + +struct sigframe32 { + u32 sf_ass[4]; /* argument save space for o32 */ + u32 sf_pad[2]; /* Was: signal trampoline */ + struct sigcontext32 sf_sc; + compat_sigset_t sf_mask; +}; + +struct ucontext32 { + u32 uc_flags; + s32 uc_link; + compat_stack_t uc_stack; + struct sigcontext32 uc_mcontext; + compat_sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +struct rt_sigframe32 { + u32 rs_ass[4]; /* argument save space for o32 */ + u32 rs_pad[2]; /* Was: signal trampoline */ + compat_siginfo_t rs_info; + struct ucontext32 rs_uc; +}; + +static int setup_sigcontext32(struct pt_regs *regs, + struct sigcontext32 __user *sc) +{ + int err = 0; + int i; + + err |= __put_user(regs->cp0_epc, &sc->sc_pc); + + err |= __put_user(0, &sc->sc_regs[0]); + for (i = 1; i < 32; i++) + err |= __put_user(regs->regs[i], &sc->sc_regs[i]); + + err |= __put_user(regs->hi, &sc->sc_mdhi); + err |= __put_user(regs->lo, &sc->sc_mdlo); + if (cpu_has_dsp) { + err |= __put_user(rddsp(DSP_MASK), &sc->sc_dsp); + err |= __put_user(mfhi1(), &sc->sc_hi1); + err |= __put_user(mflo1(), &sc->sc_lo1); + err |= __put_user(mfhi2(), &sc->sc_hi2); + err |= __put_user(mflo2(), &sc->sc_lo2); + err |= __put_user(mfhi3(), &sc->sc_hi3); + err |= __put_user(mflo3(), &sc->sc_lo3); + } + + /* + * Save FPU state to signal context. Signal handler + * will "inherit" current FPU state. + */ + err |= protected_save_fp_context(sc); + + return err; +} + +static int restore_sigcontext32(struct pt_regs *regs, + struct sigcontext32 __user *sc) +{ + int err = 0; + s32 treg; + int i; + + /* Always make any pending restarted system calls return -EINTR */ + current->restart_block.fn = do_no_restart_syscall; + + err |= __get_user(regs->cp0_epc, &sc->sc_pc); + err |= __get_user(regs->hi, &sc->sc_mdhi); + err |= __get_user(regs->lo, &sc->sc_mdlo); + if (cpu_has_dsp) { + err |= __get_user(treg, &sc->sc_hi1); mthi1(treg); + err |= __get_user(treg, &sc->sc_lo1); mtlo1(treg); + err |= __get_user(treg, &sc->sc_hi2); mthi2(treg); + err |= __get_user(treg, &sc->sc_lo2); mtlo2(treg); + err |= __get_user(treg, &sc->sc_hi3); mthi3(treg); + err |= __get_user(treg, &sc->sc_lo3); mtlo3(treg); + err |= __get_user(treg, &sc->sc_dsp); wrdsp(treg, DSP_MASK); + } + + for (i = 1; i < 32; i++) + err |= __get_user(regs->regs[i], &sc->sc_regs[i]); + + return err ?: protected_restore_fp_context(sc); +} + +static int setup_frame_32(void *sig_return, struct ksignal *ksig, + struct pt_regs *regs, sigset_t *set) +{ + struct sigframe32 __user *frame; + int err = 0; + + frame = get_sigframe(ksig, regs, sizeof(*frame)); + if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) + return -EFAULT; + + err |= setup_sigcontext32(regs, &frame->sf_sc); + err |= __copy_conv_sigset_to_user(&frame->sf_mask, set); + + if (err) + return -EFAULT; + + /* + * Arguments to signal handler: + * + * a0 = signal number + * a1 = 0 (should be cause) + * a2 = pointer to struct sigcontext + * + * $25 and c0_epc point to the signal handler, $29 points to the + * struct sigframe. + */ + regs->regs[ 4] = ksig->sig; + regs->regs[ 5] = 0; + regs->regs[ 6] = (unsigned long) &frame->sf_sc; + regs->regs[29] = (unsigned long) frame; + regs->regs[31] = (unsigned long) sig_return; + regs->cp0_epc = regs->regs[25] = (unsigned long) ksig->ka.sa.sa_handler; + + DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n", + current->comm, current->pid, + frame, regs->cp0_epc, regs->regs[31]); + + return 0; +} + +asmlinkage void sys32_rt_sigreturn(nabi_no_regargs struct pt_regs regs) +{ + struct rt_sigframe32 __user *frame; + sigset_t set; + int sig; + + frame = (struct rt_sigframe32 __user *) regs.regs[29]; + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_conv_sigset_from_user(&set, &frame->rs_uc.uc_sigmask)) + goto badframe; + + set_current_blocked(&set); + + sig = restore_sigcontext32(®s, &frame->rs_uc.uc_mcontext); + if (sig < 0) + goto badframe; + else if (sig) + force_sig(sig, current); + + if (compat_restore_altstack(&frame->rs_uc.uc_stack)) + goto badframe; + + /* + * Don't let your children do this ... + */ + __asm__ __volatile__( + "move\t$29, %0\n\t" + "j\tsyscall_exit" + :/* no outputs */ + :"r" (®s)); + /* Unreached */ + +badframe: + force_sig(SIGSEGV, current); +} + +static int setup_rt_frame_32(void *sig_return, struct ksignal *ksig, + struct pt_regs *regs, sigset_t *set) +{ + struct rt_sigframe32 __user *frame; + int err = 0; + + frame = get_sigframe(ksig, regs, sizeof(*frame)); + if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) + return -EFAULT; + + /* Convert (siginfo_t -> compat_siginfo_t) and copy to user. */ + err |= copy_siginfo_to_user32(&frame->rs_info, &ksig->info); + + /* Create the ucontext. */ + err |= __put_user(0, &frame->rs_uc.uc_flags); + err |= __put_user(0, &frame->rs_uc.uc_link); + err |= __compat_save_altstack(&frame->rs_uc.uc_stack, regs->regs[29]); + err |= setup_sigcontext32(regs, &frame->rs_uc.uc_mcontext); + err |= __copy_conv_sigset_to_user(&frame->rs_uc.uc_sigmask, set); + + if (err) + return -EFAULT; + + /* + * Arguments to signal handler: + * + * a0 = signal number + * a1 = 0 (should be cause) + * a2 = pointer to ucontext + * + * $25 and c0_epc point to the signal handler, $29 points to + * the struct rt_sigframe32. + */ + regs->regs[ 4] = ksig->sig; + regs->regs[ 5] = (unsigned long) &frame->rs_info; + regs->regs[ 6] = (unsigned long) &frame->rs_uc; + regs->regs[29] = (unsigned long) frame; + regs->regs[31] = (unsigned long) sig_return; + regs->cp0_epc = regs->regs[25] = (unsigned long) ksig->ka.sa.sa_handler; + + DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n", + current->comm, current->pid, + frame, regs->cp0_epc, regs->regs[31]); + + return 0; +} + +/* + * o32 compatibility on 64-bit kernels, without DSP ASE + */ +struct mips_abi mips_abi_32 = { + .setup_frame = setup_frame_32, + .setup_rt_frame = setup_rt_frame_32, + .restart = __NR_O32_restart_syscall, + + .off_sc_fpregs = offsetof(struct sigcontext32, sc_fpregs), + .off_sc_fpc_csr = offsetof(struct sigcontext32, sc_fpc_csr), + .off_sc_used_math = offsetof(struct sigcontext32, sc_used_math), + + .vdso = &vdso_image_o32, +}; + + +asmlinkage void sys32_sigreturn(nabi_no_regargs struct pt_regs regs) +{ + struct sigframe32 __user *frame; + sigset_t blocked; + int sig; + + frame = (struct sigframe32 __user *) regs.regs[29]; + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_conv_sigset_from_user(&blocked, &frame->sf_mask)) + goto badframe; + + set_current_blocked(&blocked); + + sig = restore_sigcontext32(®s, &frame->sf_sc); + if (sig < 0) + goto badframe; + else if (sig) + force_sig(sig, current); + + /* + * Don't let your children do this ... + */ + __asm__ __volatile__( + "move\t$29, %0\n\t" + "j\tsyscall_exit" + :/* no outputs */ + :"r" (®s)); + /* Unreached */ + +badframe: + force_sig(SIGSEGV, current); +} diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index e02addc0307f..6d0f1321e084 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -363,6 +363,7 @@ static int bmips_cpu_disable(void) pr_info("SMP: CPU%d is offline\n", cpu); set_cpu_online(cpu, false); + calculate_cpu_foreign_map(); cpumask_clear_cpu(cpu, &cpu_callin_map); clear_c0_status(IE_IRQ5); diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 4ed36f288d64..e9d9fc6c754c 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -46,8 +46,8 @@ static unsigned core_vpe_count(unsigned core) if (threads_disabled) return 1; - if ((!config_enabled(CONFIG_MIPS_MT_SMP) || !cpu_has_mipsmt) - && (!config_enabled(CONFIG_CPU_MIPSR6) || !cpu_has_vp)) + if ((!IS_ENABLED(CONFIG_MIPS_MT_SMP) || !cpu_has_mipsmt) + && (!IS_ENABLED(CONFIG_CPU_MIPSR6) || !cpu_has_vp)) return 1; mips_cm_lock_other(core, 0); @@ -206,7 +206,7 @@ err_out: } } -static void boot_core(unsigned core) +static void boot_core(unsigned int core, unsigned int vpe_id) { u32 access, stat, seq_state; unsigned timeout; @@ -233,8 +233,9 @@ static void boot_core(unsigned core) mips_cpc_lock_other(core); if (mips_cm_revision() >= CM_REV_CM3) { - /* Run VP0 following the reset */ - write_cpc_co_vp_run(0x1); + /* Run only the requested VP following the reset */ + write_cpc_co_vp_stop(0xf); + write_cpc_co_vp_run(1 << vpe_id); /* * Ensure that the VP_RUN register is written before the @@ -306,7 +307,7 @@ static void cps_boot_secondary(int cpu, struct task_struct *idle) if (!test_bit(core, core_power)) { /* Boot a VPE on a powered down core */ - boot_core(core); + boot_core(core, vpe_id); goto out; } @@ -397,6 +398,7 @@ static int cps_cpu_disable(void) atomic_sub(1 << cpu_vpe_id(¤t_cpu_data), &core_cfg->vpe_mask); smp_mb__after_atomic(); set_cpu_online(cpu, false); + calculate_cpu_foreign_map(); cpumask_clear_cpu(cpu, &cpu_callin_map); return 0; @@ -411,14 +413,16 @@ static enum { void play_dead(void) { - unsigned cpu, core; + unsigned int cpu, core, vpe_id; local_irq_disable(); idle_task_exit(); cpu = smp_processor_id(); cpu_death = CPU_DEATH_POWER; - if (cpu_has_mipsmt) { + pr_debug("CPU%d going offline\n", cpu); + + if (cpu_has_mipsmt || cpu_has_vp) { core = cpu_data[cpu].core; /* Look for another online VPE within the core */ @@ -439,10 +443,21 @@ void play_dead(void) complete(&cpu_death_chosen); if (cpu_death == CPU_DEATH_HALT) { - /* Halt this TC */ - write_c0_tchalt(TCHALT_H); - instruction_hazard(); + vpe_id = cpu_vpe_id(&cpu_data[cpu]); + + pr_debug("Halting core %d VP%d\n", core, vpe_id); + if (cpu_has_mipsmt) { + /* Halt this TC */ + write_c0_tchalt(TCHALT_H); + instruction_hazard(); + } else if (cpu_has_vp) { + write_cpc_cl_vp_stop(1 << vpe_id); + + /* Ensure that the VP_STOP register is written */ + wmb(); + } } else { + pr_debug("Gating power to core %d\n", core); /* Power down the core */ cps_pm_enter_state(CPS_PM_POWER_GATED); } @@ -469,6 +484,7 @@ static void wait_for_sibling_halt(void *ptr_cpu) static void cps_cpu_die(unsigned int cpu) { unsigned core = cpu_data[cpu].core; + unsigned int vpe_id = cpu_vpe_id(&cpu_data[cpu]); unsigned stat; int err; @@ -497,10 +513,12 @@ static void cps_cpu_die(unsigned int cpu) * in which case the CPC will refuse to power down the core. */ do { + mips_cm_lock_other(core, vpe_id); mips_cpc_lock_other(core); stat = read_cpc_co_stat_conf(); stat &= CPC_Cx_STAT_CONF_SEQSTATE_MSK; mips_cpc_unlock_other(); + mips_cm_unlock_other(); } while (stat != CPC_Cx_STAT_CONF_SEQSTATE_D0 && stat != CPC_Cx_STAT_CONF_SEQSTATE_D2 && stat != CPC_Cx_STAT_CONF_SEQSTATE_U2); @@ -517,6 +535,12 @@ static void cps_cpu_die(unsigned int cpu) (void *)(unsigned long)cpu, 1); if (err) panic("Failed to call remote sibling CPU\n"); + } else if (cpu_has_vp) { + do { + mips_cm_lock_other(core, vpe_id); + stat = read_cpc_co_vp_running(); + mips_cm_unlock_other(); + } while (stat & (1 << vpe_id)); } } diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index f9d01e953acb..f95f094f36e4 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -72,7 +72,7 @@ EXPORT_SYMBOL(cpu_core_map); * A logcal cpu mask containing only one VPE per core to * reduce the number of IPIs on large MT systems. */ -cpumask_t cpu_foreign_map __read_mostly; +cpumask_t cpu_foreign_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_foreign_map); /* representing cpus for which sibling maps can be computed */ @@ -124,7 +124,7 @@ static inline void set_cpu_core_map(int cpu) * Calculate a new cpu_foreign_map mask whenever a * new cpu appears or disappears. */ -static inline void calculate_cpu_foreign_map(void) +void calculate_cpu_foreign_map(void) { int i, k, core_present; cpumask_t temp_foreign_map; @@ -141,7 +141,9 @@ static inline void calculate_cpu_foreign_map(void) cpumask_set_cpu(i, &temp_foreign_map); } - cpumask_copy(&cpu_foreign_map, &temp_foreign_map); + for_each_online_cpu(i) + cpumask_andnot(&cpu_foreign_map[i], + &temp_foreign_map, &cpu_sibling_map[i]); } struct plat_smp_ops *mp_ops; @@ -344,16 +346,9 @@ asmlinkage void start_secondary(void) static void stop_this_cpu(void *dummy) { /* - * Remove this CPU. Be a bit slow here and - * set the bits for every online CPU so we don't miss - * any IPI whilst taking this VPE down. + * Remove this CPU: */ - cpumask_copy(&cpu_foreign_map, cpu_online_mask); - - /* Make it visible to every other CPU */ - smp_mb(); - set_cpu_online(smp_processor_id(), false); calculate_cpu_foreign_map(); local_irq_disable(); @@ -512,10 +507,17 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned l smp_on_other_tlbs(flush_tlb_range_ipi, &fd); } else { unsigned int cpu; + int exec = vma->vm_flags & VM_EXEC; for_each_online_cpu(cpu) { + /* + * flush_cache_range() will only fully flush icache if + * the VMA is executable, otherwise we must invalidate + * ASID without it appearing to has_valid_asid() as if + * mm has been completely unused by that CPU. + */ if (cpu != smp_processor_id() && cpu_context(cpu, mm)) - cpu_context(cpu, mm) = 0; + cpu_context(cpu, mm) = !exec; } } local_flush_tlb_range(vma, start, end); @@ -560,8 +562,14 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) unsigned int cpu; for_each_online_cpu(cpu) { + /* + * flush_cache_page() only does partial flushes, so + * invalidate ASID without it appearing to + * has_valid_asid() as if mm has been completely unused + * by that CPU. + */ if (cpu != smp_processor_id() && cpu_context(cpu, vma->vm_mm)) - cpu_context(cpu, vma->vm_mm) = 0; + cpu_context(cpu, vma->vm_mm) = 1; } } local_flush_tlb_page(vma, page); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 4a1712b5abdf..3de85be2486a 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -619,17 +619,17 @@ static int simulate_rdhwr(struct pt_regs *regs, int rd, int rt) perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); switch (rd) { - case 0: /* CPU number */ + case MIPS_HWR_CPUNUM: /* CPU number */ regs->regs[rt] = smp_processor_id(); return 0; - case 1: /* SYNCI length */ + case MIPS_HWR_SYNCISTEP: /* SYNCI length */ regs->regs[rt] = min(current_cpu_data.dcache.linesz, current_cpu_data.icache.linesz); return 0; - case 2: /* Read count register */ + case MIPS_HWR_CC: /* Read count register */ regs->regs[rt] = read_c0_count(); return 0; - case 3: /* Count register resolution */ + case MIPS_HWR_CCRES: /* Count register resolution */ switch (current_cpu_type()) { case CPU_20KC: case CPU_25KF: @@ -639,7 +639,7 @@ static int simulate_rdhwr(struct pt_regs *regs, int rd, int rt) regs->regs[rt] = 2; } return 0; - case 29: + case MIPS_HWR_ULR: /* Read UserLocal register */ regs->regs[rt] = ti->tp_value; return 0; default: @@ -704,6 +704,7 @@ asmlinkage void do_ov(struct pt_regs *regs) int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) { struct siginfo si = { 0 }; + struct vm_area_struct *vma; switch (sig) { case 0: @@ -744,7 +745,8 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) si.si_addr = fault_addr; si.si_signo = sig; down_read(¤t->mm->mmap_sem); - if (find_vma(current->mm, (unsigned long)fault_addr)) + vma = find_vma(current->mm, (unsigned long)fault_addr); + if (vma && (vma->vm_start <= (unsigned long)fault_addr)) si.si_code = SEGV_ACCERR; else si.si_code = SEGV_MAPERR; @@ -1859,6 +1861,7 @@ void __noreturn nmi_exception_handler(struct pt_regs *regs) #define VECTORSPACING 0x100 /* for EI/VI mode */ unsigned long ebase; +EXPORT_SYMBOL_GPL(ebase); unsigned long exception_handlers[32]; unsigned long vi_handlers[64]; @@ -2063,16 +2066,22 @@ static void configure_status(void) status_set); } +unsigned int hwrena; +EXPORT_SYMBOL_GPL(hwrena); + /* configure HWRENA register */ static void configure_hwrena(void) { - unsigned int hwrena = cpu_hwrena_impl_bits; + hwrena = cpu_hwrena_impl_bits; if (cpu_has_mips_r2_r6) - hwrena |= 0x0000000f; + hwrena |= MIPS_HWRENA_CPUNUM | + MIPS_HWRENA_SYNCISTEP | + MIPS_HWRENA_CC | + MIPS_HWRENA_CCRES; if (!noulri && cpu_has_userlocal) - hwrena |= (1 << 29); + hwrena |= MIPS_HWRENA_ULR; if (hwrena) write_c0_hwrena(hwrena); diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index 28b3af73a17b..f1c308dbbc4a 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -1025,7 +1025,7 @@ static void emulate_load_store_insn(struct pt_regs *regs, if (!access_ok(VERIFY_READ, addr, 2)) goto sigbus; - if (config_enabled(CONFIG_EVA)) { + if (IS_ENABLED(CONFIG_EVA)) { if (segment_eq(get_fs(), get_ds())) LoadHW(addr, value, res); else @@ -1044,7 +1044,7 @@ static void emulate_load_store_insn(struct pt_regs *regs, if (!access_ok(VERIFY_READ, addr, 4)) goto sigbus; - if (config_enabled(CONFIG_EVA)) { + if (IS_ENABLED(CONFIG_EVA)) { if (segment_eq(get_fs(), get_ds())) LoadW(addr, value, res); else @@ -1063,7 +1063,7 @@ static void emulate_load_store_insn(struct pt_regs *regs, if (!access_ok(VERIFY_READ, addr, 2)) goto sigbus; - if (config_enabled(CONFIG_EVA)) { + if (IS_ENABLED(CONFIG_EVA)) { if (segment_eq(get_fs(), get_ds())) LoadHWU(addr, value, res); else @@ -1131,7 +1131,7 @@ static void emulate_load_store_insn(struct pt_regs *regs, compute_return_epc(regs); value = regs->regs[insn.i_format.rt]; - if (config_enabled(CONFIG_EVA)) { + if (IS_ENABLED(CONFIG_EVA)) { if (segment_eq(get_fs(), get_ds())) StoreHW(addr, value, res); else @@ -1151,7 +1151,7 @@ static void emulate_load_store_insn(struct pt_regs *regs, compute_return_epc(regs); value = regs->regs[insn.i_format.rt]; - if (config_enabled(CONFIG_EVA)) { + if (IS_ENABLED(CONFIG_EVA)) { if (segment_eq(get_fs(), get_ds())) StoreW(addr, value, res); else diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index 54e1663ce639..9abe447a4b48 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -107,6 +107,16 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (down_write_killable(&mm->mmap_sem)) return -EINTR; + /* Map delay slot emulation page */ + base = mmap_region(NULL, STACK_TOP, PAGE_SIZE, + VM_READ|VM_WRITE|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + 0); + if (IS_ERR_VALUE(base)) { + ret = base; + goto out; + } + /* * Determine total area size. This includes the VDSO data itself, the * data page, and the GIC user page if present. Always create a mapping diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig index 2ae12825529f..7c56d6b124d1 100644 --- a/arch/mips/kvm/Kconfig +++ b/arch/mips/kvm/Kconfig @@ -17,6 +17,7 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM + select EXPORT_UASM select PREEMPT_NOTIFIERS select ANON_INODES select KVM_MMIO diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile index 637ebbebd549..847429de780d 100644 --- a/arch/mips/kvm/Makefile +++ b/arch/mips/kvm/Makefile @@ -7,9 +7,10 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o -kvm-objs := $(common-objs-y) mips.o emulate.o locore.o \ +kvm-objs := $(common-objs-y) mips.o emulate.o entry.o \ interrupt.o stats.o commpage.o \ dyntrans.o trap_emul.o fpu.o +kvm-objs += mmu.o obj-$(CONFIG_KVM) += kvm.o obj-y += callback.o tlb.o diff --git a/arch/mips/kvm/commpage.c b/arch/mips/kvm/commpage.c index 2d6e976d1add..a36b77e1705c 100644 --- a/arch/mips/kvm/commpage.c +++ b/arch/mips/kvm/commpage.c @@ -4,7 +4,7 @@ * for more details. * * commpage, currently used for Virtual COP0 registers. - * Mapped into the guest kernel @ 0x0. + * Mapped into the guest kernel @ KVM_GUEST_COMMPAGE_ADDR. * * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. * Authors: Sanjay Lal <sanjayl@kymasys.com> diff --git a/arch/mips/kvm/dyntrans.c b/arch/mips/kvm/dyntrans.c index f1527a465c1b..d280894915ed 100644 --- a/arch/mips/kvm/dyntrans.c +++ b/arch/mips/kvm/dyntrans.c @@ -11,6 +11,7 @@ #include <linux/errno.h> #include <linux/err.h> +#include <linux/highmem.h> #include <linux/kvm_host.h> #include <linux/module.h> #include <linux/vmalloc.h> @@ -20,125 +21,114 @@ #include "commpage.h" -#define SYNCI_TEMPLATE 0x041f0000 -#define SYNCI_BASE(x) (((x) >> 21) & 0x1f) -#define SYNCI_OFFSET ((x) & 0xffff) +/** + * kvm_mips_trans_replace() - Replace trapping instruction in guest memory. + * @vcpu: Virtual CPU. + * @opc: PC of instruction to replace. + * @replace: Instruction to write + */ +static int kvm_mips_trans_replace(struct kvm_vcpu *vcpu, u32 *opc, + union mips_instruction replace) +{ + unsigned long paddr, flags; + void *vaddr; + + if (KVM_GUEST_KSEGX((unsigned long)opc) == KVM_GUEST_KSEG0) { + paddr = kvm_mips_translate_guest_kseg0_to_hpa(vcpu, + (unsigned long)opc); + vaddr = kmap_atomic(pfn_to_page(PHYS_PFN(paddr))); + vaddr += paddr & ~PAGE_MASK; + memcpy(vaddr, (void *)&replace, sizeof(u32)); + local_flush_icache_range((unsigned long)vaddr, + (unsigned long)vaddr + 32); + kunmap_atomic(vaddr); + } else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) { + local_irq_save(flags); + memcpy((void *)opc, (void *)&replace, sizeof(u32)); + local_flush_icache_range((unsigned long)opc, + (unsigned long)opc + 32); + local_irq_restore(flags); + } else { + kvm_err("%s: Invalid address: %p\n", __func__, opc); + return -EFAULT; + } -#define LW_TEMPLATE 0x8c000000 -#define CLEAR_TEMPLATE 0x00000020 -#define SW_TEMPLATE 0xac000000 + return 0; +} -int kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc, +int kvm_mips_trans_cache_index(union mips_instruction inst, u32 *opc, struct kvm_vcpu *vcpu) { - int result = 0; - unsigned long kseg0_opc; - uint32_t synci_inst = 0x0; + union mips_instruction nop_inst = { 0 }; /* Replace the CACHE instruction, with a NOP */ - kseg0_opc = - CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa - (vcpu, (unsigned long) opc)); - memcpy((void *)kseg0_opc, (void *)&synci_inst, sizeof(uint32_t)); - local_flush_icache_range(kseg0_opc, kseg0_opc + 32); - - return result; + return kvm_mips_trans_replace(vcpu, opc, nop_inst); } /* * Address based CACHE instructions are transformed into synci(s). A little * heavy for just D-cache invalidates, but avoids an expensive trap */ -int kvm_mips_trans_cache_va(uint32_t inst, uint32_t *opc, +int kvm_mips_trans_cache_va(union mips_instruction inst, u32 *opc, struct kvm_vcpu *vcpu) { - int result = 0; - unsigned long kseg0_opc; - uint32_t synci_inst = SYNCI_TEMPLATE, base, offset; - - base = (inst >> 21) & 0x1f; - offset = inst & 0xffff; - synci_inst |= (base << 21); - synci_inst |= offset; - - kseg0_opc = - CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa - (vcpu, (unsigned long) opc)); - memcpy((void *)kseg0_opc, (void *)&synci_inst, sizeof(uint32_t)); - local_flush_icache_range(kseg0_opc, kseg0_opc + 32); - - return result; + union mips_instruction synci_inst = { 0 }; + + synci_inst.i_format.opcode = bcond_op; + synci_inst.i_format.rs = inst.i_format.rs; + synci_inst.i_format.rt = synci_op; + if (cpu_has_mips_r6) + synci_inst.i_format.simmediate = inst.spec3_format.simmediate; + else + synci_inst.i_format.simmediate = inst.i_format.simmediate; + + return kvm_mips_trans_replace(vcpu, opc, synci_inst); } -int kvm_mips_trans_mfc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu) +int kvm_mips_trans_mfc0(union mips_instruction inst, u32 *opc, + struct kvm_vcpu *vcpu) { - int32_t rt, rd, sel; - uint32_t mfc0_inst; - unsigned long kseg0_opc, flags; - - rt = (inst >> 16) & 0x1f; - rd = (inst >> 11) & 0x1f; - sel = inst & 0x7; + union mips_instruction mfc0_inst = { 0 }; + u32 rd, sel; - if ((rd == MIPS_CP0_ERRCTL) && (sel == 0)) { - mfc0_inst = CLEAR_TEMPLATE; - mfc0_inst |= ((rt & 0x1f) << 16); - } else { - mfc0_inst = LW_TEMPLATE; - mfc0_inst |= ((rt & 0x1f) << 16); - mfc0_inst |= offsetof(struct kvm_mips_commpage, - cop0.reg[rd][sel]); - } + rd = inst.c0r_format.rd; + sel = inst.c0r_format.sel; - if (KVM_GUEST_KSEGX(opc) == KVM_GUEST_KSEG0) { - kseg0_opc = - CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa - (vcpu, (unsigned long) opc)); - memcpy((void *)kseg0_opc, (void *)&mfc0_inst, sizeof(uint32_t)); - local_flush_icache_range(kseg0_opc, kseg0_opc + 32); - } else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) { - local_irq_save(flags); - memcpy((void *)opc, (void *)&mfc0_inst, sizeof(uint32_t)); - local_flush_icache_range((unsigned long)opc, - (unsigned long)opc + 32); - local_irq_restore(flags); + if (rd == MIPS_CP0_ERRCTL && sel == 0) { + mfc0_inst.r_format.opcode = spec_op; + mfc0_inst.r_format.rd = inst.c0r_format.rt; + mfc0_inst.r_format.func = add_op; } else { - kvm_err("%s: Invalid address: %p\n", __func__, opc); - return -EFAULT; + mfc0_inst.i_format.opcode = lw_op; + mfc0_inst.i_format.rt = inst.c0r_format.rt; + mfc0_inst.i_format.simmediate = KVM_GUEST_COMMPAGE_ADDR | + offsetof(struct kvm_mips_commpage, cop0.reg[rd][sel]); +#ifdef CONFIG_CPU_BIG_ENDIAN + if (sizeof(vcpu->arch.cop0->reg[0][0]) == 8) + mfc0_inst.i_format.simmediate |= 4; +#endif } - return 0; + return kvm_mips_trans_replace(vcpu, opc, mfc0_inst); } -int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu) +int kvm_mips_trans_mtc0(union mips_instruction inst, u32 *opc, + struct kvm_vcpu *vcpu) { - int32_t rt, rd, sel; - uint32_t mtc0_inst = SW_TEMPLATE; - unsigned long kseg0_opc, flags; - - rt = (inst >> 16) & 0x1f; - rd = (inst >> 11) & 0x1f; - sel = inst & 0x7; - - mtc0_inst |= ((rt & 0x1f) << 16); - mtc0_inst |= offsetof(struct kvm_mips_commpage, cop0.reg[rd][sel]); - - if (KVM_GUEST_KSEGX(opc) == KVM_GUEST_KSEG0) { - kseg0_opc = - CKSEG0ADDR(kvm_mips_translate_guest_kseg0_to_hpa - (vcpu, (unsigned long) opc)); - memcpy((void *)kseg0_opc, (void *)&mtc0_inst, sizeof(uint32_t)); - local_flush_icache_range(kseg0_opc, kseg0_opc + 32); - } else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) { - local_irq_save(flags); - memcpy((void *)opc, (void *)&mtc0_inst, sizeof(uint32_t)); - local_flush_icache_range((unsigned long)opc, - (unsigned long)opc + 32); - local_irq_restore(flags); - } else { - kvm_err("%s: Invalid address: %p\n", __func__, opc); - return -EFAULT; - } - - return 0; + union mips_instruction mtc0_inst = { 0 }; + u32 rd, sel; + + rd = inst.c0r_format.rd; + sel = inst.c0r_format.sel; + + mtc0_inst.i_format.opcode = sw_op; + mtc0_inst.i_format.rt = inst.c0r_format.rt; + mtc0_inst.i_format.simmediate = KVM_GUEST_COMMPAGE_ADDR | + offsetof(struct kvm_mips_commpage, cop0.reg[rd][sel]); +#ifdef CONFIG_CPU_BIG_ENDIAN + if (sizeof(vcpu->arch.cop0->reg[0][0]) == 8) + mtc0_inst.i_format.simmediate |= 4; +#endif + + return kvm_mips_trans_replace(vcpu, opc, mtc0_inst); } diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 645c8a1982a7..6eb52b9c9818 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -52,7 +52,7 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, goto unaligned; /* Read the instruction */ - insn.word = kvm_get_inst((uint32_t *) epc, vcpu); + insn.word = kvm_get_inst((u32 *) epc, vcpu); if (insn.word == KVM_INVALID_INST) return KVM_INVALID_INST; @@ -161,9 +161,12 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, nextpc = epc; break; - case blez_op: /* not really i_format */ - case blezl_op: - /* rt field assumed to be zero */ + case blez_op: /* POP06 */ +#ifndef CONFIG_CPU_MIPSR6 + case blezl_op: /* removed in R6 */ +#endif + if (insn.i_format.rt != 0) + goto compact_branch; if ((long)arch->gprs[insn.i_format.rs] <= 0) epc = epc + 4 + (insn.i_format.simmediate << 2); else @@ -171,9 +174,12 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, nextpc = epc; break; - case bgtz_op: - case bgtzl_op: - /* rt field assumed to be zero */ + case bgtz_op: /* POP07 */ +#ifndef CONFIG_CPU_MIPSR6 + case bgtzl_op: /* removed in R6 */ +#endif + if (insn.i_format.rt != 0) + goto compact_branch; if ((long)arch->gprs[insn.i_format.rs] > 0) epc = epc + 4 + (insn.i_format.simmediate << 2); else @@ -185,6 +191,40 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, case cop1_op: kvm_err("%s: unsupported cop1_op\n", __func__); break; + +#ifdef CONFIG_CPU_MIPSR6 + /* R6 added the following compact branches with forbidden slots */ + case blezl_op: /* POP26 */ + case bgtzl_op: /* POP27 */ + /* only rt == 0 isn't compact branch */ + if (insn.i_format.rt != 0) + goto compact_branch; + break; + case pop10_op: + case pop30_op: + /* only rs == rt == 0 is reserved, rest are compact branches */ + if (insn.i_format.rs != 0 || insn.i_format.rt != 0) + goto compact_branch; + break; + case pop66_op: + case pop76_op: + /* only rs == 0 isn't compact branch */ + if (insn.i_format.rs != 0) + goto compact_branch; + break; +compact_branch: + /* + * If we've hit an exception on the forbidden slot, then + * the branch must not have been taken. + */ + epc += 8; + nextpc = epc; + break; +#else +compact_branch: + /* Compact branches not supported before R6 */ + break; +#endif } return nextpc; @@ -198,7 +238,7 @@ sigill: return nextpc; } -enum emulation_result update_pc(struct kvm_vcpu *vcpu, uint32_t cause) +enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause) { unsigned long branch_pc; enum emulation_result er = EMULATE_DONE; @@ -243,7 +283,7 @@ static inline int kvm_mips_count_disabled(struct kvm_vcpu *vcpu) * * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running). */ -static uint32_t kvm_mips_ktime_to_count(struct kvm_vcpu *vcpu, ktime_t now) +static u32 kvm_mips_ktime_to_count(struct kvm_vcpu *vcpu, ktime_t now) { s64 now_ns, periods; u64 delta; @@ -300,11 +340,11 @@ static inline ktime_t kvm_mips_count_time(struct kvm_vcpu *vcpu) * * Returns: The current value of the guest CP0_Count register. */ -static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now) +static u32 kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now) { struct mips_coproc *cop0 = vcpu->arch.cop0; ktime_t expires, threshold; - uint32_t count, compare; + u32 count, compare; int running; /* Calculate the biased and scaled guest CP0_Count */ @@ -315,7 +355,7 @@ static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now) * Find whether CP0_Count has reached the closest timer interrupt. If * not, we shouldn't inject it. */ - if ((int32_t)(count - compare) < 0) + if ((s32)(count - compare) < 0) return count; /* @@ -360,7 +400,7 @@ static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now) * * Returns: The current guest CP0_Count value. */ -uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu) +u32 kvm_mips_read_count(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; @@ -387,8 +427,7 @@ uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu) * * Returns: The ktime at the point of freeze. */ -static ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, - uint32_t *count) +static ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count) { ktime_t now; @@ -419,16 +458,16 @@ static ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running). */ static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu, - ktime_t now, uint32_t count) + ktime_t now, u32 count) { struct mips_coproc *cop0 = vcpu->arch.cop0; - uint32_t compare; + u32 compare; u64 delta; ktime_t expire; /* Calculate timeout (wrap 0 to 2^32) */ compare = kvm_read_c0_guest_compare(cop0); - delta = (u64)(uint32_t)(compare - count - 1) + 1; + delta = (u64)(u32)(compare - count - 1) + 1; delta = div_u64(delta * NSEC_PER_SEC, vcpu->arch.count_hz); expire = ktime_add_ns(now, delta); @@ -444,7 +483,7 @@ static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu, * * Sets the CP0_Count value and updates the timer accordingly. */ -void kvm_mips_write_count(struct kvm_vcpu *vcpu, uint32_t count) +void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count) { struct mips_coproc *cop0 = vcpu->arch.cop0; ktime_t now; @@ -538,13 +577,13 @@ int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz) * If @ack, atomically acknowledge any pending timer interrupt, otherwise ensure * any pending timer interrupt is preserved. */ -void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare, bool ack) +void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack) { struct mips_coproc *cop0 = vcpu->arch.cop0; int dc; u32 old_compare = kvm_read_c0_guest_compare(cop0); ktime_t now; - uint32_t count; + u32 count; /* if unchanged, must just be an ack */ if (old_compare == compare) { @@ -585,7 +624,7 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare, bool ack) static ktime_t kvm_mips_count_disable(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; - uint32_t count; + u32 count; ktime_t now; /* Stop hrtimer */ @@ -632,7 +671,7 @@ void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu) void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; - uint32_t count; + u32 count; kvm_clear_c0_guest_cause(cop0, CAUSEF_DC); @@ -661,7 +700,7 @@ int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl) s64 changed = count_ctl ^ vcpu->arch.count_ctl; s64 delta; ktime_t expire, now; - uint32_t count, compare; + u32 count, compare; /* Only allow defined bits to be changed */ if (changed & ~(s64)(KVM_REG_MIPS_COUNT_CTL_DC)) @@ -687,7 +726,7 @@ int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl) */ count = kvm_read_c0_guest_count(cop0); compare = kvm_read_c0_guest_compare(cop0); - delta = (u64)(uint32_t)(compare - count - 1) + 1; + delta = (u64)(u32)(compare - count - 1) + 1; delta = div_u64(delta * NSEC_PER_SEC, vcpu->arch.count_hz); expire = ktime_add_ns(vcpu->arch.count_resume, delta); @@ -776,7 +815,7 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu) vcpu->arch.pending_exceptions); ++vcpu->stat.wait_exits; - trace_kvm_exit(vcpu, WAIT_EXITS); + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_WAIT); if (!vcpu->arch.pending_exceptions) { vcpu->arch.wait = 1; kvm_vcpu_block(vcpu); @@ -801,9 +840,9 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu) enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; - uint32_t pc = vcpu->arch.pc; + unsigned long pc = vcpu->arch.pc; - kvm_err("[%#x] COP0_TLBR [%ld]\n", pc, kvm_read_c0_guest_index(cop0)); + kvm_err("[%#lx] COP0_TLBR [%ld]\n", pc, kvm_read_c0_guest_index(cop0)); return EMULATE_FAIL; } @@ -813,11 +852,11 @@ enum emulation_result kvm_mips_emul_tlbwi(struct kvm_vcpu *vcpu) struct mips_coproc *cop0 = vcpu->arch.cop0; int index = kvm_read_c0_guest_index(cop0); struct kvm_mips_tlb *tlb = NULL; - uint32_t pc = vcpu->arch.pc; + unsigned long pc = vcpu->arch.pc; if (index < 0 || index >= KVM_MIPS_GUEST_TLB_SIZE) { kvm_debug("%s: illegal index: %d\n", __func__, index); - kvm_debug("[%#x] COP0_TLBWI [%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx, mask: %#lx)\n", + kvm_debug("[%#lx] COP0_TLBWI [%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx, mask: %#lx)\n", pc, index, kvm_read_c0_guest_entryhi(cop0), kvm_read_c0_guest_entrylo0(cop0), kvm_read_c0_guest_entrylo1(cop0), @@ -834,10 +873,10 @@ enum emulation_result kvm_mips_emul_tlbwi(struct kvm_vcpu *vcpu) tlb->tlb_mask = kvm_read_c0_guest_pagemask(cop0); tlb->tlb_hi = kvm_read_c0_guest_entryhi(cop0); - tlb->tlb_lo0 = kvm_read_c0_guest_entrylo0(cop0); - tlb->tlb_lo1 = kvm_read_c0_guest_entrylo1(cop0); + tlb->tlb_lo[0] = kvm_read_c0_guest_entrylo0(cop0); + tlb->tlb_lo[1] = kvm_read_c0_guest_entrylo1(cop0); - kvm_debug("[%#x] COP0_TLBWI [%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx, mask: %#lx)\n", + kvm_debug("[%#lx] COP0_TLBWI [%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx, mask: %#lx)\n", pc, index, kvm_read_c0_guest_entryhi(cop0), kvm_read_c0_guest_entrylo0(cop0), kvm_read_c0_guest_entrylo1(cop0), @@ -851,7 +890,7 @@ enum emulation_result kvm_mips_emul_tlbwr(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_mips_tlb *tlb = NULL; - uint32_t pc = vcpu->arch.pc; + unsigned long pc = vcpu->arch.pc; int index; get_random_bytes(&index, sizeof(index)); @@ -867,10 +906,10 @@ enum emulation_result kvm_mips_emul_tlbwr(struct kvm_vcpu *vcpu) tlb->tlb_mask = kvm_read_c0_guest_pagemask(cop0); tlb->tlb_hi = kvm_read_c0_guest_entryhi(cop0); - tlb->tlb_lo0 = kvm_read_c0_guest_entrylo0(cop0); - tlb->tlb_lo1 = kvm_read_c0_guest_entrylo1(cop0); + tlb->tlb_lo[0] = kvm_read_c0_guest_entrylo0(cop0); + tlb->tlb_lo[1] = kvm_read_c0_guest_entrylo1(cop0); - kvm_debug("[%#x] COP0_TLBWR[%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx)\n", + kvm_debug("[%#lx] COP0_TLBWR[%d] (entryhi: %#lx, entrylo0: %#lx entrylo1: %#lx)\n", pc, index, kvm_read_c0_guest_entryhi(cop0), kvm_read_c0_guest_entrylo0(cop0), kvm_read_c0_guest_entrylo1(cop0)); @@ -882,14 +921,14 @@ enum emulation_result kvm_mips_emul_tlbp(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; long entryhi = kvm_read_c0_guest_entryhi(cop0); - uint32_t pc = vcpu->arch.pc; + unsigned long pc = vcpu->arch.pc; int index = -1; index = kvm_mips_guest_tlb_lookup(vcpu, entryhi); kvm_write_c0_guest_index(cop0, index); - kvm_debug("[%#x] COP0_TLBP (entryhi: %#lx), index: %d\n", pc, entryhi, + kvm_debug("[%#lx] COP0_TLBP (entryhi: %#lx), index: %d\n", pc, entryhi, index); return EMULATE_DONE; @@ -922,8 +961,8 @@ unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu) */ unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu) { - /* Config4 is optional */ - unsigned int mask = MIPS_CONF_M; + /* Config4 and ULRI are optional */ + unsigned int mask = MIPS_CONF_M | MIPS_CONF3_ULRI; /* Permit MSA to be present if MSA is supported */ if (kvm_mips_guest_can_have_msa(&vcpu->arch)) @@ -942,7 +981,12 @@ unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu) unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu) { /* Config5 is optional */ - return MIPS_CONF_M; + unsigned int mask = MIPS_CONF_M; + + /* KScrExist */ + mask |= (unsigned int)vcpu->arch.kscratch_enabled << 16; + + return mask; } /** @@ -973,14 +1017,14 @@ unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu) return mask; } -enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, - uint32_t cause, struct kvm_run *run, +enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst, + u32 *opc, u32 cause, + struct kvm_run *run, struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; enum emulation_result er = EMULATE_DONE; - int32_t rt, rd, copz, sel, co_bit, op; - uint32_t pc = vcpu->arch.pc; + u32 rt, rd, sel; unsigned long curr_pc; /* @@ -992,16 +1036,8 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, if (er == EMULATE_FAIL) return er; - copz = (inst >> 21) & 0x1f; - rt = (inst >> 16) & 0x1f; - rd = (inst >> 11) & 0x1f; - sel = inst & 0x7; - co_bit = (inst >> 25) & 1; - - if (co_bit) { - op = (inst) & 0xff; - - switch (op) { + if (inst.co_format.co) { + switch (inst.co_format.func) { case tlbr_op: /* Read indexed TLB entry */ er = kvm_mips_emul_tlbr(vcpu); break; @@ -1020,47 +1056,58 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, case eret_op: er = kvm_mips_emul_eret(vcpu); goto dont_update_pc; - break; case wait_op: er = kvm_mips_emul_wait(vcpu); break; } } else { - switch (copz) { + rt = inst.c0r_format.rt; + rd = inst.c0r_format.rd; + sel = inst.c0r_format.sel; + + switch (inst.c0r_format.rs) { case mfc_op: #ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS cop0->stat[rd][sel]++; #endif /* Get reg */ if ((rd == MIPS_CP0_COUNT) && (sel == 0)) { - vcpu->arch.gprs[rt] = kvm_mips_read_count(vcpu); + vcpu->arch.gprs[rt] = + (s32)kvm_mips_read_count(vcpu); } else if ((rd == MIPS_CP0_ERRCTL) && (sel == 0)) { vcpu->arch.gprs[rt] = 0x0; #ifdef CONFIG_KVM_MIPS_DYN_TRANS kvm_mips_trans_mfc0(inst, opc, vcpu); #endif } else { - vcpu->arch.gprs[rt] = cop0->reg[rd][sel]; + vcpu->arch.gprs[rt] = (s32)cop0->reg[rd][sel]; #ifdef CONFIG_KVM_MIPS_DYN_TRANS kvm_mips_trans_mfc0(inst, opc, vcpu); #endif } - kvm_debug - ("[%#x] MFCz[%d][%d], vcpu->arch.gprs[%d]: %#lx\n", - pc, rd, sel, rt, vcpu->arch.gprs[rt]); - + trace_kvm_hwr(vcpu, KVM_TRACE_MFC0, + KVM_TRACE_COP0(rd, sel), + vcpu->arch.gprs[rt]); break; case dmfc_op: vcpu->arch.gprs[rt] = cop0->reg[rd][sel]; + + trace_kvm_hwr(vcpu, KVM_TRACE_DMFC0, + KVM_TRACE_COP0(rd, sel), + vcpu->arch.gprs[rt]); break; case mtc_op: #ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS cop0->stat[rd][sel]++; #endif + trace_kvm_hwr(vcpu, KVM_TRACE_MTC0, + KVM_TRACE_COP0(rd, sel), + vcpu->arch.gprs[rt]); + if ((rd == MIPS_CP0_TLB_INDEX) && (vcpu->arch.gprs[rt] >= KVM_MIPS_GUEST_TLB_SIZE)) { @@ -1078,16 +1125,15 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, kvm_err("MTCz, cop0->reg[EBASE]: %#lx\n", kvm_read_c0_guest_ebase(cop0)); } else if (rd == MIPS_CP0_TLB_HI && sel == 0) { - uint32_t nasid = + u32 nasid = vcpu->arch.gprs[rt] & KVM_ENTRYHI_ASID; if ((KSEGX(vcpu->arch.gprs[rt]) != CKSEG0) && ((kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID) != nasid)) { - kvm_debug("MTCz, change ASID from %#lx to %#lx\n", + trace_kvm_asid_change(vcpu, kvm_read_c0_guest_entryhi(cop0) - & KVM_ENTRYHI_ASID, - vcpu->arch.gprs[rt] - & KVM_ENTRYHI_ASID); + & KVM_ENTRYHI_ASID, + nasid); /* Blow away the shadow host TLBs */ kvm_mips_flush_host_tlb(1); @@ -1100,10 +1146,6 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, kvm_mips_write_count(vcpu, vcpu->arch.gprs[rt]); goto done; } else if ((rd == MIPS_CP0_COMPARE) && (sel == 0)) { - kvm_debug("[%#x] MTCz, COMPARE %#lx <- %#lx\n", - pc, kvm_read_c0_guest_compare(cop0), - vcpu->arch.gprs[rt]); - /* If we are writing to COMPARE */ /* Clear pending timer interrupt, if any */ kvm_mips_write_compare(vcpu, @@ -1155,7 +1197,7 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, * it first. */ if (change & ST0_CU1 && !(val & ST0_FR) && - vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) + vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) kvm_lose_fpu(vcpu); /* @@ -1166,7 +1208,7 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, * the near future. */ if (change & ST0_CU1 && - vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) + vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) change_c0_status(ST0_CU1, val); preempt_enable(); @@ -1201,7 +1243,7 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, * context is already loaded. */ if (change & MIPS_CONF5_FRE && - vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) + vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) change_c0_config5(MIPS_CONF5_FRE, val); /* @@ -1211,7 +1253,7 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, * quickly enabled again in the near future. */ if (change & MIPS_CONF5_MSAEN && - vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) + vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) change_c0_config5(MIPS_CONF5_MSAEN, val); @@ -1219,7 +1261,7 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, kvm_write_c0_guest_config5(cop0, val); } else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) { - uint32_t old_cause, new_cause; + u32 old_cause, new_cause; old_cause = kvm_read_c0_guest_cause(cop0); new_cause = vcpu->arch.gprs[rt]; @@ -1233,20 +1275,30 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, else kvm_mips_count_enable_cause(vcpu); } + } else if ((rd == MIPS_CP0_HWRENA) && (sel == 0)) { + u32 mask = MIPS_HWRENA_CPUNUM | + MIPS_HWRENA_SYNCISTEP | + MIPS_HWRENA_CC | + MIPS_HWRENA_CCRES; + + if (kvm_read_c0_guest_config3(cop0) & + MIPS_CONF3_ULRI) + mask |= MIPS_HWRENA_ULR; + cop0->reg[rd][sel] = vcpu->arch.gprs[rt] & mask; } else { cop0->reg[rd][sel] = vcpu->arch.gprs[rt]; #ifdef CONFIG_KVM_MIPS_DYN_TRANS kvm_mips_trans_mtc0(inst, opc, vcpu); #endif } - - kvm_debug("[%#x] MTCz, cop0->reg[%d][%d]: %#lx\n", pc, - rd, sel, cop0->reg[rd][sel]); break; case dmtc_op: kvm_err("!!!!!!![%#lx]dmtc_op: rt: %d, rd: %d, sel: %d!!!!!!\n", vcpu->arch.pc, rt, rd, sel); + trace_kvm_hwr(vcpu, KVM_TRACE_DMTC0, + KVM_TRACE_COP0(rd, sel), + vcpu->arch.gprs[rt]); er = EMULATE_FAIL; break; @@ -1258,7 +1310,7 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, vcpu->arch.gprs[rt] = kvm_read_c0_guest_status(cop0); /* EI */ - if (inst & 0x20) { + if (inst.mfmc0_format.sc) { kvm_debug("[%#lx] mfmc0_op: EI\n", vcpu->arch.pc); kvm_set_c0_guest_status(cop0, ST0_IE); @@ -1272,9 +1324,8 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, case wrpgpr_op: { - uint32_t css = - cop0->reg[MIPS_CP0_STATUS][2] & 0xf; - uint32_t pss = + u32 css = cop0->reg[MIPS_CP0_STATUS][2] & 0xf; + u32 pss = (cop0->reg[MIPS_CP0_STATUS][2] >> 6) & 0xf; /* * We don't support any shadow register sets, so @@ -1291,7 +1342,7 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, break; default: kvm_err("[%#lx]MachEmulateCP0: unsupported COP0, copz: 0x%x\n", - vcpu->arch.pc, copz); + vcpu->arch.pc, inst.c0r_format.rs); er = EMULATE_FAIL; break; } @@ -1312,13 +1363,14 @@ dont_update_pc: return er; } -enum emulation_result kvm_mips_emulate_store(uint32_t inst, uint32_t cause, +enum emulation_result kvm_mips_emulate_store(union mips_instruction inst, + u32 cause, struct kvm_run *run, struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DO_MMIO; - int32_t op, base, rt, offset; - uint32_t bytes; + u32 rt; + u32 bytes; void *data = run->mmio.data; unsigned long curr_pc; @@ -1331,12 +1383,9 @@ enum emulation_result kvm_mips_emulate_store(uint32_t inst, uint32_t cause, if (er == EMULATE_FAIL) return er; - rt = (inst >> 16) & 0x1f; - base = (inst >> 21) & 0x1f; - offset = inst & 0xffff; - op = (inst >> 26) & 0x3f; + rt = inst.i_format.rt; - switch (op) { + switch (inst.i_format.opcode) { case sb_op: bytes = 1; if (bytes > sizeof(run->mmio.data)) { @@ -1357,7 +1406,7 @@ enum emulation_result kvm_mips_emulate_store(uint32_t inst, uint32_t cause, *(u8 *) data = vcpu->arch.gprs[rt]; kvm_debug("OP_SB: eaddr: %#lx, gpr: %#lx, data: %#x\n", vcpu->arch.host_cp0_badvaddr, vcpu->arch.gprs[rt], - *(uint8_t *) data); + *(u8 *) data); break; @@ -1379,11 +1428,11 @@ enum emulation_result kvm_mips_emulate_store(uint32_t inst, uint32_t cause, run->mmio.is_write = 1; vcpu->mmio_needed = 1; vcpu->mmio_is_write = 1; - *(uint32_t *) data = vcpu->arch.gprs[rt]; + *(u32 *) data = vcpu->arch.gprs[rt]; kvm_debug("[%#lx] OP_SW: eaddr: %#lx, gpr: %#lx, data: %#x\n", vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, - vcpu->arch.gprs[rt], *(uint32_t *) data); + vcpu->arch.gprs[rt], *(u32 *) data); break; case sh_op: @@ -1404,15 +1453,16 @@ enum emulation_result kvm_mips_emulate_store(uint32_t inst, uint32_t cause, run->mmio.is_write = 1; vcpu->mmio_needed = 1; vcpu->mmio_is_write = 1; - *(uint16_t *) data = vcpu->arch.gprs[rt]; + *(u16 *) data = vcpu->arch.gprs[rt]; kvm_debug("[%#lx] OP_SH: eaddr: %#lx, gpr: %#lx, data: %#x\n", vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, - vcpu->arch.gprs[rt], *(uint32_t *) data); + vcpu->arch.gprs[rt], *(u32 *) data); break; default: - kvm_err("Store not yet supported"); + kvm_err("Store not yet supported (inst=0x%08x)\n", + inst.word); er = EMULATE_FAIL; break; } @@ -1424,18 +1474,16 @@ enum emulation_result kvm_mips_emulate_store(uint32_t inst, uint32_t cause, return er; } -enum emulation_result kvm_mips_emulate_load(uint32_t inst, uint32_t cause, - struct kvm_run *run, +enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, + u32 cause, struct kvm_run *run, struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DO_MMIO; - int32_t op, base, rt, offset; - uint32_t bytes; + u32 op, rt; + u32 bytes; - rt = (inst >> 16) & 0x1f; - base = (inst >> 21) & 0x1f; - offset = inst & 0xffff; - op = (inst >> 26) & 0x3f; + rt = inst.i_format.rt; + op = inst.i_format.opcode; vcpu->arch.pending_load_cause = cause; vcpu->arch.io_gpr = rt; @@ -1521,7 +1569,8 @@ enum emulation_result kvm_mips_emulate_load(uint32_t inst, uint32_t cause, break; default: - kvm_err("Load not yet supported"); + kvm_err("Load not yet supported (inst=0x%08x)\n", + inst.word); er = EMULATE_FAIL; break; } @@ -1529,40 +1578,15 @@ enum emulation_result kvm_mips_emulate_load(uint32_t inst, uint32_t cause, return er; } -int kvm_mips_sync_icache(unsigned long va, struct kvm_vcpu *vcpu) -{ - unsigned long offset = (va & ~PAGE_MASK); - struct kvm *kvm = vcpu->kvm; - unsigned long pa; - gfn_t gfn; - kvm_pfn_t pfn; - - gfn = va >> PAGE_SHIFT; - - if (gfn >= kvm->arch.guest_pmap_npages) { - kvm_err("%s: Invalid gfn: %#llx\n", __func__, gfn); - kvm_mips_dump_host_tlbs(); - kvm_arch_vcpu_dump_regs(vcpu); - return -1; - } - pfn = kvm->arch.guest_pmap[gfn]; - pa = (pfn << PAGE_SHIFT) | offset; - - kvm_debug("%s: va: %#lx, unmapped: %#x\n", __func__, va, - CKSEG0ADDR(pa)); - - local_flush_icache_range(CKSEG0ADDR(pa), 32); - return 0; -} - -enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, - uint32_t cause, +enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst, + u32 *opc, u32 cause, struct kvm_run *run, struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; enum emulation_result er = EMULATE_DONE; - int32_t offset, cache, op_inst, op, base; + u32 cache, op_inst, op, base; + s16 offset; struct kvm_vcpu_arch *arch = &vcpu->arch; unsigned long va; unsigned long curr_pc; @@ -1576,9 +1600,12 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, if (er == EMULATE_FAIL) return er; - base = (inst >> 21) & 0x1f; - op_inst = (inst >> 16) & 0x1f; - offset = (int16_t)inst; + base = inst.i_format.rs; + op_inst = inst.i_format.rt; + if (cpu_has_mips_r6) + offset = inst.spec3_format.simmediate; + else + offset = inst.i_format.simmediate; cache = op_inst & CacheOp_Cache; op = op_inst & CacheOp_Op; @@ -1634,7 +1661,6 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, (cop0) & KVM_ENTRYHI_ASID)); if (index < 0) { - vcpu->arch.host_cp0_entryhi = (va & VPN2_MASK); vcpu->arch.host_cp0_badvaddr = va; vcpu->arch.pc = curr_pc; er = kvm_mips_emulate_tlbmiss_ld(cause, NULL, run, @@ -1659,9 +1685,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, * We fault an entry from the guest tlb to the * shadow host TLB */ - kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, - NULL, - NULL); + kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb); } } } else { @@ -1714,20 +1738,20 @@ dont_update_pc: return er; } -enum emulation_result kvm_mips_emulate_inst(unsigned long cause, uint32_t *opc, +enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { + union mips_instruction inst; enum emulation_result er = EMULATE_DONE; - uint32_t inst; /* Fetch the instruction. */ if (cause & CAUSEF_BD) opc += 1; - inst = kvm_get_inst(opc, vcpu); + inst.word = kvm_get_inst(opc, vcpu); - switch (((union mips_instruction)inst).r_format.opcode) { + switch (inst.r_format.opcode) { case cop0_op: er = kvm_mips_emulate_CP0(inst, opc, cause, run, vcpu); break; @@ -1744,15 +1768,31 @@ enum emulation_result kvm_mips_emulate_inst(unsigned long cause, uint32_t *opc, er = kvm_mips_emulate_load(inst, cause, run, vcpu); break; +#ifndef CONFIG_CPU_MIPSR6 case cache_op: ++vcpu->stat.cache_exits; - trace_kvm_exit(vcpu, CACHE_EXITS); + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE); er = kvm_mips_emulate_cache(inst, opc, cause, run, vcpu); break; +#else + case spec3_op: + switch (inst.spec3_format.func) { + case cache6_op: + ++vcpu->stat.cache_exits; + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE); + er = kvm_mips_emulate_cache(inst, opc, cause, run, + vcpu); + break; + default: + goto unknown; + }; + break; +unknown: +#endif default: kvm_err("Instruction emulation not supported (%p/%#x)\n", opc, - inst); + inst.word); kvm_arch_vcpu_dump_regs(vcpu); er = EMULATE_FAIL; break; @@ -1761,8 +1801,8 @@ enum emulation_result kvm_mips_emulate_inst(unsigned long cause, uint32_t *opc, return er; } -enum emulation_result kvm_mips_emulate_syscall(unsigned long cause, - uint32_t *opc, +enum emulation_result kvm_mips_emulate_syscall(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -1796,8 +1836,8 @@ enum emulation_result kvm_mips_emulate_syscall(unsigned long cause, return er; } -enum emulation_result kvm_mips_emulate_tlbmiss_ld(unsigned long cause, - uint32_t *opc, +enum emulation_result kvm_mips_emulate_tlbmiss_ld(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -1842,8 +1882,8 @@ enum emulation_result kvm_mips_emulate_tlbmiss_ld(unsigned long cause, return EMULATE_DONE; } -enum emulation_result kvm_mips_emulate_tlbinv_ld(unsigned long cause, - uint32_t *opc, +enum emulation_result kvm_mips_emulate_tlbinv_ld(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -1888,8 +1928,8 @@ enum emulation_result kvm_mips_emulate_tlbinv_ld(unsigned long cause, return EMULATE_DONE; } -enum emulation_result kvm_mips_emulate_tlbmiss_st(unsigned long cause, - uint32_t *opc, +enum emulation_result kvm_mips_emulate_tlbmiss_st(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -1932,8 +1972,8 @@ enum emulation_result kvm_mips_emulate_tlbmiss_st(unsigned long cause, return EMULATE_DONE; } -enum emulation_result kvm_mips_emulate_tlbinv_st(unsigned long cause, - uint32_t *opc, +enum emulation_result kvm_mips_emulate_tlbinv_st(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -1977,7 +2017,7 @@ enum emulation_result kvm_mips_emulate_tlbinv_st(unsigned long cause, } /* TLBMOD: store into address matching TLB with Dirty bit off */ -enum emulation_result kvm_mips_handle_tlbmod(unsigned long cause, uint32_t *opc, +enum emulation_result kvm_mips_handle_tlbmod(u32 cause, u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -2005,8 +2045,8 @@ enum emulation_result kvm_mips_handle_tlbmod(unsigned long cause, uint32_t *opc, return er; } -enum emulation_result kvm_mips_emulate_tlbmod(unsigned long cause, - uint32_t *opc, +enum emulation_result kvm_mips_emulate_tlbmod(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -2048,8 +2088,8 @@ enum emulation_result kvm_mips_emulate_tlbmod(unsigned long cause, return EMULATE_DONE; } -enum emulation_result kvm_mips_emulate_fpu_exc(unsigned long cause, - uint32_t *opc, +enum emulation_result kvm_mips_emulate_fpu_exc(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -2077,8 +2117,8 @@ enum emulation_result kvm_mips_emulate_fpu_exc(unsigned long cause, return EMULATE_DONE; } -enum emulation_result kvm_mips_emulate_ri_exc(unsigned long cause, - uint32_t *opc, +enum emulation_result kvm_mips_emulate_ri_exc(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -2112,8 +2152,8 @@ enum emulation_result kvm_mips_emulate_ri_exc(unsigned long cause, return er; } -enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause, - uint32_t *opc, +enum emulation_result kvm_mips_emulate_bp_exc(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -2147,8 +2187,8 @@ enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause, return er; } -enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause, - uint32_t *opc, +enum emulation_result kvm_mips_emulate_trap_exc(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -2182,8 +2222,8 @@ enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause, return er; } -enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause, - uint32_t *opc, +enum emulation_result kvm_mips_emulate_msafpe_exc(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -2217,8 +2257,8 @@ enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause, return er; } -enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause, - uint32_t *opc, +enum emulation_result kvm_mips_emulate_fpe_exc(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -2252,8 +2292,8 @@ enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause, return er; } -enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause, - uint32_t *opc, +enum emulation_result kvm_mips_emulate_msadis_exc(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -2287,22 +2327,7 @@ enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause, return er; } -/* ll/sc, rdhwr, sync emulation */ - -#define OPCODE 0xfc000000 -#define BASE 0x03e00000 -#define RT 0x001f0000 -#define OFFSET 0x0000ffff -#define LL 0xc0000000 -#define SC 0xe0000000 -#define SPEC0 0x00000000 -#define SPEC3 0x7c000000 -#define RD 0x0000f800 -#define FUNC 0x0000003f -#define SYNC 0x0000000f -#define RDHWR 0x0000003b - -enum emulation_result kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, +enum emulation_result kvm_mips_handle_ri(u32 cause, u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -2310,7 +2335,7 @@ enum emulation_result kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, struct kvm_vcpu_arch *arch = &vcpu->arch; enum emulation_result er = EMULATE_DONE; unsigned long curr_pc; - uint32_t inst; + union mips_instruction inst; /* * Update PC and hold onto current PC in case there is @@ -2325,17 +2350,22 @@ enum emulation_result kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, if (cause & CAUSEF_BD) opc += 1; - inst = kvm_get_inst(opc, vcpu); + inst.word = kvm_get_inst(opc, vcpu); - if (inst == KVM_INVALID_INST) { + if (inst.word == KVM_INVALID_INST) { kvm_err("%s: Cannot get inst @ %p\n", __func__, opc); return EMULATE_FAIL; } - if ((inst & OPCODE) == SPEC3 && (inst & FUNC) == RDHWR) { + if (inst.r_format.opcode == spec3_op && + inst.r_format.func == rdhwr_op && + inst.r_format.rs == 0 && + (inst.r_format.re >> 3) == 0) { int usermode = !KVM_GUEST_KERNEL_MODE(vcpu); - int rd = (inst & RD) >> 11; - int rt = (inst & RT) >> 16; + int rd = inst.r_format.rd; + int rt = inst.r_format.rt; + int sel = inst.r_format.re & 0x7; + /* If usermode, check RDHWR rd is allowed by guest HWREna */ if (usermode && !(kvm_read_c0_guest_hwrena(cop0) & BIT(rd))) { kvm_debug("RDHWR %#x disallowed by HWREna @ %p\n", @@ -2343,17 +2373,17 @@ enum emulation_result kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, goto emulate_ri; } switch (rd) { - case 0: /* CPU number */ - arch->gprs[rt] = 0; + case MIPS_HWR_CPUNUM: /* CPU number */ + arch->gprs[rt] = vcpu->vcpu_id; break; - case 1: /* SYNCI length */ + case MIPS_HWR_SYNCISTEP: /* SYNCI length */ arch->gprs[rt] = min(current_cpu_data.dcache.linesz, current_cpu_data.icache.linesz); break; - case 2: /* Read count register */ - arch->gprs[rt] = kvm_mips_read_count(vcpu); + case MIPS_HWR_CC: /* Read count register */ + arch->gprs[rt] = (s32)kvm_mips_read_count(vcpu); break; - case 3: /* Count register resolution */ + case MIPS_HWR_CCRES: /* Count register resolution */ switch (current_cpu_data.cputype) { case CPU_20KC: case CPU_25KF: @@ -2363,7 +2393,7 @@ enum emulation_result kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, arch->gprs[rt] = 2; } break; - case 29: + case MIPS_HWR_ULR: /* Read UserLocal register */ arch->gprs[rt] = kvm_read_c0_guest_userlocal(cop0); break; @@ -2371,8 +2401,12 @@ enum emulation_result kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, kvm_debug("RDHWR %#x not supported @ %p\n", rd, opc); goto emulate_ri; } + + trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR, KVM_TRACE_HWR(rd, sel), + vcpu->arch.gprs[rt]); } else { - kvm_debug("Emulate RI not supported @ %p: %#x\n", opc, inst); + kvm_debug("Emulate RI not supported @ %p: %#x\n", + opc, inst.word); goto emulate_ri; } @@ -2405,19 +2439,19 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, switch (run->mmio.len) { case 4: - *gpr = *(int32_t *) run->mmio.data; + *gpr = *(s32 *) run->mmio.data; break; case 2: if (vcpu->mmio_needed == 2) - *gpr = *(int16_t *) run->mmio.data; + *gpr = *(s16 *) run->mmio.data; else - *gpr = *(uint16_t *)run->mmio.data; + *gpr = *(u16 *)run->mmio.data; break; case 1: if (vcpu->mmio_needed == 2) - *gpr = *(int8_t *) run->mmio.data; + *gpr = *(s8 *) run->mmio.data; else *gpr = *(u8 *) run->mmio.data; break; @@ -2432,12 +2466,12 @@ done: return er; } -static enum emulation_result kvm_mips_emulate_exc(unsigned long cause, - uint32_t *opc, +static enum emulation_result kvm_mips_emulate_exc(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { - uint32_t exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; + u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_vcpu_arch *arch = &vcpu->arch; enum emulation_result er = EMULATE_DONE; @@ -2470,13 +2504,13 @@ static enum emulation_result kvm_mips_emulate_exc(unsigned long cause, return er; } -enum emulation_result kvm_mips_check_privilege(unsigned long cause, - uint32_t *opc, +enum emulation_result kvm_mips_check_privilege(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DONE; - uint32_t exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; + u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; int usermode = !KVM_GUEST_KERNEL_MODE(vcpu); @@ -2566,18 +2600,18 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause, * (2) TLB entry is present in the Guest TLB but not in the shadow, in this * case we inject the TLB from the Guest TLB into the shadow host TLB */ -enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause, - uint32_t *opc, +enum emulation_result kvm_mips_handle_tlbmiss(u32 cause, + u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DONE; - uint32_t exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; + u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; unsigned long va = vcpu->arch.host_cp0_badvaddr; int index; - kvm_debug("kvm_mips_handle_tlbmiss: badvaddr: %#lx, entryhi: %#lx\n", - vcpu->arch.host_cp0_badvaddr, vcpu->arch.host_cp0_entryhi); + kvm_debug("kvm_mips_handle_tlbmiss: badvaddr: %#lx\n", + vcpu->arch.host_cp0_badvaddr); /* * KVM would not have got the exception if this entry was valid in the @@ -2620,13 +2654,12 @@ enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause, } } else { kvm_debug("Injecting hi: %#lx, lo0: %#lx, lo1: %#lx into shadow host TLB\n", - tlb->tlb_hi, tlb->tlb_lo0, tlb->tlb_lo1); + tlb->tlb_hi, tlb->tlb_lo[0], tlb->tlb_lo[1]); /* * OK we have a Guest TLB entry, now inject it into the * shadow host TLB */ - kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, NULL, - NULL); + kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb); } } diff --git a/arch/mips/kvm/entry.c b/arch/mips/kvm/entry.c new file mode 100644 index 000000000000..6a02b3a3fa65 --- /dev/null +++ b/arch/mips/kvm/entry.c @@ -0,0 +1,701 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Generation of main entry point for the guest, exception handling. + * + * Copyright (C) 2012 MIPS Technologies, Inc. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + * + * Copyright (C) 2016 Imagination Technologies Ltd. + */ + +#include <linux/kvm_host.h> +#include <asm/msa.h> +#include <asm/setup.h> +#include <asm/uasm.h> + +/* Register names */ +#define ZERO 0 +#define AT 1 +#define V0 2 +#define V1 3 +#define A0 4 +#define A1 5 + +#if _MIPS_SIM == _MIPS_SIM_ABI32 +#define T0 8 +#define T1 9 +#define T2 10 +#define T3 11 +#endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ + +#if _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 +#define T0 12 +#define T1 13 +#define T2 14 +#define T3 15 +#endif /* _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 */ + +#define S0 16 +#define S1 17 +#define T9 25 +#define K0 26 +#define K1 27 +#define GP 28 +#define SP 29 +#define RA 31 + +/* Some CP0 registers */ +#define C0_HWRENA 7, 0 +#define C0_BADVADDR 8, 0 +#define C0_ENTRYHI 10, 0 +#define C0_STATUS 12, 0 +#define C0_CAUSE 13, 0 +#define C0_EPC 14, 0 +#define C0_EBASE 15, 1 +#define C0_CONFIG5 16, 5 +#define C0_DDATA_LO 28, 3 +#define C0_ERROREPC 30, 0 + +#define CALLFRAME_SIZ 32 + +#ifdef CONFIG_64BIT +#define ST0_KX_IF_64 ST0_KX +#else +#define ST0_KX_IF_64 0 +#endif + +static unsigned int scratch_vcpu[2] = { C0_DDATA_LO }; +static unsigned int scratch_tmp[2] = { C0_ERROREPC }; + +enum label_id { + label_fpu_1 = 1, + label_msa_1, + label_return_to_host, + label_kernel_asid, + label_exit_common, +}; + +UASM_L_LA(_fpu_1) +UASM_L_LA(_msa_1) +UASM_L_LA(_return_to_host) +UASM_L_LA(_kernel_asid) +UASM_L_LA(_exit_common) + +static void *kvm_mips_build_enter_guest(void *addr); +static void *kvm_mips_build_ret_from_exit(void *addr); +static void *kvm_mips_build_ret_to_guest(void *addr); +static void *kvm_mips_build_ret_to_host(void *addr); + +/** + * kvm_mips_entry_setup() - Perform global setup for entry code. + * + * Perform global setup for entry code, such as choosing a scratch register. + * + * Returns: 0 on success. + * -errno on failure. + */ +int kvm_mips_entry_setup(void) +{ + /* + * We prefer to use KScratchN registers if they are available over the + * defaults above, which may not work on all cores. + */ + unsigned int kscratch_mask = cpu_data[0].kscratch_mask & 0xfc; + + /* Pick a scratch register for storing VCPU */ + if (kscratch_mask) { + scratch_vcpu[0] = 31; + scratch_vcpu[1] = ffs(kscratch_mask) - 1; + kscratch_mask &= ~BIT(scratch_vcpu[1]); + } + + /* Pick a scratch register to use as a temp for saving state */ + if (kscratch_mask) { + scratch_tmp[0] = 31; + scratch_tmp[1] = ffs(kscratch_mask) - 1; + kscratch_mask &= ~BIT(scratch_tmp[1]); + } + + return 0; +} + +static void kvm_mips_build_save_scratch(u32 **p, unsigned int tmp, + unsigned int frame) +{ + /* Save the VCPU scratch register value in cp0_epc of the stack frame */ + UASM_i_MFC0(p, tmp, scratch_vcpu[0], scratch_vcpu[1]); + UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame); + + /* Save the temp scratch register value in cp0_cause of stack frame */ + if (scratch_tmp[0] == 31) { + UASM_i_MFC0(p, tmp, scratch_tmp[0], scratch_tmp[1]); + UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame); + } +} + +static void kvm_mips_build_restore_scratch(u32 **p, unsigned int tmp, + unsigned int frame) +{ + /* + * Restore host scratch register values saved by + * kvm_mips_build_save_scratch(). + */ + UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame); + UASM_i_MTC0(p, tmp, scratch_vcpu[0], scratch_vcpu[1]); + + if (scratch_tmp[0] == 31) { + UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame); + UASM_i_MTC0(p, tmp, scratch_tmp[0], scratch_tmp[1]); + } +} + +/** + * build_set_exc_base() - Assemble code to write exception base address. + * @p: Code buffer pointer. + * @reg: Source register (generated code may set WG bit in @reg). + * + * Assemble code to modify the exception base address in the EBase register, + * using the appropriately sized access and setting the WG bit if necessary. + */ +static inline void build_set_exc_base(u32 **p, unsigned int reg) +{ + if (cpu_has_ebase_wg) { + /* Set WG so that all the bits get written */ + uasm_i_ori(p, reg, reg, MIPS_EBASE_WG); + UASM_i_MTC0(p, reg, C0_EBASE); + } else { + uasm_i_mtc0(p, reg, C0_EBASE); + } +} + +/** + * kvm_mips_build_vcpu_run() - Assemble function to start running a guest VCPU. + * @addr: Address to start writing code. + * + * Assemble the start of the vcpu_run function to run a guest VCPU. The function + * conforms to the following prototype: + * + * int vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu); + * + * The exit from the guest and return to the caller is handled by the code + * generated by kvm_mips_build_ret_to_host(). + * + * Returns: Next address after end of written function. + */ +void *kvm_mips_build_vcpu_run(void *addr) +{ + u32 *p = addr; + unsigned int i; + + /* + * A0: run + * A1: vcpu + */ + + /* k0/k1 not being used in host kernel context */ + UASM_i_ADDIU(&p, K1, SP, -(int)sizeof(struct pt_regs)); + for (i = 16; i < 32; ++i) { + if (i == 24) + i = 28; + UASM_i_SW(&p, i, offsetof(struct pt_regs, regs[i]), K1); + } + + /* Save host status */ + uasm_i_mfc0(&p, V0, C0_STATUS); + UASM_i_SW(&p, V0, offsetof(struct pt_regs, cp0_status), K1); + + /* Save scratch registers, will be used to store pointer to vcpu etc */ + kvm_mips_build_save_scratch(&p, V1, K1); + + /* VCPU scratch register has pointer to vcpu */ + UASM_i_MTC0(&p, A1, scratch_vcpu[0], scratch_vcpu[1]); + + /* Offset into vcpu->arch */ + UASM_i_ADDIU(&p, K1, A1, offsetof(struct kvm_vcpu, arch)); + + /* + * Save the host stack to VCPU, used for exception processing + * when we exit from the Guest + */ + UASM_i_SW(&p, SP, offsetof(struct kvm_vcpu_arch, host_stack), K1); + + /* Save the kernel gp as well */ + UASM_i_SW(&p, GP, offsetof(struct kvm_vcpu_arch, host_gp), K1); + + /* + * Setup status register for running the guest in UM, interrupts + * are disabled + */ + UASM_i_LA(&p, K0, ST0_EXL | KSU_USER | ST0_BEV | ST0_KX_IF_64); + uasm_i_mtc0(&p, K0, C0_STATUS); + uasm_i_ehb(&p); + + /* load up the new EBASE */ + UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, guest_ebase), K1); + build_set_exc_base(&p, K0); + + /* + * Now that the new EBASE has been loaded, unset BEV, set + * interrupt mask as it was but make sure that timer interrupts + * are enabled + */ + uasm_i_addiu(&p, K0, ZERO, ST0_EXL | KSU_USER | ST0_IE | ST0_KX_IF_64); + uasm_i_andi(&p, V0, V0, ST0_IM); + uasm_i_or(&p, K0, K0, V0); + uasm_i_mtc0(&p, K0, C0_STATUS); + uasm_i_ehb(&p); + + p = kvm_mips_build_enter_guest(p); + + return p; +} + +/** + * kvm_mips_build_enter_guest() - Assemble code to resume guest execution. + * @addr: Address to start writing code. + * + * Assemble the code to resume guest execution. This code is common between the + * initial entry into the guest from the host, and returning from the exit + * handler back to the guest. + * + * Returns: Next address after end of written function. + */ +static void *kvm_mips_build_enter_guest(void *addr) +{ + u32 *p = addr; + unsigned int i; + struct uasm_label labels[2]; + struct uasm_reloc relocs[2]; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + /* Set Guest EPC */ + UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, pc), K1); + UASM_i_MTC0(&p, T0, C0_EPC); + + /* Set the ASID for the Guest Kernel */ + UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, cop0), K1); + UASM_i_LW(&p, T0, offsetof(struct mips_coproc, reg[MIPS_CP0_STATUS][0]), + T0); + uasm_i_andi(&p, T0, T0, KSU_USER | ST0_ERL | ST0_EXL); + uasm_i_xori(&p, T0, T0, KSU_USER); + uasm_il_bnez(&p, &r, T0, label_kernel_asid); + UASM_i_ADDIU(&p, T1, K1, + offsetof(struct kvm_vcpu_arch, guest_kernel_asid)); + /* else user */ + UASM_i_ADDIU(&p, T1, K1, + offsetof(struct kvm_vcpu_arch, guest_user_asid)); + uasm_l_kernel_asid(&l, p); + + /* t1: contains the base of the ASID array, need to get the cpu id */ + /* smp_processor_id */ + uasm_i_lw(&p, T2, offsetof(struct thread_info, cpu), GP); + /* x4 */ + uasm_i_sll(&p, T2, T2, 2); + UASM_i_ADDU(&p, T3, T1, T2); + uasm_i_lw(&p, K0, 0, T3); +#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE + /* x sizeof(struct cpuinfo_mips)/4 */ + uasm_i_addiu(&p, T3, ZERO, sizeof(struct cpuinfo_mips)/4); + uasm_i_mul(&p, T2, T2, T3); + + UASM_i_LA_mostly(&p, AT, (long)&cpu_data[0].asid_mask); + UASM_i_ADDU(&p, AT, AT, T2); + UASM_i_LW(&p, T2, uasm_rel_lo((long)&cpu_data[0].asid_mask), AT); + uasm_i_and(&p, K0, K0, T2); +#else + uasm_i_andi(&p, K0, K0, MIPS_ENTRYHI_ASID); +#endif + uasm_i_mtc0(&p, K0, C0_ENTRYHI); + uasm_i_ehb(&p); + + /* Disable RDHWR access */ + uasm_i_mtc0(&p, ZERO, C0_HWRENA); + + /* load the guest context from VCPU and return */ + for (i = 1; i < 32; ++i) { + /* Guest k0/k1 loaded later */ + if (i == K0 || i == K1) + continue; + UASM_i_LW(&p, i, offsetof(struct kvm_vcpu_arch, gprs[i]), K1); + } + +#ifndef CONFIG_CPU_MIPSR6 + /* Restore hi/lo */ + UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, hi), K1); + uasm_i_mthi(&p, K0); + + UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, lo), K1); + uasm_i_mtlo(&p, K0); +#endif + + /* Restore the guest's k0/k1 registers */ + UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, gprs[K0]), K1); + UASM_i_LW(&p, K1, offsetof(struct kvm_vcpu_arch, gprs[K1]), K1); + + /* Jump to guest */ + uasm_i_eret(&p); + + uasm_resolve_relocs(relocs, labels); + + return p; +} + +/** + * kvm_mips_build_exception() - Assemble first level guest exception handler. + * @addr: Address to start writing code. + * @handler: Address of common handler (within range of @addr). + * + * Assemble exception vector code for guest execution. The generated vector will + * branch to the common exception handler generated by kvm_mips_build_exit(). + * + * Returns: Next address after end of written function. + */ +void *kvm_mips_build_exception(void *addr, void *handler) +{ + u32 *p = addr; + struct uasm_label labels[2]; + struct uasm_reloc relocs[2]; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + /* Save guest k1 into scratch register */ + UASM_i_MTC0(&p, K1, scratch_tmp[0], scratch_tmp[1]); + + /* Get the VCPU pointer from the VCPU scratch register */ + UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]); + UASM_i_ADDIU(&p, K1, K1, offsetof(struct kvm_vcpu, arch)); + + /* Save guest k0 into VCPU structure */ + UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, gprs[K0]), K1); + + /* Branch to the common handler */ + uasm_il_b(&p, &r, label_exit_common); + uasm_i_nop(&p); + + uasm_l_exit_common(&l, handler); + uasm_resolve_relocs(relocs, labels); + + return p; +} + +/** + * kvm_mips_build_exit() - Assemble common guest exit handler. + * @addr: Address to start writing code. + * + * Assemble the generic guest exit handling code. This is called by the + * exception vectors (generated by kvm_mips_build_exception()), and calls + * kvm_mips_handle_exit(), then either resumes the guest or returns to the host + * depending on the return value. + * + * Returns: Next address after end of written function. + */ +void *kvm_mips_build_exit(void *addr) +{ + u32 *p = addr; + unsigned int i; + struct uasm_label labels[3]; + struct uasm_reloc relocs[3]; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + /* + * Generic Guest exception handler. We end up here when the guest + * does something that causes a trap to kernel mode. + * + * Both k0/k1 registers will have already been saved (k0 into the vcpu + * structure, and k1 into the scratch_tmp register). + * + * The k1 register will already contain the kvm_vcpu_arch pointer. + */ + + /* Start saving Guest context to VCPU */ + for (i = 0; i < 32; ++i) { + /* Guest k0/k1 saved later */ + if (i == K0 || i == K1) + continue; + UASM_i_SW(&p, i, offsetof(struct kvm_vcpu_arch, gprs[i]), K1); + } + +#ifndef CONFIG_CPU_MIPSR6 + /* We need to save hi/lo and restore them on the way out */ + uasm_i_mfhi(&p, T0); + UASM_i_SW(&p, T0, offsetof(struct kvm_vcpu_arch, hi), K1); + + uasm_i_mflo(&p, T0); + UASM_i_SW(&p, T0, offsetof(struct kvm_vcpu_arch, lo), K1); +#endif + + /* Finally save guest k1 to VCPU */ + uasm_i_ehb(&p); + UASM_i_MFC0(&p, T0, scratch_tmp[0], scratch_tmp[1]); + UASM_i_SW(&p, T0, offsetof(struct kvm_vcpu_arch, gprs[K1]), K1); + + /* Now that context has been saved, we can use other registers */ + + /* Restore vcpu */ + UASM_i_MFC0(&p, A1, scratch_vcpu[0], scratch_vcpu[1]); + uasm_i_move(&p, S1, A1); + + /* Restore run (vcpu->run) */ + UASM_i_LW(&p, A0, offsetof(struct kvm_vcpu, run), A1); + /* Save pointer to run in s0, will be saved by the compiler */ + uasm_i_move(&p, S0, A0); + + /* + * Save Host level EPC, BadVaddr and Cause to VCPU, useful to process + * the exception + */ + UASM_i_MFC0(&p, K0, C0_EPC); + UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, pc), K1); + + UASM_i_MFC0(&p, K0, C0_BADVADDR); + UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_cp0_badvaddr), + K1); + + uasm_i_mfc0(&p, K0, C0_CAUSE); + uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, host_cp0_cause), K1); + + /* Now restore the host state just enough to run the handlers */ + + /* Switch EBASE to the one used by Linux */ + /* load up the host EBASE */ + uasm_i_mfc0(&p, V0, C0_STATUS); + + uasm_i_lui(&p, AT, ST0_BEV >> 16); + uasm_i_or(&p, K0, V0, AT); + + uasm_i_mtc0(&p, K0, C0_STATUS); + uasm_i_ehb(&p); + + UASM_i_LA_mostly(&p, K0, (long)&ebase); + UASM_i_LW(&p, K0, uasm_rel_lo((long)&ebase), K0); + build_set_exc_base(&p, K0); + + if (raw_cpu_has_fpu) { + /* + * If FPU is enabled, save FCR31 and clear it so that later + * ctc1's don't trigger FPE for pending exceptions. + */ + uasm_i_lui(&p, AT, ST0_CU1 >> 16); + uasm_i_and(&p, V1, V0, AT); + uasm_il_beqz(&p, &r, V1, label_fpu_1); + uasm_i_nop(&p); + uasm_i_cfc1(&p, T0, 31); + uasm_i_sw(&p, T0, offsetof(struct kvm_vcpu_arch, fpu.fcr31), + K1); + uasm_i_ctc1(&p, ZERO, 31); + uasm_l_fpu_1(&l, p); + } + + if (cpu_has_msa) { + /* + * If MSA is enabled, save MSACSR and clear it so that later + * instructions don't trigger MSAFPE for pending exceptions. + */ + uasm_i_mfc0(&p, T0, C0_CONFIG5); + uasm_i_ext(&p, T0, T0, 27, 1); /* MIPS_CONF5_MSAEN */ + uasm_il_beqz(&p, &r, T0, label_msa_1); + uasm_i_nop(&p); + uasm_i_cfcmsa(&p, T0, MSA_CSR); + uasm_i_sw(&p, T0, offsetof(struct kvm_vcpu_arch, fpu.msacsr), + K1); + uasm_i_ctcmsa(&p, MSA_CSR, ZERO); + uasm_l_msa_1(&l, p); + } + + /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ + uasm_i_addiu(&p, AT, ZERO, ~(ST0_EXL | KSU_USER | ST0_IE)); + uasm_i_and(&p, V0, V0, AT); + uasm_i_lui(&p, AT, ST0_CU0 >> 16); + uasm_i_or(&p, V0, V0, AT); + uasm_i_mtc0(&p, V0, C0_STATUS); + uasm_i_ehb(&p); + + /* Load up host GP */ + UASM_i_LW(&p, GP, offsetof(struct kvm_vcpu_arch, host_gp), K1); + + /* Need a stack before we can jump to "C" */ + UASM_i_LW(&p, SP, offsetof(struct kvm_vcpu_arch, host_stack), K1); + + /* Saved host state */ + UASM_i_ADDIU(&p, SP, SP, -(int)sizeof(struct pt_regs)); + + /* + * XXXKYMA do we need to load the host ASID, maybe not because the + * kernel entries are marked GLOBAL, need to verify + */ + + /* Restore host scratch registers, as we'll have clobbered them */ + kvm_mips_build_restore_scratch(&p, K0, SP); + + /* Restore RDHWR access */ + UASM_i_LA_mostly(&p, K0, (long)&hwrena); + uasm_i_lw(&p, K0, uasm_rel_lo((long)&hwrena), K0); + uasm_i_mtc0(&p, K0, C0_HWRENA); + + /* Jump to handler */ + /* + * XXXKYMA: not sure if this is safe, how large is the stack?? + * Now jump to the kvm_mips_handle_exit() to see if we can deal + * with this in the kernel + */ + UASM_i_LA(&p, T9, (unsigned long)kvm_mips_handle_exit); + uasm_i_jalr(&p, RA, T9); + UASM_i_ADDIU(&p, SP, SP, -CALLFRAME_SIZ); + + uasm_resolve_relocs(relocs, labels); + + p = kvm_mips_build_ret_from_exit(p); + + return p; +} + +/** + * kvm_mips_build_ret_from_exit() - Assemble guest exit return handler. + * @addr: Address to start writing code. + * + * Assemble the code to handle the return from kvm_mips_handle_exit(), either + * resuming the guest or returning to the host depending on the return value. + * + * Returns: Next address after end of written function. + */ +static void *kvm_mips_build_ret_from_exit(void *addr) +{ + u32 *p = addr; + struct uasm_label labels[2]; + struct uasm_reloc relocs[2]; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + /* Return from handler Make sure interrupts are disabled */ + uasm_i_di(&p, ZERO); + uasm_i_ehb(&p); + + /* + * XXXKYMA: k0/k1 could have been blown away if we processed + * an exception while we were handling the exception from the + * guest, reload k1 + */ + + uasm_i_move(&p, K1, S1); + UASM_i_ADDIU(&p, K1, K1, offsetof(struct kvm_vcpu, arch)); + + /* + * Check return value, should tell us if we are returning to the + * host (handle I/O etc)or resuming the guest + */ + uasm_i_andi(&p, T0, V0, RESUME_HOST); + uasm_il_bnez(&p, &r, T0, label_return_to_host); + uasm_i_nop(&p); + + p = kvm_mips_build_ret_to_guest(p); + + uasm_l_return_to_host(&l, p); + p = kvm_mips_build_ret_to_host(p); + + uasm_resolve_relocs(relocs, labels); + + return p; +} + +/** + * kvm_mips_build_ret_to_guest() - Assemble code to return to the guest. + * @addr: Address to start writing code. + * + * Assemble the code to handle return from the guest exit handler + * (kvm_mips_handle_exit()) back to the guest. + * + * Returns: Next address after end of written function. + */ +static void *kvm_mips_build_ret_to_guest(void *addr) +{ + u32 *p = addr; + + /* Put the saved pointer to vcpu (s1) back into the scratch register */ + UASM_i_MTC0(&p, S1, scratch_vcpu[0], scratch_vcpu[1]); + + /* Load up the Guest EBASE to minimize the window where BEV is set */ + UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, guest_ebase), K1); + + /* Switch EBASE back to the one used by KVM */ + uasm_i_mfc0(&p, V1, C0_STATUS); + uasm_i_lui(&p, AT, ST0_BEV >> 16); + uasm_i_or(&p, K0, V1, AT); + uasm_i_mtc0(&p, K0, C0_STATUS); + uasm_i_ehb(&p); + build_set_exc_base(&p, T0); + + /* Setup status register for running guest in UM */ + uasm_i_ori(&p, V1, V1, ST0_EXL | KSU_USER | ST0_IE); + UASM_i_LA(&p, AT, ~(ST0_CU0 | ST0_MX)); + uasm_i_and(&p, V1, V1, AT); + uasm_i_mtc0(&p, V1, C0_STATUS); + uasm_i_ehb(&p); + + p = kvm_mips_build_enter_guest(p); + + return p; +} + +/** + * kvm_mips_build_ret_to_host() - Assemble code to return to the host. + * @addr: Address to start writing code. + * + * Assemble the code to handle return from the guest exit handler + * (kvm_mips_handle_exit()) back to the host, i.e. to the caller of the vcpu_run + * function generated by kvm_mips_build_vcpu_run(). + * + * Returns: Next address after end of written function. + */ +static void *kvm_mips_build_ret_to_host(void *addr) +{ + u32 *p = addr; + unsigned int i; + + /* EBASE is already pointing to Linux */ + UASM_i_LW(&p, K1, offsetof(struct kvm_vcpu_arch, host_stack), K1); + UASM_i_ADDIU(&p, K1, K1, -(int)sizeof(struct pt_regs)); + + /* + * r2/v0 is the return code, shift it down by 2 (arithmetic) + * to recover the err code + */ + uasm_i_sra(&p, K0, V0, 2); + uasm_i_move(&p, V0, K0); + + /* Load context saved on the host stack */ + for (i = 16; i < 31; ++i) { + if (i == 24) + i = 28; + UASM_i_LW(&p, i, offsetof(struct pt_regs, regs[i]), K1); + } + + /* Restore RDHWR access */ + UASM_i_LA_mostly(&p, K0, (long)&hwrena); + uasm_i_lw(&p, K0, uasm_rel_lo((long)&hwrena), K0); + uasm_i_mtc0(&p, K0, C0_HWRENA); + + /* Restore RA, which is the address we will return to */ + UASM_i_LW(&p, RA, offsetof(struct pt_regs, regs[RA]), K1); + uasm_i_jr(&p, RA); + uasm_i_nop(&p); + + return p; +} + diff --git a/arch/mips/kvm/fpu.S b/arch/mips/kvm/fpu.S index 531fbf5131c0..16f17c6390dd 100644 --- a/arch/mips/kvm/fpu.S +++ b/arch/mips/kvm/fpu.S @@ -14,13 +14,16 @@ #include <asm/mipsregs.h> #include <asm/regdef.h> +/* preprocessor replaces the fp in ".set fp=64" with $30 otherwise */ +#undef fp + .set noreorder .set noat LEAF(__kvm_save_fpu) .set push - .set mips64r2 SET_HARDFLOAT + .set fp=64 mfc0 t0, CP0_STATUS sll t0, t0, 5 # is Status.FR set? bgez t0, 1f # no: skip odd doubles @@ -63,8 +66,8 @@ LEAF(__kvm_save_fpu) LEAF(__kvm_restore_fpu) .set push - .set mips64r2 SET_HARDFLOAT + .set fp=64 mfc0 t0, CP0_STATUS sll t0, t0, 5 # is Status.FR set? bgez t0, 1f # no: skip odd doubles diff --git a/arch/mips/kvm/interrupt.c b/arch/mips/kvm/interrupt.c index 95f790663b0c..ad28dac6b7e9 100644 --- a/arch/mips/kvm/interrupt.c +++ b/arch/mips/kvm/interrupt.c @@ -22,12 +22,12 @@ #include "interrupt.h" -void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, uint32_t priority) +void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority) { set_bit(priority, &vcpu->arch.pending_exceptions); } -void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, uint32_t priority) +void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority) { clear_bit(priority, &vcpu->arch.pending_exceptions); } @@ -114,10 +114,10 @@ void kvm_mips_dequeue_io_int_cb(struct kvm_vcpu *vcpu, /* Deliver the interrupt of the corresponding priority, if possible. */ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, - uint32_t cause) + u32 cause) { int allowed = 0; - uint32_t exccode; + u32 exccode; struct kvm_vcpu_arch *arch = &vcpu->arch; struct mips_coproc *cop0 = vcpu->arch.cop0; @@ -196,12 +196,12 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, } int kvm_mips_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority, - uint32_t cause) + u32 cause) { return 1; } -void kvm_mips_deliver_interrupts(struct kvm_vcpu *vcpu, uint32_t cause) +void kvm_mips_deliver_interrupts(struct kvm_vcpu *vcpu, u32 cause) { unsigned long *pending = &vcpu->arch.pending_exceptions; unsigned long *pending_clr = &vcpu->arch.pending_exceptions_clr; diff --git a/arch/mips/kvm/interrupt.h b/arch/mips/kvm/interrupt.h index 2143884709e4..fb118a2c8379 100644 --- a/arch/mips/kvm/interrupt.h +++ b/arch/mips/kvm/interrupt.h @@ -28,17 +28,13 @@ #define MIPS_EXC_MAX 12 /* XXXSL More to follow */ -extern char __kvm_mips_vcpu_run_end[]; -extern char mips32_exception[], mips32_exceptionEnd[]; -extern char mips32_GuestException[], mips32_GuestExceptionEnd[]; - #define C_TI (_ULCAST_(1) << 30) #define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (0) #define KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE (0) -void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, uint32_t priority); -void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, uint32_t priority); +void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority); +void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority); int kvm_mips_pending_timer(struct kvm_vcpu *vcpu); void kvm_mips_queue_timer_int_cb(struct kvm_vcpu *vcpu); @@ -48,7 +44,7 @@ void kvm_mips_queue_io_int_cb(struct kvm_vcpu *vcpu, void kvm_mips_dequeue_io_int_cb(struct kvm_vcpu *vcpu, struct kvm_mips_interrupt *irq); int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, - uint32_t cause); + u32 cause); int kvm_mips_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority, - uint32_t cause); -void kvm_mips_deliver_interrupts(struct kvm_vcpu *vcpu, uint32_t cause); + u32 cause); +void kvm_mips_deliver_interrupts(struct kvm_vcpu *vcpu, u32 cause); diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S deleted file mode 100644 index 828fcfc1cd7f..000000000000 --- a/arch/mips/kvm/locore.S +++ /dev/null @@ -1,605 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Main entry point for the guest, exception handling. - * - * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. - * Authors: Sanjay Lal <sanjayl@kymasys.com> - */ - -#include <asm/asm.h> -#include <asm/asmmacro.h> -#include <asm/regdef.h> -#include <asm/mipsregs.h> -#include <asm/stackframe.h> -#include <asm/asm-offsets.h> - -#define _C_LABEL(x) x -#define MIPSX(name) mips32_ ## name -#define CALLFRAME_SIZ 32 - -/* - * VECTOR - * exception vector entrypoint - */ -#define VECTOR(x, regmask) \ - .ent _C_LABEL(x),0; \ - EXPORT(x); - -#define VECTOR_END(x) \ - EXPORT(x); - -/* Overload, Danger Will Robinson!! */ -#define PT_HOST_USERLOCAL PT_EPC - -#define CP0_DDATA_LO $28,3 - -/* Resume Flags */ -#define RESUME_FLAG_HOST (1<<1) /* Resume host? */ - -#define RESUME_GUEST 0 -#define RESUME_HOST RESUME_FLAG_HOST - -/* - * __kvm_mips_vcpu_run: entry point to the guest - * a0: run - * a1: vcpu - */ - .set noreorder - -FEXPORT(__kvm_mips_vcpu_run) - /* k0/k1 not being used in host kernel context */ - INT_ADDIU k1, sp, -PT_SIZE - LONG_S $16, PT_R16(k1) - LONG_S $17, PT_R17(k1) - LONG_S $18, PT_R18(k1) - LONG_S $19, PT_R19(k1) - LONG_S $20, PT_R20(k1) - LONG_S $21, PT_R21(k1) - LONG_S $22, PT_R22(k1) - LONG_S $23, PT_R23(k1) - - LONG_S $28, PT_R28(k1) - LONG_S $29, PT_R29(k1) - LONG_S $30, PT_R30(k1) - LONG_S $31, PT_R31(k1) - - /* Save hi/lo */ - mflo v0 - LONG_S v0, PT_LO(k1) - mfhi v1 - LONG_S v1, PT_HI(k1) - - /* Save host status */ - mfc0 v0, CP0_STATUS - LONG_S v0, PT_STATUS(k1) - - /* Save DDATA_LO, will be used to store pointer to vcpu */ - mfc0 v1, CP0_DDATA_LO - LONG_S v1, PT_HOST_USERLOCAL(k1) - - /* DDATA_LO has pointer to vcpu */ - mtc0 a1, CP0_DDATA_LO - - /* Offset into vcpu->arch */ - INT_ADDIU k1, a1, VCPU_HOST_ARCH - - /* - * Save the host stack to VCPU, used for exception processing - * when we exit from the Guest - */ - LONG_S sp, VCPU_HOST_STACK(k1) - - /* Save the kernel gp as well */ - LONG_S gp, VCPU_HOST_GP(k1) - - /* - * Setup status register for running the guest in UM, interrupts - * are disabled - */ - li k0, (ST0_EXL | KSU_USER | ST0_BEV) - mtc0 k0, CP0_STATUS - ehb - - /* load up the new EBASE */ - LONG_L k0, VCPU_GUEST_EBASE(k1) - mtc0 k0, CP0_EBASE - - /* - * Now that the new EBASE has been loaded, unset BEV, set - * interrupt mask as it was but make sure that timer interrupts - * are enabled - */ - li k0, (ST0_EXL | KSU_USER | ST0_IE) - andi v0, v0, ST0_IM - or k0, k0, v0 - mtc0 k0, CP0_STATUS - ehb - - /* Set Guest EPC */ - LONG_L t0, VCPU_PC(k1) - mtc0 t0, CP0_EPC - -FEXPORT(__kvm_mips_load_asid) - /* Set the ASID for the Guest Kernel */ - PTR_L t0, VCPU_COP0(k1) - LONG_L t0, COP0_STATUS(t0) - andi t0, KSU_USER | ST0_ERL | ST0_EXL - xori t0, KSU_USER - bnez t0, 1f /* If kernel */ - INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ - INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ -1: - /* t1: contains the base of the ASID array, need to get the cpu id */ - LONG_L t2, TI_CPU($28) /* smp_processor_id */ - INT_SLL t2, t2, 2 /* x4 */ - REG_ADDU t3, t1, t2 - LONG_L k0, (t3) -#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE - li t3, CPUINFO_SIZE/4 - mul t2, t2, t3 /* x sizeof(struct cpuinfo_mips)/4 */ - LONG_L t2, (cpu_data + CPUINFO_ASID_MASK)(t2) - and k0, k0, t2 -#else - andi k0, k0, MIPS_ENTRYHI_ASID -#endif - mtc0 k0, CP0_ENTRYHI - ehb - - /* Disable RDHWR access */ - mtc0 zero, CP0_HWRENA - - .set noat - /* Now load up the Guest Context from VCPU */ - LONG_L $1, VCPU_R1(k1) - LONG_L $2, VCPU_R2(k1) - LONG_L $3, VCPU_R3(k1) - - LONG_L $4, VCPU_R4(k1) - LONG_L $5, VCPU_R5(k1) - LONG_L $6, VCPU_R6(k1) - LONG_L $7, VCPU_R7(k1) - - LONG_L $8, VCPU_R8(k1) - LONG_L $9, VCPU_R9(k1) - LONG_L $10, VCPU_R10(k1) - LONG_L $11, VCPU_R11(k1) - LONG_L $12, VCPU_R12(k1) - LONG_L $13, VCPU_R13(k1) - LONG_L $14, VCPU_R14(k1) - LONG_L $15, VCPU_R15(k1) - LONG_L $16, VCPU_R16(k1) - LONG_L $17, VCPU_R17(k1) - LONG_L $18, VCPU_R18(k1) - LONG_L $19, VCPU_R19(k1) - LONG_L $20, VCPU_R20(k1) - LONG_L $21, VCPU_R21(k1) - LONG_L $22, VCPU_R22(k1) - LONG_L $23, VCPU_R23(k1) - LONG_L $24, VCPU_R24(k1) - LONG_L $25, VCPU_R25(k1) - - /* k0/k1 loaded up later */ - - LONG_L $28, VCPU_R28(k1) - LONG_L $29, VCPU_R29(k1) - LONG_L $30, VCPU_R30(k1) - LONG_L $31, VCPU_R31(k1) - - /* Restore hi/lo */ - LONG_L k0, VCPU_LO(k1) - mtlo k0 - - LONG_L k0, VCPU_HI(k1) - mthi k0 - -FEXPORT(__kvm_mips_load_k0k1) - /* Restore the guest's k0/k1 registers */ - LONG_L k0, VCPU_R26(k1) - LONG_L k1, VCPU_R27(k1) - - /* Jump to guest */ - eret -EXPORT(__kvm_mips_vcpu_run_end) - -VECTOR(MIPSX(exception), unknown) -/* Find out what mode we came from and jump to the proper handler. */ - mtc0 k0, CP0_ERROREPC #01: Save guest k0 - ehb #02: - - mfc0 k0, CP0_EBASE #02: Get EBASE - INT_SRL k0, k0, 10 #03: Get rid of CPUNum - INT_SLL k0, k0, 10 #04 - LONG_S k1, 0x3000(k0) #05: Save k1 @ offset 0x3000 - INT_ADDIU k0, k0, 0x2000 #06: Exception handler is - # installed @ offset 0x2000 - j k0 #07: jump to the function - nop #08: branch delay slot -VECTOR_END(MIPSX(exceptionEnd)) -.end MIPSX(exception) - -/* - * Generic Guest exception handler. We end up here when the guest - * does something that causes a trap to kernel mode. - */ -NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) - /* Get the VCPU pointer from DDTATA_LO */ - mfc0 k1, CP0_DDATA_LO - INT_ADDIU k1, k1, VCPU_HOST_ARCH - - /* Start saving Guest context to VCPU */ - LONG_S $0, VCPU_R0(k1) - LONG_S $1, VCPU_R1(k1) - LONG_S $2, VCPU_R2(k1) - LONG_S $3, VCPU_R3(k1) - LONG_S $4, VCPU_R4(k1) - LONG_S $5, VCPU_R5(k1) - LONG_S $6, VCPU_R6(k1) - LONG_S $7, VCPU_R7(k1) - LONG_S $8, VCPU_R8(k1) - LONG_S $9, VCPU_R9(k1) - LONG_S $10, VCPU_R10(k1) - LONG_S $11, VCPU_R11(k1) - LONG_S $12, VCPU_R12(k1) - LONG_S $13, VCPU_R13(k1) - LONG_S $14, VCPU_R14(k1) - LONG_S $15, VCPU_R15(k1) - LONG_S $16, VCPU_R16(k1) - LONG_S $17, VCPU_R17(k1) - LONG_S $18, VCPU_R18(k1) - LONG_S $19, VCPU_R19(k1) - LONG_S $20, VCPU_R20(k1) - LONG_S $21, VCPU_R21(k1) - LONG_S $22, VCPU_R22(k1) - LONG_S $23, VCPU_R23(k1) - LONG_S $24, VCPU_R24(k1) - LONG_S $25, VCPU_R25(k1) - - /* Guest k0/k1 saved later */ - - LONG_S $28, VCPU_R28(k1) - LONG_S $29, VCPU_R29(k1) - LONG_S $30, VCPU_R30(k1) - LONG_S $31, VCPU_R31(k1) - - .set at - - /* We need to save hi/lo and restore them on the way out */ - mfhi t0 - LONG_S t0, VCPU_HI(k1) - - mflo t0 - LONG_S t0, VCPU_LO(k1) - - /* Finally save guest k0/k1 to VCPU */ - mfc0 t0, CP0_ERROREPC - LONG_S t0, VCPU_R26(k1) - - /* Get GUEST k1 and save it in VCPU */ - PTR_LI t1, ~0x2ff - mfc0 t0, CP0_EBASE - and t0, t0, t1 - LONG_L t0, 0x3000(t0) - LONG_S t0, VCPU_R27(k1) - - /* Now that context has been saved, we can use other registers */ - - /* Restore vcpu */ - mfc0 a1, CP0_DDATA_LO - move s1, a1 - - /* Restore run (vcpu->run) */ - LONG_L a0, VCPU_RUN(a1) - /* Save pointer to run in s0, will be saved by the compiler */ - move s0, a0 - - /* - * Save Host level EPC, BadVaddr and Cause to VCPU, useful to - * process the exception - */ - mfc0 k0,CP0_EPC - LONG_S k0, VCPU_PC(k1) - - mfc0 k0, CP0_BADVADDR - LONG_S k0, VCPU_HOST_CP0_BADVADDR(k1) - - mfc0 k0, CP0_CAUSE - LONG_S k0, VCPU_HOST_CP0_CAUSE(k1) - - mfc0 k0, CP0_ENTRYHI - LONG_S k0, VCPU_HOST_ENTRYHI(k1) - - /* Now restore the host state just enough to run the handlers */ - - /* Switch EBASE to the one used by Linux */ - /* load up the host EBASE */ - mfc0 v0, CP0_STATUS - - or k0, v0, ST0_BEV - - mtc0 k0, CP0_STATUS - ehb - - LONG_L k0, VCPU_HOST_EBASE(k1) - mtc0 k0,CP0_EBASE - - /* - * If FPU is enabled, save FCR31 and clear it so that later ctc1's don't - * trigger FPE for pending exceptions. - */ - and v1, v0, ST0_CU1 - beqz v1, 1f - nop - .set push - SET_HARDFLOAT - cfc1 t0, fcr31 - sw t0, VCPU_FCR31(k1) - ctc1 zero,fcr31 - .set pop -1: - -#ifdef CONFIG_CPU_HAS_MSA - /* - * If MSA is enabled, save MSACSR and clear it so that later - * instructions don't trigger MSAFPE for pending exceptions. - */ - mfc0 t0, CP0_CONFIG3 - ext t0, t0, 28, 1 /* MIPS_CONF3_MSAP */ - beqz t0, 1f - nop - mfc0 t0, CP0_CONFIG5 - ext t0, t0, 27, 1 /* MIPS_CONF5_MSAEN */ - beqz t0, 1f - nop - _cfcmsa t0, MSA_CSR - sw t0, VCPU_MSA_CSR(k1) - _ctcmsa MSA_CSR, zero -1: -#endif - - /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ - and v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE) - or v0, v0, ST0_CU0 - mtc0 v0, CP0_STATUS - ehb - - /* Load up host GP */ - LONG_L gp, VCPU_HOST_GP(k1) - - /* Need a stack before we can jump to "C" */ - LONG_L sp, VCPU_HOST_STACK(k1) - - /* Saved host state */ - INT_ADDIU sp, sp, -PT_SIZE - - /* - * XXXKYMA do we need to load the host ASID, maybe not because the - * kernel entries are marked GLOBAL, need to verify - */ - - /* Restore host DDATA_LO */ - LONG_L k0, PT_HOST_USERLOCAL(sp) - mtc0 k0, CP0_DDATA_LO - - /* Restore RDHWR access */ - PTR_LI k0, 0x2000000F - mtc0 k0, CP0_HWRENA - - /* Jump to handler */ -FEXPORT(__kvm_mips_jump_to_handler) - /* - * XXXKYMA: not sure if this is safe, how large is the stack?? - * Now jump to the kvm_mips_handle_exit() to see if we can deal - * with this in the kernel - */ - PTR_LA t9, kvm_mips_handle_exit - jalr.hb t9 - INT_ADDIU sp, sp, -CALLFRAME_SIZ /* BD Slot */ - - /* Return from handler Make sure interrupts are disabled */ - di - ehb - - /* - * XXXKYMA: k0/k1 could have been blown away if we processed - * an exception while we were handling the exception from the - * guest, reload k1 - */ - - move k1, s1 - INT_ADDIU k1, k1, VCPU_HOST_ARCH - - /* - * Check return value, should tell us if we are returning to the - * host (handle I/O etc)or resuming the guest - */ - andi t0, v0, RESUME_HOST - bnez t0, __kvm_mips_return_to_host - nop - -__kvm_mips_return_to_guest: - /* Put the saved pointer to vcpu (s1) back into the DDATA_LO Register */ - mtc0 s1, CP0_DDATA_LO - - /* Load up the Guest EBASE to minimize the window where BEV is set */ - LONG_L t0, VCPU_GUEST_EBASE(k1) - - /* Switch EBASE back to the one used by KVM */ - mfc0 v1, CP0_STATUS - or k0, v1, ST0_BEV - mtc0 k0, CP0_STATUS - ehb - mtc0 t0, CP0_EBASE - - /* Setup status register for running guest in UM */ - or v1, v1, (ST0_EXL | KSU_USER | ST0_IE) - and v1, v1, ~(ST0_CU0 | ST0_MX) - mtc0 v1, CP0_STATUS - ehb - - /* Set Guest EPC */ - LONG_L t0, VCPU_PC(k1) - mtc0 t0, CP0_EPC - - /* Set the ASID for the Guest Kernel */ - PTR_L t0, VCPU_COP0(k1) - LONG_L t0, COP0_STATUS(t0) - andi t0, KSU_USER | ST0_ERL | ST0_EXL - xori t0, KSU_USER - bnez t0, 1f /* If kernel */ - INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ - INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ -1: - /* t1: contains the base of the ASID array, need to get the cpu id */ - LONG_L t2, TI_CPU($28) /* smp_processor_id */ - INT_SLL t2, t2, 2 /* x4 */ - REG_ADDU t3, t1, t2 - LONG_L k0, (t3) -#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE - li t3, CPUINFO_SIZE/4 - mul t2, t2, t3 /* x sizeof(struct cpuinfo_mips)/4 */ - LONG_L t2, (cpu_data + CPUINFO_ASID_MASK)(t2) - and k0, k0, t2 -#else - andi k0, k0, MIPS_ENTRYHI_ASID -#endif - mtc0 k0, CP0_ENTRYHI - ehb - - /* Disable RDHWR access */ - mtc0 zero, CP0_HWRENA - - .set noat - /* load the guest context from VCPU and return */ - LONG_L $0, VCPU_R0(k1) - LONG_L $1, VCPU_R1(k1) - LONG_L $2, VCPU_R2(k1) - LONG_L $3, VCPU_R3(k1) - LONG_L $4, VCPU_R4(k1) - LONG_L $5, VCPU_R5(k1) - LONG_L $6, VCPU_R6(k1) - LONG_L $7, VCPU_R7(k1) - LONG_L $8, VCPU_R8(k1) - LONG_L $9, VCPU_R9(k1) - LONG_L $10, VCPU_R10(k1) - LONG_L $11, VCPU_R11(k1) - LONG_L $12, VCPU_R12(k1) - LONG_L $13, VCPU_R13(k1) - LONG_L $14, VCPU_R14(k1) - LONG_L $15, VCPU_R15(k1) - LONG_L $16, VCPU_R16(k1) - LONG_L $17, VCPU_R17(k1) - LONG_L $18, VCPU_R18(k1) - LONG_L $19, VCPU_R19(k1) - LONG_L $20, VCPU_R20(k1) - LONG_L $21, VCPU_R21(k1) - LONG_L $22, VCPU_R22(k1) - LONG_L $23, VCPU_R23(k1) - LONG_L $24, VCPU_R24(k1) - LONG_L $25, VCPU_R25(k1) - - /* $/k1 loaded later */ - LONG_L $28, VCPU_R28(k1) - LONG_L $29, VCPU_R29(k1) - LONG_L $30, VCPU_R30(k1) - LONG_L $31, VCPU_R31(k1) - -FEXPORT(__kvm_mips_skip_guest_restore) - LONG_L k0, VCPU_HI(k1) - mthi k0 - - LONG_L k0, VCPU_LO(k1) - mtlo k0 - - LONG_L k0, VCPU_R26(k1) - LONG_L k1, VCPU_R27(k1) - - eret - .set at - -__kvm_mips_return_to_host: - /* EBASE is already pointing to Linux */ - LONG_L k1, VCPU_HOST_STACK(k1) - INT_ADDIU k1,k1, -PT_SIZE - - /* Restore host DDATA_LO */ - LONG_L k0, PT_HOST_USERLOCAL(k1) - mtc0 k0, CP0_DDATA_LO - - /* - * r2/v0 is the return code, shift it down by 2 (arithmetic) - * to recover the err code - */ - INT_SRA k0, v0, 2 - move $2, k0 - - /* Load context saved on the host stack */ - LONG_L $16, PT_R16(k1) - LONG_L $17, PT_R17(k1) - LONG_L $18, PT_R18(k1) - LONG_L $19, PT_R19(k1) - LONG_L $20, PT_R20(k1) - LONG_L $21, PT_R21(k1) - LONG_L $22, PT_R22(k1) - LONG_L $23, PT_R23(k1) - - LONG_L $28, PT_R28(k1) - LONG_L $29, PT_R29(k1) - LONG_L $30, PT_R30(k1) - - LONG_L k0, PT_HI(k1) - mthi k0 - - LONG_L k0, PT_LO(k1) - mtlo k0 - - /* Restore RDHWR access */ - PTR_LI k0, 0x2000000F - mtc0 k0, CP0_HWRENA - - /* Restore RA, which is the address we will return to */ - LONG_L ra, PT_R31(k1) - j ra - nop - -VECTOR_END(MIPSX(GuestExceptionEnd)) -.end MIPSX(GuestException) - -MIPSX(exceptions): - #### - ##### The exception handlers. - ##### - .word _C_LABEL(MIPSX(GuestException)) # 0 - .word _C_LABEL(MIPSX(GuestException)) # 1 - .word _C_LABEL(MIPSX(GuestException)) # 2 - .word _C_LABEL(MIPSX(GuestException)) # 3 - .word _C_LABEL(MIPSX(GuestException)) # 4 - .word _C_LABEL(MIPSX(GuestException)) # 5 - .word _C_LABEL(MIPSX(GuestException)) # 6 - .word _C_LABEL(MIPSX(GuestException)) # 7 - .word _C_LABEL(MIPSX(GuestException)) # 8 - .word _C_LABEL(MIPSX(GuestException)) # 9 - .word _C_LABEL(MIPSX(GuestException)) # 10 - .word _C_LABEL(MIPSX(GuestException)) # 11 - .word _C_LABEL(MIPSX(GuestException)) # 12 - .word _C_LABEL(MIPSX(GuestException)) # 13 - .word _C_LABEL(MIPSX(GuestException)) # 14 - .word _C_LABEL(MIPSX(GuestException)) # 15 - .word _C_LABEL(MIPSX(GuestException)) # 16 - .word _C_LABEL(MIPSX(GuestException)) # 17 - .word _C_LABEL(MIPSX(GuestException)) # 18 - .word _C_LABEL(MIPSX(GuestException)) # 19 - .word _C_LABEL(MIPSX(GuestException)) # 20 - .word _C_LABEL(MIPSX(GuestException)) # 21 - .word _C_LABEL(MIPSX(GuestException)) # 22 - .word _C_LABEL(MIPSX(GuestException)) # 23 - .word _C_LABEL(MIPSX(GuestException)) # 24 - .word _C_LABEL(MIPSX(GuestException)) # 25 - .word _C_LABEL(MIPSX(GuestException)) # 26 - .word _C_LABEL(MIPSX(GuestException)) # 27 - .word _C_LABEL(MIPSX(GuestException)) # 28 - .word _C_LABEL(MIPSX(GuestException)) # 29 - .word _C_LABEL(MIPSX(GuestException)) # 30 - .word _C_LABEL(MIPSX(GuestException)) # 31 diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 44da5259f390..a6ea084b4d9d 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -9,6 +9,7 @@ * Authors: Sanjay Lal <sanjayl@kymasys.com> */ +#include <linux/bitops.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/kdebug.h> @@ -147,7 +148,7 @@ void kvm_mips_free_vcpus(struct kvm *kvm) /* Put the pages we reserved for the guest pmap */ for (i = 0; i < kvm->arch.guest_pmap_npages; i++) { if (kvm->arch.guest_pmap[i] != KVM_INVALID_PAGE) - kvm_mips_release_pfn_clean(kvm->arch.guest_pmap[i]); + kvm_release_pfn_clean(kvm->arch.guest_pmap[i]); } kfree(kvm->arch.guest_pmap); @@ -244,10 +245,27 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, } } +static inline void dump_handler(const char *symbol, void *start, void *end) +{ + u32 *p; + + pr_debug("LEAF(%s)\n", symbol); + + pr_debug("\t.set push\n"); + pr_debug("\t.set noreorder\n"); + + for (p = start; p < (u32 *)end; ++p) + pr_debug("\t.word\t0x%08x\t\t# %p\n", *p, p); + + pr_debug("\t.set\tpop\n"); + + pr_debug("\tEND(%s)\n", symbol); +} + struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { - int err, size, offset; - void *gebase; + int err, size; + void *gebase, *p, *handler; int i; struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); @@ -273,9 +291,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) else size = 0x4000; - /* Save Linux EBASE */ - vcpu->arch.host_ebase = (void *)read_c0_ebase(); - gebase = kzalloc(ALIGN(size, PAGE_SIZE), GFP_KERNEL); if (!gebase) { @@ -285,44 +300,53 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n", ALIGN(size, PAGE_SIZE), gebase); + /* + * Check new ebase actually fits in CP0_EBase. The lack of a write gate + * limits us to the low 512MB of physical address space. If the memory + * we allocate is out of range, just give up now. + */ + if (!cpu_has_ebase_wg && virt_to_phys(gebase) >= 0x20000000) { + kvm_err("CP0_EBase.WG required for guest exception base %pK\n", + gebase); + err = -ENOMEM; + goto out_free_gebase; + } + /* Save new ebase */ vcpu->arch.guest_ebase = gebase; - /* Copy L1 Guest Exception handler to correct offset */ + /* Build guest exception vectors dynamically in unmapped memory */ + handler = gebase + 0x2000; /* TLB Refill, EXL = 0 */ - memcpy(gebase, mips32_exception, - mips32_exceptionEnd - mips32_exception); + kvm_mips_build_exception(gebase, handler); /* General Exception Entry point */ - memcpy(gebase + 0x180, mips32_exception, - mips32_exceptionEnd - mips32_exception); + kvm_mips_build_exception(gebase + 0x180, handler); /* For vectored interrupts poke the exception code @ all offsets 0-7 */ for (i = 0; i < 8; i++) { kvm_debug("L1 Vectored handler @ %p\n", gebase + 0x200 + (i * VECTORSPACING)); - memcpy(gebase + 0x200 + (i * VECTORSPACING), mips32_exception, - mips32_exceptionEnd - mips32_exception); + kvm_mips_build_exception(gebase + 0x200 + i * VECTORSPACING, + handler); } - /* General handler, relocate to unmapped space for sanity's sake */ - offset = 0x2000; - kvm_debug("Installing KVM Exception handlers @ %p, %#x bytes\n", - gebase + offset, - mips32_GuestExceptionEnd - mips32_GuestException); + /* General exit handler */ + p = handler; + p = kvm_mips_build_exit(p); - memcpy(gebase + offset, mips32_GuestException, - mips32_GuestExceptionEnd - mips32_GuestException); + /* Guest entry routine */ + vcpu->arch.vcpu_run = p; + p = kvm_mips_build_vcpu_run(p); -#ifdef MODULE - offset += mips32_GuestExceptionEnd - mips32_GuestException; - memcpy(gebase + offset, (char *)__kvm_mips_vcpu_run, - __kvm_mips_vcpu_run_end - (char *)__kvm_mips_vcpu_run); - vcpu->arch.vcpu_run = gebase + offset; -#else - vcpu->arch.vcpu_run = __kvm_mips_vcpu_run; -#endif + /* Dump the generated code */ + pr_debug("#include <asm/asm.h>\n"); + pr_debug("#include <asm/regdef.h>\n"); + pr_debug("\n"); + dump_handler("kvm_vcpu_run", vcpu->arch.vcpu_run, p); + dump_handler("kvm_gen_exc", gebase + 0x180, gebase + 0x200); + dump_handler("kvm_exit", gebase + 0x2000, vcpu->arch.vcpu_run); /* Invalidate the icache for these ranges */ local_flush_icache_range((unsigned long)gebase, @@ -408,17 +432,19 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_mips_deliver_interrupts(vcpu, kvm_read_c0_guest_cause(vcpu->arch.cop0)); - __kvm_guest_enter(); + guest_enter_irqoff(); /* Disable hardware page table walking while in guest */ htw_stop(); + trace_kvm_enter(vcpu); r = vcpu->arch.vcpu_run(run, vcpu); + trace_kvm_out(vcpu); /* Re-enable HTW before enabling interrupts */ htw_start(); - __kvm_guest_exit(); + guest_exit_irqoff(); local_irq_enable(); if (vcpu->sigset_active) @@ -507,8 +533,10 @@ static u64 kvm_mips_get_one_regs[] = { KVM_REG_MIPS_R30, KVM_REG_MIPS_R31, +#ifndef CONFIG_CPU_MIPSR6 KVM_REG_MIPS_HI, KVM_REG_MIPS_LO, +#endif KVM_REG_MIPS_PC, KVM_REG_MIPS_CP0_INDEX, @@ -539,6 +567,104 @@ static u64 kvm_mips_get_one_regs[] = { KVM_REG_MIPS_COUNT_HZ, }; +static u64 kvm_mips_get_one_regs_fpu[] = { + KVM_REG_MIPS_FCR_IR, + KVM_REG_MIPS_FCR_CSR, +}; + +static u64 kvm_mips_get_one_regs_msa[] = { + KVM_REG_MIPS_MSA_IR, + KVM_REG_MIPS_MSA_CSR, +}; + +static u64 kvm_mips_get_one_regs_kscratch[] = { + KVM_REG_MIPS_CP0_KSCRATCH1, + KVM_REG_MIPS_CP0_KSCRATCH2, + KVM_REG_MIPS_CP0_KSCRATCH3, + KVM_REG_MIPS_CP0_KSCRATCH4, + KVM_REG_MIPS_CP0_KSCRATCH5, + KVM_REG_MIPS_CP0_KSCRATCH6, +}; + +static unsigned long kvm_mips_num_regs(struct kvm_vcpu *vcpu) +{ + unsigned long ret; + + ret = ARRAY_SIZE(kvm_mips_get_one_regs); + if (kvm_mips_guest_can_have_fpu(&vcpu->arch)) { + ret += ARRAY_SIZE(kvm_mips_get_one_regs_fpu) + 48; + /* odd doubles */ + if (boot_cpu_data.fpu_id & MIPS_FPIR_F64) + ret += 16; + } + if (kvm_mips_guest_can_have_msa(&vcpu->arch)) + ret += ARRAY_SIZE(kvm_mips_get_one_regs_msa) + 32; + ret += __arch_hweight8(vcpu->arch.kscratch_enabled); + ret += kvm_mips_callbacks->num_regs(vcpu); + + return ret; +} + +static int kvm_mips_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices) +{ + u64 index; + unsigned int i; + + if (copy_to_user(indices, kvm_mips_get_one_regs, + sizeof(kvm_mips_get_one_regs))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_mips_get_one_regs); + + if (kvm_mips_guest_can_have_fpu(&vcpu->arch)) { + if (copy_to_user(indices, kvm_mips_get_one_regs_fpu, + sizeof(kvm_mips_get_one_regs_fpu))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_mips_get_one_regs_fpu); + + for (i = 0; i < 32; ++i) { + index = KVM_REG_MIPS_FPR_32(i); + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + + /* skip odd doubles if no F64 */ + if (i & 1 && !(boot_cpu_data.fpu_id & MIPS_FPIR_F64)) + continue; + + index = KVM_REG_MIPS_FPR_64(i); + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + } + + if (kvm_mips_guest_can_have_msa(&vcpu->arch)) { + if (copy_to_user(indices, kvm_mips_get_one_regs_msa, + sizeof(kvm_mips_get_one_regs_msa))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_mips_get_one_regs_msa); + + for (i = 0; i < 32; ++i) { + index = KVM_REG_MIPS_VEC_128(i); + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + } + + for (i = 0; i < 6; ++i) { + if (!(vcpu->arch.kscratch_enabled & BIT(i + 2))) + continue; + + if (copy_to_user(indices, &kvm_mips_get_one_regs_kscratch[i], + sizeof(kvm_mips_get_one_regs_kscratch[i]))) + return -EFAULT; + ++indices; + } + + return kvm_mips_callbacks->copy_reg_indices(vcpu, indices); +} + static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { @@ -554,12 +680,14 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_R0 ... KVM_REG_MIPS_R31: v = (long)vcpu->arch.gprs[reg->id - KVM_REG_MIPS_R0]; break; +#ifndef CONFIG_CPU_MIPSR6 case KVM_REG_MIPS_HI: v = (long)vcpu->arch.hi; break; case KVM_REG_MIPS_LO: v = (long)vcpu->arch.lo; break; +#endif case KVM_REG_MIPS_PC: v = (long)vcpu->arch.pc; break; @@ -688,17 +816,37 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_CP0_ERROREPC: v = (long)kvm_read_c0_guest_errorepc(cop0); break; + case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6: + idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2; + if (!(vcpu->arch.kscratch_enabled & BIT(idx))) + return -EINVAL; + switch (idx) { + case 2: + v = (long)kvm_read_c0_guest_kscratch1(cop0); + break; + case 3: + v = (long)kvm_read_c0_guest_kscratch2(cop0); + break; + case 4: + v = (long)kvm_read_c0_guest_kscratch3(cop0); + break; + case 5: + v = (long)kvm_read_c0_guest_kscratch4(cop0); + break; + case 6: + v = (long)kvm_read_c0_guest_kscratch5(cop0); + break; + case 7: + v = (long)kvm_read_c0_guest_kscratch6(cop0); + break; + } + break; /* registers to be handled specially */ - case KVM_REG_MIPS_CP0_COUNT: - case KVM_REG_MIPS_COUNT_CTL: - case KVM_REG_MIPS_COUNT_RESUME: - case KVM_REG_MIPS_COUNT_HZ: + default: ret = kvm_mips_callbacks->get_one_reg(vcpu, reg, &v); if (ret) return ret; break; - default: - return -EINVAL; } if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) { u64 __user *uaddr64 = (u64 __user *)(long)reg->addr; @@ -755,12 +903,14 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_R1 ... KVM_REG_MIPS_R31: vcpu->arch.gprs[reg->id - KVM_REG_MIPS_R0] = v; break; +#ifndef CONFIG_CPU_MIPSR6 case KVM_REG_MIPS_HI: vcpu->arch.hi = v; break; case KVM_REG_MIPS_LO: vcpu->arch.lo = v; break; +#endif case KVM_REG_MIPS_PC: vcpu->arch.pc = v; break; @@ -859,22 +1009,34 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_CP0_ERROREPC: kvm_write_c0_guest_errorepc(cop0, v); break; + case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6: + idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2; + if (!(vcpu->arch.kscratch_enabled & BIT(idx))) + return -EINVAL; + switch (idx) { + case 2: + kvm_write_c0_guest_kscratch1(cop0, v); + break; + case 3: + kvm_write_c0_guest_kscratch2(cop0, v); + break; + case 4: + kvm_write_c0_guest_kscratch3(cop0, v); + break; + case 5: + kvm_write_c0_guest_kscratch4(cop0, v); + break; + case 6: + kvm_write_c0_guest_kscratch5(cop0, v); + break; + case 7: + kvm_write_c0_guest_kscratch6(cop0, v); + break; + } + break; /* registers to be handled specially */ - case KVM_REG_MIPS_CP0_COUNT: - case KVM_REG_MIPS_CP0_COMPARE: - case KVM_REG_MIPS_CP0_CAUSE: - case KVM_REG_MIPS_CP0_CONFIG: - case KVM_REG_MIPS_CP0_CONFIG1: - case KVM_REG_MIPS_CP0_CONFIG2: - case KVM_REG_MIPS_CP0_CONFIG3: - case KVM_REG_MIPS_CP0_CONFIG4: - case KVM_REG_MIPS_CP0_CONFIG5: - case KVM_REG_MIPS_COUNT_CTL: - case KVM_REG_MIPS_COUNT_RESUME: - case KVM_REG_MIPS_COUNT_HZ: - return kvm_mips_callbacks->set_one_reg(vcpu, reg, v); default: - return -EINVAL; + return kvm_mips_callbacks->set_one_reg(vcpu, reg, v); } return 0; } @@ -927,23 +1089,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, } case KVM_GET_REG_LIST: { struct kvm_reg_list __user *user_list = argp; - u64 __user *reg_dest; struct kvm_reg_list reg_list; unsigned n; if (copy_from_user(®_list, user_list, sizeof(reg_list))) return -EFAULT; n = reg_list.n; - reg_list.n = ARRAY_SIZE(kvm_mips_get_one_regs); + reg_list.n = kvm_mips_num_regs(vcpu); if (copy_to_user(user_list, ®_list, sizeof(reg_list))) return -EFAULT; if (n < reg_list.n) return -E2BIG; - reg_dest = user_list->reg; - if (copy_to_user(reg_dest, kvm_mips_get_one_regs, - sizeof(kvm_mips_get_one_regs))) - return -EFAULT; - return 0; + return kvm_mips_copy_reg_indices(vcpu, user_list->reg); } case KVM_NMI: /* Treat the NMI as a CPU reset */ @@ -1222,7 +1379,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) static void kvm_mips_set_c0_status(void) { - uint32_t status = read_c0_status(); + u32 status = read_c0_status(); if (cpu_has_dsp) status |= (ST0_MX); @@ -1236,9 +1393,9 @@ static void kvm_mips_set_c0_status(void) */ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) { - uint32_t cause = vcpu->arch.host_cp0_cause; - uint32_t exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; - uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; @@ -1260,6 +1417,7 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) kvm_debug("kvm_mips_handle_exit: cause: %#x, PC: %p, kvm_run: %p, kvm_vcpu: %p\n", cause, opc, run, vcpu); + trace_kvm_exit(vcpu, exccode); /* * Do a privilege check, if in UM most of these exit conditions end up @@ -1279,7 +1437,6 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) kvm_debug("[%d]EXCCODE_INT @ %p\n", vcpu->vcpu_id, opc); ++vcpu->stat.int_exits; - trace_kvm_exit(vcpu, INT_EXITS); if (need_resched()) cond_resched(); @@ -1291,7 +1448,6 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) kvm_debug("EXCCODE_CPU: @ PC: %p\n", opc); ++vcpu->stat.cop_unusable_exits; - trace_kvm_exit(vcpu, COP_UNUSABLE_EXITS); ret = kvm_mips_callbacks->handle_cop_unusable(vcpu); /* XXXKYMA: Might need to return to user space */ if (run->exit_reason == KVM_EXIT_IRQ_WINDOW_OPEN) @@ -1300,7 +1456,6 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) case EXCCODE_MOD: ++vcpu->stat.tlbmod_exits; - trace_kvm_exit(vcpu, TLBMOD_EXITS); ret = kvm_mips_callbacks->handle_tlb_mod(vcpu); break; @@ -1310,7 +1465,6 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) badvaddr); ++vcpu->stat.tlbmiss_st_exits; - trace_kvm_exit(vcpu, TLBMISS_ST_EXITS); ret = kvm_mips_callbacks->handle_tlb_st_miss(vcpu); break; @@ -1319,61 +1473,51 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) cause, opc, badvaddr); ++vcpu->stat.tlbmiss_ld_exits; - trace_kvm_exit(vcpu, TLBMISS_LD_EXITS); ret = kvm_mips_callbacks->handle_tlb_ld_miss(vcpu); break; case EXCCODE_ADES: ++vcpu->stat.addrerr_st_exits; - trace_kvm_exit(vcpu, ADDRERR_ST_EXITS); ret = kvm_mips_callbacks->handle_addr_err_st(vcpu); break; case EXCCODE_ADEL: ++vcpu->stat.addrerr_ld_exits; - trace_kvm_exit(vcpu, ADDRERR_LD_EXITS); ret = kvm_mips_callbacks->handle_addr_err_ld(vcpu); break; case EXCCODE_SYS: ++vcpu->stat.syscall_exits; - trace_kvm_exit(vcpu, SYSCALL_EXITS); ret = kvm_mips_callbacks->handle_syscall(vcpu); break; case EXCCODE_RI: ++vcpu->stat.resvd_inst_exits; - trace_kvm_exit(vcpu, RESVD_INST_EXITS); ret = kvm_mips_callbacks->handle_res_inst(vcpu); break; case EXCCODE_BP: ++vcpu->stat.break_inst_exits; - trace_kvm_exit(vcpu, BREAK_INST_EXITS); ret = kvm_mips_callbacks->handle_break(vcpu); break; case EXCCODE_TR: ++vcpu->stat.trap_inst_exits; - trace_kvm_exit(vcpu, TRAP_INST_EXITS); ret = kvm_mips_callbacks->handle_trap(vcpu); break; case EXCCODE_MSAFPE: ++vcpu->stat.msa_fpe_exits; - trace_kvm_exit(vcpu, MSA_FPE_EXITS); ret = kvm_mips_callbacks->handle_msa_fpe(vcpu); break; case EXCCODE_FPE: ++vcpu->stat.fpe_exits; - trace_kvm_exit(vcpu, FPE_EXITS); ret = kvm_mips_callbacks->handle_fpe(vcpu); break; case EXCCODE_MSADIS: ++vcpu->stat.msa_disabled_exits; - trace_kvm_exit(vcpu, MSA_DISABLED_EXITS); ret = kvm_mips_callbacks->handle_msa_disabled(vcpu); break; @@ -1400,11 +1544,13 @@ skip_emul: run->exit_reason = KVM_EXIT_INTR; ret = (-EINTR << 2) | RESUME_HOST; ++vcpu->stat.signal_exits; - trace_kvm_exit(vcpu, SIGNAL_EXITS); + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_SIGNAL); } } if (ret == RESUME_GUEST) { + trace_kvm_reenter(vcpu); + /* * If FPU / MSA are enabled (i.e. the guest's FPU / MSA context * is live), restore FCR31 / MSACSR. @@ -1450,7 +1596,7 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu) * not to clobber the status register directly via the commpage. */ if (cpu_has_msa && sr & ST0_CU1 && !(sr & ST0_FR) && - vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) + vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) kvm_lose_fpu(vcpu); /* @@ -1465,9 +1611,12 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu) enable_fpu_hazard(); /* If guest FPU state not active, restore it now */ - if (!(vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)) { + if (!(vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU)) { __kvm_restore_fpu(&vcpu->arch); - vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU; + vcpu->arch.aux_inuse |= KVM_MIPS_AUX_FPU; + trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU); + } else { + trace_kvm_aux(vcpu, KVM_TRACE_AUX_ENABLE, KVM_TRACE_AUX_FPU); } preempt_enable(); @@ -1494,8 +1643,8 @@ void kvm_own_msa(struct kvm_vcpu *vcpu) * interacts with MSA state, so play it safe and save it first. */ if (!(sr & ST0_FR) && - (vcpu->arch.fpu_inuse & (KVM_MIPS_FPU_FPU | - KVM_MIPS_FPU_MSA)) == KVM_MIPS_FPU_FPU) + (vcpu->arch.aux_inuse & (KVM_MIPS_AUX_FPU | + KVM_MIPS_AUX_MSA)) == KVM_MIPS_AUX_FPU) kvm_lose_fpu(vcpu); change_c0_status(ST0_CU1 | ST0_FR, sr); @@ -1509,22 +1658,26 @@ void kvm_own_msa(struct kvm_vcpu *vcpu) set_c0_config5(MIPS_CONF5_MSAEN); enable_fpu_hazard(); - switch (vcpu->arch.fpu_inuse & (KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA)) { - case KVM_MIPS_FPU_FPU: + switch (vcpu->arch.aux_inuse & (KVM_MIPS_AUX_FPU | KVM_MIPS_AUX_MSA)) { + case KVM_MIPS_AUX_FPU: /* * Guest FPU state already loaded, only restore upper MSA state */ __kvm_restore_msa_upper(&vcpu->arch); - vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_MSA; + vcpu->arch.aux_inuse |= KVM_MIPS_AUX_MSA; + trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_MSA); break; case 0: /* Neither FPU or MSA already active, restore full MSA state */ __kvm_restore_msa(&vcpu->arch); - vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_MSA; + vcpu->arch.aux_inuse |= KVM_MIPS_AUX_MSA; if (kvm_mips_guest_has_fpu(&vcpu->arch)) - vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU; + vcpu->arch.aux_inuse |= KVM_MIPS_AUX_FPU; + trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, + KVM_TRACE_AUX_FPU_MSA); break; default: + trace_kvm_aux(vcpu, KVM_TRACE_AUX_ENABLE, KVM_TRACE_AUX_MSA); break; } @@ -1536,13 +1689,15 @@ void kvm_own_msa(struct kvm_vcpu *vcpu) void kvm_drop_fpu(struct kvm_vcpu *vcpu) { preempt_disable(); - if (cpu_has_msa && vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) { + if (cpu_has_msa && vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) { disable_msa(); - vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_MSA; + trace_kvm_aux(vcpu, KVM_TRACE_AUX_DISCARD, KVM_TRACE_AUX_MSA); + vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_MSA; } - if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) { + if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) { clear_c0_status(ST0_CU1 | ST0_FR); - vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU; + trace_kvm_aux(vcpu, KVM_TRACE_AUX_DISCARD, KVM_TRACE_AUX_FPU); + vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_FPU; } preempt_enable(); } @@ -1558,25 +1713,27 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu) */ preempt_disable(); - if (cpu_has_msa && vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) { + if (cpu_has_msa && vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) { set_c0_config5(MIPS_CONF5_MSAEN); enable_fpu_hazard(); __kvm_save_msa(&vcpu->arch); + trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU_MSA); /* Disable MSA & FPU */ disable_msa(); - if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) { + if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) { clear_c0_status(ST0_CU1 | ST0_FR); disable_fpu_hazard(); } - vcpu->arch.fpu_inuse &= ~(KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA); - } else if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) { + vcpu->arch.aux_inuse &= ~(KVM_MIPS_AUX_FPU | KVM_MIPS_AUX_MSA); + } else if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) { set_c0_status(ST0_CU1); enable_fpu_hazard(); __kvm_save_fpu(&vcpu->arch); - vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU; + vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_FPU; + trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU); /* Disable FPU */ clear_c0_status(ST0_CU1 | ST0_FR); @@ -1638,6 +1795,10 @@ static int __init kvm_mips_init(void) { int ret; + ret = kvm_mips_entry_setup(); + if (ret) + return ret; + ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); if (ret) @@ -1645,18 +1806,6 @@ static int __init kvm_mips_init(void) register_die_notifier(&kvm_mips_csr_die_notifier); - /* - * On MIPS, kernel modules are executed from "mapped space", which - * requires TLBs. The TLB handling code is statically linked with - * the rest of the kernel (tlb.c) to avoid the possibility of - * double faulting. The issue is that the TLB code references - * routines that are part of the the KVM module, which are only - * available once the module is loaded. - */ - kvm_mips_gfn_to_pfn = gfn_to_pfn; - kvm_mips_release_pfn_clean = kvm_release_pfn_clean; - kvm_mips_is_error_pfn = is_error_pfn; - return 0; } @@ -1664,10 +1813,6 @@ static void __exit kvm_mips_exit(void) { kvm_exit(); - kvm_mips_gfn_to_pfn = NULL; - kvm_mips_release_pfn_clean = NULL; - kvm_mips_is_error_pfn = NULL; - unregister_die_notifier(&kvm_mips_csr_die_notifier); } diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c new file mode 100644 index 000000000000..57319ee57c4f --- /dev/null +++ b/arch/mips/kvm/mmu.c @@ -0,0 +1,375 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS MMU handling in the KVM module. + * + * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal <sanjayl@kymasys.com> + */ + +#include <linux/highmem.h> +#include <linux/kvm_host.h> +#include <asm/mmu_context.h> + +static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) +{ + int cpu = smp_processor_id(); + + return vcpu->arch.guest_kernel_asid[cpu] & + cpu_asid_mask(&cpu_data[cpu]); +} + +static u32 kvm_mips_get_user_asid(struct kvm_vcpu *vcpu) +{ + int cpu = smp_processor_id(); + + return vcpu->arch.guest_user_asid[cpu] & + cpu_asid_mask(&cpu_data[cpu]); +} + +static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn) +{ + int srcu_idx, err = 0; + kvm_pfn_t pfn; + + if (kvm->arch.guest_pmap[gfn] != KVM_INVALID_PAGE) + return 0; + + srcu_idx = srcu_read_lock(&kvm->srcu); + pfn = gfn_to_pfn(kvm, gfn); + + if (is_error_pfn(pfn)) { + kvm_err("Couldn't get pfn for gfn %#llx!\n", gfn); + err = -EFAULT; + goto out; + } + + kvm->arch.guest_pmap[gfn] = pfn; +out: + srcu_read_unlock(&kvm->srcu, srcu_idx); + return err; +} + +/* Translate guest KSEG0 addresses to Host PA */ +unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu, + unsigned long gva) +{ + gfn_t gfn; + unsigned long offset = gva & ~PAGE_MASK; + struct kvm *kvm = vcpu->kvm; + + if (KVM_GUEST_KSEGX(gva) != KVM_GUEST_KSEG0) { + kvm_err("%s/%p: Invalid gva: %#lx\n", __func__, + __builtin_return_address(0), gva); + return KVM_INVALID_PAGE; + } + + gfn = (KVM_GUEST_CPHYSADDR(gva) >> PAGE_SHIFT); + + if (gfn >= kvm->arch.guest_pmap_npages) { + kvm_err("%s: Invalid gfn: %#llx, GVA: %#lx\n", __func__, gfn, + gva); + return KVM_INVALID_PAGE; + } + + if (kvm_mips_map_page(vcpu->kvm, gfn) < 0) + return KVM_INVALID_ADDR; + + return (kvm->arch.guest_pmap[gfn] << PAGE_SHIFT) + offset; +} + +/* XXXKYMA: Must be called with interrupts disabled */ +int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, + struct kvm_vcpu *vcpu) +{ + gfn_t gfn; + kvm_pfn_t pfn0, pfn1; + unsigned long vaddr = 0; + unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0; + struct kvm *kvm = vcpu->kvm; + const int flush_dcache_mask = 0; + int ret; + + if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) { + kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr); + kvm_mips_dump_host_tlbs(); + return -1; + } + + gfn = (KVM_GUEST_CPHYSADDR(badvaddr) >> PAGE_SHIFT); + if (gfn >= kvm->arch.guest_pmap_npages) { + kvm_err("%s: Invalid gfn: %#llx, BadVaddr: %#lx\n", __func__, + gfn, badvaddr); + kvm_mips_dump_host_tlbs(); + return -1; + } + vaddr = badvaddr & (PAGE_MASK << 1); + + if (kvm_mips_map_page(vcpu->kvm, gfn) < 0) + return -1; + + if (kvm_mips_map_page(vcpu->kvm, gfn ^ 0x1) < 0) + return -1; + + pfn0 = kvm->arch.guest_pmap[gfn & ~0x1]; + pfn1 = kvm->arch.guest_pmap[gfn | 0x1]; + + entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | + ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) | + ENTRYLO_D | ENTRYLO_V; + entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | + ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) | + ENTRYLO_D | ENTRYLO_V; + + preempt_disable(); + entryhi = (vaddr | kvm_mips_get_kernel_asid(vcpu)); + ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, + flush_dcache_mask); + preempt_enable(); + + return ret; +} + +int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, + struct kvm_mips_tlb *tlb) +{ + unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0; + struct kvm *kvm = vcpu->kvm; + kvm_pfn_t pfn0, pfn1; + int ret; + + if ((tlb->tlb_hi & VPN2_MASK) == 0) { + pfn0 = 0; + pfn1 = 0; + } else { + if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo[0]) + >> PAGE_SHIFT) < 0) + return -1; + + if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo[1]) + >> PAGE_SHIFT) < 0) + return -1; + + pfn0 = kvm->arch.guest_pmap[ + mips3_tlbpfn_to_paddr(tlb->tlb_lo[0]) >> PAGE_SHIFT]; + pfn1 = kvm->arch.guest_pmap[ + mips3_tlbpfn_to_paddr(tlb->tlb_lo[1]) >> PAGE_SHIFT]; + } + + /* Get attributes from the Guest TLB */ + entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | + ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) | + (tlb->tlb_lo[0] & ENTRYLO_D) | + (tlb->tlb_lo[0] & ENTRYLO_V); + entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | + ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) | + (tlb->tlb_lo[1] & ENTRYLO_D) | + (tlb->tlb_lo[1] & ENTRYLO_V); + + kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc, + tlb->tlb_lo[0], tlb->tlb_lo[1]); + + preempt_disable(); + entryhi = (tlb->tlb_hi & VPN2_MASK) | (KVM_GUEST_KERNEL_MODE(vcpu) ? + kvm_mips_get_kernel_asid(vcpu) : + kvm_mips_get_user_asid(vcpu)); + ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, + tlb->tlb_mask); + preempt_enable(); + + return ret; +} + +void kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu, + struct kvm_vcpu *vcpu) +{ + unsigned long asid = asid_cache(cpu); + + asid += cpu_asid_inc(); + if (!(asid & cpu_asid_mask(&cpu_data[cpu]))) { + if (cpu_has_vtag_icache) + flush_icache_all(); + + kvm_local_flush_tlb_all(); /* start new asid cycle */ + + if (!asid) /* fix version if needed */ + asid = asid_first_version(cpu); + } + + cpu_context(cpu, mm) = asid_cache(cpu) = asid; +} + +/** + * kvm_mips_migrate_count() - Migrate timer. + * @vcpu: Virtual CPU. + * + * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it + * if it was running prior to being cancelled. + * + * Must be called when the VCPU is migrated to a different CPU to ensure that + * timer expiry during guest execution interrupts the guest and causes the + * interrupt to be delivered in a timely manner. + */ +static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu) +{ + if (hrtimer_cancel(&vcpu->arch.comparecount_timer)) + hrtimer_restart(&vcpu->arch.comparecount_timer); +} + +/* Restore ASID once we are scheduled back after preemption */ +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + unsigned long asid_mask = cpu_asid_mask(&cpu_data[cpu]); + unsigned long flags; + int newasid = 0; + + kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu); + + /* Allocate new kernel and user ASIDs if needed */ + + local_irq_save(flags); + + if ((vcpu->arch.guest_kernel_asid[cpu] ^ asid_cache(cpu)) & + asid_version_mask(cpu)) { + kvm_get_new_mmu_context(&vcpu->arch.guest_kernel_mm, cpu, vcpu); + vcpu->arch.guest_kernel_asid[cpu] = + vcpu->arch.guest_kernel_mm.context.asid[cpu]; + kvm_get_new_mmu_context(&vcpu->arch.guest_user_mm, cpu, vcpu); + vcpu->arch.guest_user_asid[cpu] = + vcpu->arch.guest_user_mm.context.asid[cpu]; + newasid++; + + kvm_debug("[%d]: cpu_context: %#lx\n", cpu, + cpu_context(cpu, current->mm)); + kvm_debug("[%d]: Allocated new ASID for Guest Kernel: %#x\n", + cpu, vcpu->arch.guest_kernel_asid[cpu]); + kvm_debug("[%d]: Allocated new ASID for Guest User: %#x\n", cpu, + vcpu->arch.guest_user_asid[cpu]); + } + + if (vcpu->arch.last_sched_cpu != cpu) { + kvm_debug("[%d->%d]KVM VCPU[%d] switch\n", + vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id); + /* + * Migrate the timer interrupt to the current CPU so that it + * always interrupts the guest and synchronously triggers a + * guest timer interrupt. + */ + kvm_mips_migrate_count(vcpu); + } + + if (!newasid) { + /* + * If we preempted while the guest was executing, then reload + * the pre-empted ASID + */ + if (current->flags & PF_VCPU) { + write_c0_entryhi(vcpu->arch. + preempt_entryhi & asid_mask); + ehb(); + } + } else { + /* New ASIDs were allocated for the VM */ + + /* + * Were we in guest context? If so then the pre-empted ASID is + * no longer valid, we need to set it to what it should be based + * on the mode of the Guest (Kernel/User) + */ + if (current->flags & PF_VCPU) { + if (KVM_GUEST_KERNEL_MODE(vcpu)) + write_c0_entryhi(vcpu->arch. + guest_kernel_asid[cpu] & + asid_mask); + else + write_c0_entryhi(vcpu->arch. + guest_user_asid[cpu] & + asid_mask); + ehb(); + } + } + + /* restore guest state to registers */ + kvm_mips_callbacks->vcpu_set_regs(vcpu); + + local_irq_restore(flags); + +} + +/* ASID can change if another task is scheduled during preemption */ +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ + unsigned long flags; + int cpu; + + local_irq_save(flags); + + cpu = smp_processor_id(); + + vcpu->arch.preempt_entryhi = read_c0_entryhi(); + vcpu->arch.last_sched_cpu = cpu; + + /* save guest state in registers */ + kvm_mips_callbacks->vcpu_get_regs(vcpu); + + if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) & + asid_version_mask(cpu))) { + kvm_debug("%s: Dropping MMU Context: %#lx\n", __func__, + cpu_context(cpu, current->mm)); + drop_mmu_context(current->mm, cpu); + } + write_c0_entryhi(cpu_asid(cpu, current->mm)); + ehb(); + + local_irq_restore(flags); +} + +u32 kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + unsigned long paddr, flags, vpn2, asid; + unsigned long va = (unsigned long)opc; + void *vaddr; + u32 inst; + int index; + + if (KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0 || + KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) { + local_irq_save(flags); + index = kvm_mips_host_tlb_lookup(vcpu, va); + if (index >= 0) { + inst = *(opc); + } else { + vpn2 = va & VPN2_MASK; + asid = kvm_read_c0_guest_entryhi(cop0) & + KVM_ENTRYHI_ASID; + index = kvm_mips_guest_tlb_lookup(vcpu, vpn2 | asid); + if (index < 0) { + kvm_err("%s: get_user_failed for %p, vcpu: %p, ASID: %#lx\n", + __func__, opc, vcpu, read_c0_entryhi()); + kvm_mips_dump_host_tlbs(); + kvm_mips_dump_guest_tlbs(vcpu); + local_irq_restore(flags); + return KVM_INVALID_INST; + } + kvm_mips_handle_mapped_seg_tlb_fault(vcpu, + &vcpu->arch. + guest_tlb[index]); + inst = *(opc); + } + local_irq_restore(flags); + } else if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) { + paddr = kvm_mips_translate_guest_kseg0_to_hpa(vcpu, va); + vaddr = kmap_atomic(pfn_to_page(PHYS_PFN(paddr))); + vaddr += paddr & ~PAGE_MASK; + inst = *(u32 *)vaddr; + kunmap_atomic(vaddr); + } else { + kvm_err("%s: illegal address: %p\n", __func__, opc); + return KVM_INVALID_INST; + } + + return inst; +} diff --git a/arch/mips/kvm/stats.c b/arch/mips/kvm/stats.c index 888bb67070ac..53f851a61554 100644 --- a/arch/mips/kvm/stats.c +++ b/arch/mips/kvm/stats.c @@ -11,27 +11,6 @@ #include <linux/kvm_host.h> -char *kvm_mips_exit_types_str[MAX_KVM_MIPS_EXIT_TYPES] = { - "WAIT", - "CACHE", - "Signal", - "Interrupt", - "COP0/1 Unusable", - "TLB Mod", - "TLB Miss (LD)", - "TLB Miss (ST)", - "Address Err (ST)", - "Address Error (LD)", - "System Call", - "Reserved Inst", - "Break Inst", - "Trap Inst", - "MSA FPE", - "FPE", - "MSA Disabled", - "D-Cache Flushes", -}; - char *kvm_cop0_str[N_MIPS_COPROC_REGS] = { "Index", "Random", diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c index ed021ae7867a..254377d8e0b9 100644 --- a/arch/mips/kvm/tlb.c +++ b/arch/mips/kvm/tlb.c @@ -14,7 +14,7 @@ #include <linux/smp.h> #include <linux/mm.h> #include <linux/delay.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/kvm_host.h> #include <linux/srcu.h> @@ -24,6 +24,7 @@ #include <asm/pgtable.h> #include <asm/cacheflush.h> #include <asm/tlb.h> +#include <asm/tlbdebug.h> #undef CONFIG_MIPS_MT #include <asm/r4kcache.h> @@ -32,22 +33,10 @@ #define KVM_GUEST_PC_TLB 0 #define KVM_GUEST_SP_TLB 1 -#define PRIx64 "llx" - atomic_t kvm_mips_instance; EXPORT_SYMBOL_GPL(kvm_mips_instance); -/* These function pointers are initialized once the KVM module is loaded */ -kvm_pfn_t (*kvm_mips_gfn_to_pfn)(struct kvm *kvm, gfn_t gfn); -EXPORT_SYMBOL_GPL(kvm_mips_gfn_to_pfn); - -void (*kvm_mips_release_pfn_clean)(kvm_pfn_t pfn); -EXPORT_SYMBOL_GPL(kvm_mips_release_pfn_clean); - -bool (*kvm_mips_is_error_pfn)(kvm_pfn_t pfn); -EXPORT_SYMBOL_GPL(kvm_mips_is_error_pfn); - -uint32_t kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) +static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) { int cpu = smp_processor_id(); @@ -55,7 +44,7 @@ uint32_t kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) cpu_asid_mask(&cpu_data[cpu]); } -uint32_t kvm_mips_get_user_asid(struct kvm_vcpu *vcpu) +static u32 kvm_mips_get_user_asid(struct kvm_vcpu *vcpu) { int cpu = smp_processor_id(); @@ -63,7 +52,7 @@ uint32_t kvm_mips_get_user_asid(struct kvm_vcpu *vcpu) cpu_asid_mask(&cpu_data[cpu]); } -inline uint32_t kvm_mips_get_commpage_asid(struct kvm_vcpu *vcpu) +inline u32 kvm_mips_get_commpage_asid(struct kvm_vcpu *vcpu) { return vcpu->kvm->arch.commpage_tlb; } @@ -72,50 +61,15 @@ inline uint32_t kvm_mips_get_commpage_asid(struct kvm_vcpu *vcpu) void kvm_mips_dump_host_tlbs(void) { - unsigned long old_entryhi; - unsigned long old_pagemask; - struct kvm_mips_tlb tlb; unsigned long flags; - int i; local_irq_save(flags); - old_entryhi = read_c0_entryhi(); - old_pagemask = read_c0_pagemask(); - kvm_info("HOST TLBs:\n"); - kvm_info("ASID: %#lx\n", read_c0_entryhi() & - cpu_asid_mask(¤t_cpu_data)); - - for (i = 0; i < current_cpu_data.tlbsize; i++) { - write_c0_index(i); - mtc0_tlbw_hazard(); - - tlb_read(); - tlbw_use_hazard(); + dump_tlb_regs(); + pr_info("\n"); + dump_tlb_all(); - tlb.tlb_hi = read_c0_entryhi(); - tlb.tlb_lo0 = read_c0_entrylo0(); - tlb.tlb_lo1 = read_c0_entrylo1(); - tlb.tlb_mask = read_c0_pagemask(); - - kvm_info("TLB%c%3d Hi 0x%08lx ", - (tlb.tlb_lo0 | tlb.tlb_lo1) & MIPS3_PG_V ? ' ' : '*', - i, tlb.tlb_hi); - kvm_info("Lo0=0x%09" PRIx64 " %c%c attr %lx ", - (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo0), - (tlb.tlb_lo0 & MIPS3_PG_D) ? 'D' : ' ', - (tlb.tlb_lo0 & MIPS3_PG_G) ? 'G' : ' ', - (tlb.tlb_lo0 >> 3) & 7); - kvm_info("Lo1=0x%09" PRIx64 " %c%c attr %lx sz=%lx\n", - (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo1), - (tlb.tlb_lo1 & MIPS3_PG_D) ? 'D' : ' ', - (tlb.tlb_lo1 & MIPS3_PG_G) ? 'G' : ' ', - (tlb.tlb_lo1 >> 3) & 7, tlb.tlb_mask); - } - write_c0_entryhi(old_entryhi); - write_c0_pagemask(old_pagemask); - mtc0_tlbw_hazard(); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(kvm_mips_dump_host_tlbs); @@ -132,74 +86,24 @@ void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu) for (i = 0; i < KVM_MIPS_GUEST_TLB_SIZE; i++) { tlb = vcpu->arch.guest_tlb[i]; kvm_info("TLB%c%3d Hi 0x%08lx ", - (tlb.tlb_lo0 | tlb.tlb_lo1) & MIPS3_PG_V ? ' ' : '*', + (tlb.tlb_lo[0] | tlb.tlb_lo[1]) & ENTRYLO_V + ? ' ' : '*', i, tlb.tlb_hi); - kvm_info("Lo0=0x%09" PRIx64 " %c%c attr %lx ", - (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo0), - (tlb.tlb_lo0 & MIPS3_PG_D) ? 'D' : ' ', - (tlb.tlb_lo0 & MIPS3_PG_G) ? 'G' : ' ', - (tlb.tlb_lo0 >> 3) & 7); - kvm_info("Lo1=0x%09" PRIx64 " %c%c attr %lx sz=%lx\n", - (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo1), - (tlb.tlb_lo1 & MIPS3_PG_D) ? 'D' : ' ', - (tlb.tlb_lo1 & MIPS3_PG_G) ? 'G' : ' ', - (tlb.tlb_lo1 >> 3) & 7, tlb.tlb_mask); + kvm_info("Lo0=0x%09llx %c%c attr %lx ", + (u64) mips3_tlbpfn_to_paddr(tlb.tlb_lo[0]), + (tlb.tlb_lo[0] & ENTRYLO_D) ? 'D' : ' ', + (tlb.tlb_lo[0] & ENTRYLO_G) ? 'G' : ' ', + (tlb.tlb_lo[0] & ENTRYLO_C) >> ENTRYLO_C_SHIFT); + kvm_info("Lo1=0x%09llx %c%c attr %lx sz=%lx\n", + (u64) mips3_tlbpfn_to_paddr(tlb.tlb_lo[1]), + (tlb.tlb_lo[1] & ENTRYLO_D) ? 'D' : ' ', + (tlb.tlb_lo[1] & ENTRYLO_G) ? 'G' : ' ', + (tlb.tlb_lo[1] & ENTRYLO_C) >> ENTRYLO_C_SHIFT, + tlb.tlb_mask); } } EXPORT_SYMBOL_GPL(kvm_mips_dump_guest_tlbs); -static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn) -{ - int srcu_idx, err = 0; - kvm_pfn_t pfn; - - if (kvm->arch.guest_pmap[gfn] != KVM_INVALID_PAGE) - return 0; - - srcu_idx = srcu_read_lock(&kvm->srcu); - pfn = kvm_mips_gfn_to_pfn(kvm, gfn); - - if (kvm_mips_is_error_pfn(pfn)) { - kvm_err("Couldn't get pfn for gfn %#" PRIx64 "!\n", gfn); - err = -EFAULT; - goto out; - } - - kvm->arch.guest_pmap[gfn] = pfn; -out: - srcu_read_unlock(&kvm->srcu, srcu_idx); - return err; -} - -/* Translate guest KSEG0 addresses to Host PA */ -unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu, - unsigned long gva) -{ - gfn_t gfn; - uint32_t offset = gva & ~PAGE_MASK; - struct kvm *kvm = vcpu->kvm; - - if (KVM_GUEST_KSEGX(gva) != KVM_GUEST_KSEG0) { - kvm_err("%s/%p: Invalid gva: %#lx\n", __func__, - __builtin_return_address(0), gva); - return KVM_INVALID_PAGE; - } - - gfn = (KVM_GUEST_CPHYSADDR(gva) >> PAGE_SHIFT); - - if (gfn >= kvm->arch.guest_pmap_npages) { - kvm_err("%s: Invalid gfn: %#llx, GVA: %#lx\n", __func__, gfn, - gva); - return KVM_INVALID_PAGE; - } - - if (kvm_mips_map_page(vcpu->kvm, gfn) < 0) - return KVM_INVALID_ADDR; - - return (kvm->arch.guest_pmap[gfn] << PAGE_SHIFT) + offset; -} -EXPORT_SYMBOL_GPL(kvm_mips_translate_guest_kseg0_to_hpa); - /* XXXKYMA: Must be called with interrupts disabled */ /* set flush_dcache_mask == 0 if no dcache flush required */ int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, @@ -243,12 +147,12 @@ int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, /* Flush D-cache */ if (flush_dcache_mask) { - if (entrylo0 & MIPS3_PG_V) { + if (entrylo0 & ENTRYLO_V) { ++vcpu->stat.flush_dcache_exits; flush_data_cache_page((entryhi & VPN2_MASK) & ~flush_dcache_mask); } - if (entrylo1 & MIPS3_PG_V) { + if (entrylo1 & ENTRYLO_V) { ++vcpu->stat.flush_dcache_exits; flush_data_cache_page(((entryhi & VPN2_MASK) & ~flush_dcache_mask) | @@ -259,96 +163,35 @@ int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, /* Restore old ASID */ write_c0_entryhi(old_entryhi); mtc0_tlbw_hazard(); - tlbw_use_hazard(); local_irq_restore(flags); return 0; } - -/* XXXKYMA: Must be called with interrupts disabled */ -int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, - struct kvm_vcpu *vcpu) -{ - gfn_t gfn; - kvm_pfn_t pfn0, pfn1; - unsigned long vaddr = 0; - unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0; - int even; - struct kvm *kvm = vcpu->kvm; - const int flush_dcache_mask = 0; - int ret; - - if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) { - kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr); - kvm_mips_dump_host_tlbs(); - return -1; - } - - gfn = (KVM_GUEST_CPHYSADDR(badvaddr) >> PAGE_SHIFT); - if (gfn >= kvm->arch.guest_pmap_npages) { - kvm_err("%s: Invalid gfn: %#llx, BadVaddr: %#lx\n", __func__, - gfn, badvaddr); - kvm_mips_dump_host_tlbs(); - return -1; - } - even = !(gfn & 0x1); - vaddr = badvaddr & (PAGE_MASK << 1); - - if (kvm_mips_map_page(vcpu->kvm, gfn) < 0) - return -1; - - if (kvm_mips_map_page(vcpu->kvm, gfn ^ 0x1) < 0) - return -1; - - if (even) { - pfn0 = kvm->arch.guest_pmap[gfn]; - pfn1 = kvm->arch.guest_pmap[gfn ^ 0x1]; - } else { - pfn0 = kvm->arch.guest_pmap[gfn ^ 0x1]; - pfn1 = kvm->arch.guest_pmap[gfn]; - } - - entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) | - (1 << 2) | (0x1 << 1); - entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) | - (1 << 2) | (0x1 << 1); - - preempt_disable(); - entryhi = (vaddr | kvm_mips_get_kernel_asid(vcpu)); - ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, - flush_dcache_mask); - preempt_enable(); - - return ret; -} -EXPORT_SYMBOL_GPL(kvm_mips_handle_kseg0_tlb_fault); +EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_write); int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, struct kvm_vcpu *vcpu) { - kvm_pfn_t pfn0, pfn1; + kvm_pfn_t pfn; unsigned long flags, old_entryhi = 0, vaddr = 0; - unsigned long entrylo0 = 0, entrylo1 = 0; + unsigned long entrylo[2] = { 0, 0 }; + unsigned int pair_idx; - pfn0 = CPHYSADDR(vcpu->arch.kseg0_commpage) >> PAGE_SHIFT; - pfn1 = 0; - entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) | - (1 << 2) | (0x1 << 1); - entrylo1 = 0; + pfn = PFN_DOWN(virt_to_phys(vcpu->arch.kseg0_commpage)); + pair_idx = (badvaddr >> PAGE_SHIFT) & 1; + entrylo[pair_idx] = mips3_paddr_to_tlbpfn(pfn << PAGE_SHIFT) | + ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) | + ENTRYLO_D | ENTRYLO_V; local_irq_save(flags); old_entryhi = read_c0_entryhi(); vaddr = badvaddr & (PAGE_MASK << 1); write_c0_entryhi(vaddr | kvm_mips_get_kernel_asid(vcpu)); - mtc0_tlbw_hazard(); - write_c0_entrylo0(entrylo0); - mtc0_tlbw_hazard(); - write_c0_entrylo1(entrylo1); - mtc0_tlbw_hazard(); + write_c0_entrylo0(entrylo[0]); + write_c0_entrylo1(entrylo[1]); write_c0_index(kvm_mips_get_commpage_asid(vcpu)); mtc0_tlbw_hazard(); tlb_write_indexed(); - mtc0_tlbw_hazard(); tlbw_use_hazard(); kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0 (R): 0x%08lx, entrylo1(R): 0x%08lx\n", @@ -358,68 +201,12 @@ int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, /* Restore old ASID */ write_c0_entryhi(old_entryhi); mtc0_tlbw_hazard(); - tlbw_use_hazard(); local_irq_restore(flags); return 0; } EXPORT_SYMBOL_GPL(kvm_mips_handle_commpage_tlb_fault); -int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, - struct kvm_mips_tlb *tlb, - unsigned long *hpa0, - unsigned long *hpa1) -{ - unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0; - struct kvm *kvm = vcpu->kvm; - kvm_pfn_t pfn0, pfn1; - int ret; - - if ((tlb->tlb_hi & VPN2_MASK) == 0) { - pfn0 = 0; - pfn1 = 0; - } else { - if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo0) - >> PAGE_SHIFT) < 0) - return -1; - - if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo1) - >> PAGE_SHIFT) < 0) - return -1; - - pfn0 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb->tlb_lo0) - >> PAGE_SHIFT]; - pfn1 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb->tlb_lo1) - >> PAGE_SHIFT]; - } - - if (hpa0) - *hpa0 = pfn0 << PAGE_SHIFT; - - if (hpa1) - *hpa1 = pfn1 << PAGE_SHIFT; - - /* Get attributes from the Guest TLB */ - entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) | - (tlb->tlb_lo0 & MIPS3_PG_D) | (tlb->tlb_lo0 & MIPS3_PG_V); - entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) | - (tlb->tlb_lo1 & MIPS3_PG_D) | (tlb->tlb_lo1 & MIPS3_PG_V); - - kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc, - tlb->tlb_lo0, tlb->tlb_lo1); - - preempt_disable(); - entryhi = (tlb->tlb_hi & VPN2_MASK) | (KVM_GUEST_KERNEL_MODE(vcpu) ? - kvm_mips_get_kernel_asid(vcpu) : - kvm_mips_get_user_asid(vcpu)); - ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, - tlb->tlb_mask); - preempt_enable(); - - return ret; -} -EXPORT_SYMBOL_GPL(kvm_mips_handle_mapped_seg_tlb_fault); - int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi) { int i; @@ -435,7 +222,7 @@ int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi) } kvm_debug("%s: entryhi: %#lx, index: %d lo0: %#lx, lo1: %#lx\n", - __func__, entryhi, index, tlb[i].tlb_lo0, tlb[i].tlb_lo1); + __func__, entryhi, index, tlb[i].tlb_lo[0], tlb[i].tlb_lo[1]); return index; } @@ -467,7 +254,6 @@ int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr) /* Restore old ASID */ write_c0_entryhi(old_entryhi); mtc0_tlbw_hazard(); - tlbw_use_hazard(); local_irq_restore(flags); @@ -498,21 +284,16 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va) if (idx > 0) { write_c0_entryhi(UNIQUE_ENTRYHI(idx)); - mtc0_tlbw_hazard(); - write_c0_entrylo0(0); - mtc0_tlbw_hazard(); - write_c0_entrylo1(0); mtc0_tlbw_hazard(); tlb_write_indexed(); - mtc0_tlbw_hazard(); + tlbw_use_hazard(); } write_c0_entryhi(old_entryhi); mtc0_tlbw_hazard(); - tlbw_use_hazard(); local_irq_restore(flags); @@ -540,61 +321,39 @@ void kvm_mips_flush_host_tlb(int skip_kseg0) /* Blast 'em all away. */ for (entry = 0; entry < maxentry; entry++) { write_c0_index(entry); - mtc0_tlbw_hazard(); if (skip_kseg0) { + mtc0_tlbr_hazard(); tlb_read(); - tlbw_use_hazard(); + tlb_read_hazard(); entryhi = read_c0_entryhi(); /* Don't blow away guest kernel entries */ if (KVM_GUEST_KSEGX(entryhi) == KVM_GUEST_KSEG0) continue; + + write_c0_pagemask(old_pagemask); } /* Make sure all entries differ. */ write_c0_entryhi(UNIQUE_ENTRYHI(entry)); - mtc0_tlbw_hazard(); write_c0_entrylo0(0); - mtc0_tlbw_hazard(); write_c0_entrylo1(0); mtc0_tlbw_hazard(); tlb_write_indexed(); - mtc0_tlbw_hazard(); + tlbw_use_hazard(); } - tlbw_use_hazard(); - write_c0_entryhi(old_entryhi); write_c0_pagemask(old_pagemask); mtc0_tlbw_hazard(); - tlbw_use_hazard(); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(kvm_mips_flush_host_tlb); -void kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu, - struct kvm_vcpu *vcpu) -{ - unsigned long asid = asid_cache(cpu); - - asid += cpu_asid_inc(); - if (!(asid & cpu_asid_mask(&cpu_data[cpu]))) { - if (cpu_has_vtag_icache) - flush_icache_all(); - - kvm_local_flush_tlb_all(); /* start new asid cycle */ - - if (!asid) /* fix version if needed */ - asid = asid_first_version(cpu); - } - - cpu_context(cpu, mm) = asid_cache(cpu) = asid; -} - void kvm_local_flush_tlb_all(void) { unsigned long flags; @@ -614,185 +373,12 @@ void kvm_local_flush_tlb_all(void) write_c0_index(entry); mtc0_tlbw_hazard(); tlb_write_indexed(); + tlbw_use_hazard(); entry++; } - tlbw_use_hazard(); write_c0_entryhi(old_ctx); mtc0_tlbw_hazard(); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(kvm_local_flush_tlb_all); - -/** - * kvm_mips_migrate_count() - Migrate timer. - * @vcpu: Virtual CPU. - * - * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it - * if it was running prior to being cancelled. - * - * Must be called when the VCPU is migrated to a different CPU to ensure that - * timer expiry during guest execution interrupts the guest and causes the - * interrupt to be delivered in a timely manner. - */ -static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu) -{ - if (hrtimer_cancel(&vcpu->arch.comparecount_timer)) - hrtimer_restart(&vcpu->arch.comparecount_timer); -} - -/* Restore ASID once we are scheduled back after preemption */ -void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) -{ - unsigned long asid_mask = cpu_asid_mask(&cpu_data[cpu]); - unsigned long flags; - int newasid = 0; - - kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu); - - /* Allocate new kernel and user ASIDs if needed */ - - local_irq_save(flags); - - if ((vcpu->arch.guest_kernel_asid[cpu] ^ asid_cache(cpu)) & - asid_version_mask(cpu)) { - kvm_get_new_mmu_context(&vcpu->arch.guest_kernel_mm, cpu, vcpu); - vcpu->arch.guest_kernel_asid[cpu] = - vcpu->arch.guest_kernel_mm.context.asid[cpu]; - kvm_get_new_mmu_context(&vcpu->arch.guest_user_mm, cpu, vcpu); - vcpu->arch.guest_user_asid[cpu] = - vcpu->arch.guest_user_mm.context.asid[cpu]; - newasid++; - - kvm_debug("[%d]: cpu_context: %#lx\n", cpu, - cpu_context(cpu, current->mm)); - kvm_debug("[%d]: Allocated new ASID for Guest Kernel: %#x\n", - cpu, vcpu->arch.guest_kernel_asid[cpu]); - kvm_debug("[%d]: Allocated new ASID for Guest User: %#x\n", cpu, - vcpu->arch.guest_user_asid[cpu]); - } - - if (vcpu->arch.last_sched_cpu != cpu) { - kvm_debug("[%d->%d]KVM VCPU[%d] switch\n", - vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id); - /* - * Migrate the timer interrupt to the current CPU so that it - * always interrupts the guest and synchronously triggers a - * guest timer interrupt. - */ - kvm_mips_migrate_count(vcpu); - } - - if (!newasid) { - /* - * If we preempted while the guest was executing, then reload - * the pre-empted ASID - */ - if (current->flags & PF_VCPU) { - write_c0_entryhi(vcpu->arch. - preempt_entryhi & asid_mask); - ehb(); - } - } else { - /* New ASIDs were allocated for the VM */ - - /* - * Were we in guest context? If so then the pre-empted ASID is - * no longer valid, we need to set it to what it should be based - * on the mode of the Guest (Kernel/User) - */ - if (current->flags & PF_VCPU) { - if (KVM_GUEST_KERNEL_MODE(vcpu)) - write_c0_entryhi(vcpu->arch. - guest_kernel_asid[cpu] & - asid_mask); - else - write_c0_entryhi(vcpu->arch. - guest_user_asid[cpu] & - asid_mask); - ehb(); - } - } - - /* restore guest state to registers */ - kvm_mips_callbacks->vcpu_set_regs(vcpu); - - local_irq_restore(flags); - -} -EXPORT_SYMBOL_GPL(kvm_arch_vcpu_load); - -/* ASID can change if another task is scheduled during preemption */ -void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) -{ - unsigned long flags; - uint32_t cpu; - - local_irq_save(flags); - - cpu = smp_processor_id(); - - vcpu->arch.preempt_entryhi = read_c0_entryhi(); - vcpu->arch.last_sched_cpu = cpu; - - /* save guest state in registers */ - kvm_mips_callbacks->vcpu_get_regs(vcpu); - - if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) & - asid_version_mask(cpu))) { - kvm_debug("%s: Dropping MMU Context: %#lx\n", __func__, - cpu_context(cpu, current->mm)); - drop_mmu_context(current->mm, cpu); - } - write_c0_entryhi(cpu_asid(cpu, current->mm)); - ehb(); - - local_irq_restore(flags); -} -EXPORT_SYMBOL_GPL(kvm_arch_vcpu_put); - -uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu) -{ - struct mips_coproc *cop0 = vcpu->arch.cop0; - unsigned long paddr, flags, vpn2, asid; - uint32_t inst; - int index; - - if (KVM_GUEST_KSEGX((unsigned long) opc) < KVM_GUEST_KSEG0 || - KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) { - local_irq_save(flags); - index = kvm_mips_host_tlb_lookup(vcpu, (unsigned long) opc); - if (index >= 0) { - inst = *(opc); - } else { - vpn2 = (unsigned long) opc & VPN2_MASK; - asid = kvm_read_c0_guest_entryhi(cop0) & - KVM_ENTRYHI_ASID; - index = kvm_mips_guest_tlb_lookup(vcpu, vpn2 | asid); - if (index < 0) { - kvm_err("%s: get_user_failed for %p, vcpu: %p, ASID: %#lx\n", - __func__, opc, vcpu, read_c0_entryhi()); - kvm_mips_dump_host_tlbs(); - local_irq_restore(flags); - return KVM_INVALID_INST; - } - kvm_mips_handle_mapped_seg_tlb_fault(vcpu, - &vcpu->arch. - guest_tlb[index], - NULL, NULL); - inst = *(opc); - } - local_irq_restore(flags); - } else if (KVM_GUEST_KSEGX(opc) == KVM_GUEST_KSEG0) { - paddr = - kvm_mips_translate_guest_kseg0_to_hpa(vcpu, - (unsigned long) opc); - inst = *(uint32_t *) CKSEG0ADDR(paddr); - } else { - kvm_err("%s: illegal address: %p\n", __func__, opc); - return KVM_INVALID_INST; - } - - return inst; -} -EXPORT_SYMBOL_GPL(kvm_get_inst); diff --git a/arch/mips/kvm/trace.h b/arch/mips/kvm/trace.h index bd6437f67dc0..c858cf168078 100644 --- a/arch/mips/kvm/trace.h +++ b/arch/mips/kvm/trace.h @@ -17,8 +17,75 @@ #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE trace -/* Tracepoints for VM eists */ -extern char *kvm_mips_exit_types_str[MAX_KVM_MIPS_EXIT_TYPES]; +/* + * Tracepoints for VM enters + */ +DECLARE_EVENT_CLASS(kvm_transition, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + TP_STRUCT__entry( + __field(unsigned long, pc) + ), + + TP_fast_assign( + __entry->pc = vcpu->arch.pc; + ), + + TP_printk("PC: 0x%08lx", + __entry->pc) +); + +DEFINE_EVENT(kvm_transition, kvm_enter, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu)); + +DEFINE_EVENT(kvm_transition, kvm_reenter, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu)); + +DEFINE_EVENT(kvm_transition, kvm_out, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu)); + +/* The first 32 exit reasons correspond to Cause.ExcCode */ +#define KVM_TRACE_EXIT_INT 0 +#define KVM_TRACE_EXIT_TLBMOD 1 +#define KVM_TRACE_EXIT_TLBMISS_LD 2 +#define KVM_TRACE_EXIT_TLBMISS_ST 3 +#define KVM_TRACE_EXIT_ADDRERR_LD 4 +#define KVM_TRACE_EXIT_ADDRERR_ST 5 +#define KVM_TRACE_EXIT_SYSCALL 8 +#define KVM_TRACE_EXIT_BREAK_INST 9 +#define KVM_TRACE_EXIT_RESVD_INST 10 +#define KVM_TRACE_EXIT_COP_UNUSABLE 11 +#define KVM_TRACE_EXIT_TRAP_INST 13 +#define KVM_TRACE_EXIT_MSA_FPE 14 +#define KVM_TRACE_EXIT_FPE 15 +#define KVM_TRACE_EXIT_MSA_DISABLED 21 +/* Further exit reasons */ +#define KVM_TRACE_EXIT_WAIT 32 +#define KVM_TRACE_EXIT_CACHE 33 +#define KVM_TRACE_EXIT_SIGNAL 34 + +/* Tracepoints for VM exits */ +#define kvm_trace_symbol_exit_types \ + { KVM_TRACE_EXIT_INT, "Interrupt" }, \ + { KVM_TRACE_EXIT_TLBMOD, "TLB Mod" }, \ + { KVM_TRACE_EXIT_TLBMISS_LD, "TLB Miss (LD)" }, \ + { KVM_TRACE_EXIT_TLBMISS_ST, "TLB Miss (ST)" }, \ + { KVM_TRACE_EXIT_ADDRERR_LD, "Address Error (LD)" }, \ + { KVM_TRACE_EXIT_ADDRERR_ST, "Address Err (ST)" }, \ + { KVM_TRACE_EXIT_SYSCALL, "System Call" }, \ + { KVM_TRACE_EXIT_BREAK_INST, "Break Inst" }, \ + { KVM_TRACE_EXIT_RESVD_INST, "Reserved Inst" }, \ + { KVM_TRACE_EXIT_COP_UNUSABLE, "COP0/1 Unusable" }, \ + { KVM_TRACE_EXIT_TRAP_INST, "Trap Inst" }, \ + { KVM_TRACE_EXIT_MSA_FPE, "MSA FPE" }, \ + { KVM_TRACE_EXIT_FPE, "FPE" }, \ + { KVM_TRACE_EXIT_MSA_DISABLED, "MSA Disabled" }, \ + { KVM_TRACE_EXIT_WAIT, "WAIT" }, \ + { KVM_TRACE_EXIT_CACHE, "CACHE" }, \ + { KVM_TRACE_EXIT_SIGNAL, "Signal" } TRACE_EVENT(kvm_exit, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason), @@ -34,10 +101,173 @@ TRACE_EVENT(kvm_exit, ), TP_printk("[%s]PC: 0x%08lx", - kvm_mips_exit_types_str[__entry->reason], + __print_symbolic(__entry->reason, + kvm_trace_symbol_exit_types), __entry->pc) ); +#define KVM_TRACE_MFC0 0 +#define KVM_TRACE_MTC0 1 +#define KVM_TRACE_DMFC0 2 +#define KVM_TRACE_DMTC0 3 +#define KVM_TRACE_RDHWR 4 + +#define KVM_TRACE_HWR_COP0 0 +#define KVM_TRACE_HWR_HWR 1 + +#define KVM_TRACE_COP0(REG, SEL) ((KVM_TRACE_HWR_COP0 << 8) | \ + ((REG) << 3) | (SEL)) +#define KVM_TRACE_HWR(REG, SEL) ((KVM_TRACE_HWR_HWR << 8) | \ + ((REG) << 3) | (SEL)) + +#define kvm_trace_symbol_hwr_ops \ + { KVM_TRACE_MFC0, "MFC0" }, \ + { KVM_TRACE_MTC0, "MTC0" }, \ + { KVM_TRACE_DMFC0, "DMFC0" }, \ + { KVM_TRACE_DMTC0, "DMTC0" }, \ + { KVM_TRACE_RDHWR, "RDHWR" } + +#define kvm_trace_symbol_hwr_cop \ + { KVM_TRACE_HWR_COP0, "COP0" }, \ + { KVM_TRACE_HWR_HWR, "HWR" } + +#define kvm_trace_symbol_hwr_regs \ + { KVM_TRACE_COP0( 0, 0), "Index" }, \ + { KVM_TRACE_COP0( 2, 0), "EntryLo0" }, \ + { KVM_TRACE_COP0( 3, 0), "EntryLo1" }, \ + { KVM_TRACE_COP0( 4, 0), "Context" }, \ + { KVM_TRACE_COP0( 4, 2), "UserLocal" }, \ + { KVM_TRACE_COP0( 5, 0), "PageMask" }, \ + { KVM_TRACE_COP0( 6, 0), "Wired" }, \ + { KVM_TRACE_COP0( 7, 0), "HWREna" }, \ + { KVM_TRACE_COP0( 8, 0), "BadVAddr" }, \ + { KVM_TRACE_COP0( 9, 0), "Count" }, \ + { KVM_TRACE_COP0(10, 0), "EntryHi" }, \ + { KVM_TRACE_COP0(11, 0), "Compare" }, \ + { KVM_TRACE_COP0(12, 0), "Status" }, \ + { KVM_TRACE_COP0(12, 1), "IntCtl" }, \ + { KVM_TRACE_COP0(12, 2), "SRSCtl" }, \ + { KVM_TRACE_COP0(13, 0), "Cause" }, \ + { KVM_TRACE_COP0(14, 0), "EPC" }, \ + { KVM_TRACE_COP0(15, 0), "PRId" }, \ + { KVM_TRACE_COP0(15, 1), "EBase" }, \ + { KVM_TRACE_COP0(16, 0), "Config" }, \ + { KVM_TRACE_COP0(16, 1), "Config1" }, \ + { KVM_TRACE_COP0(16, 2), "Config2" }, \ + { KVM_TRACE_COP0(16, 3), "Config3" }, \ + { KVM_TRACE_COP0(16, 4), "Config4" }, \ + { KVM_TRACE_COP0(16, 5), "Config5" }, \ + { KVM_TRACE_COP0(16, 7), "Config7" }, \ + { KVM_TRACE_COP0(26, 0), "ECC" }, \ + { KVM_TRACE_COP0(30, 0), "ErrorEPC" }, \ + { KVM_TRACE_COP0(31, 2), "KScratch1" }, \ + { KVM_TRACE_COP0(31, 3), "KScratch2" }, \ + { KVM_TRACE_COP0(31, 4), "KScratch3" }, \ + { KVM_TRACE_COP0(31, 5), "KScratch4" }, \ + { KVM_TRACE_COP0(31, 6), "KScratch5" }, \ + { KVM_TRACE_COP0(31, 7), "KScratch6" }, \ + { KVM_TRACE_HWR( 0, 0), "CPUNum" }, \ + { KVM_TRACE_HWR( 1, 0), "SYNCI_Step" }, \ + { KVM_TRACE_HWR( 2, 0), "CC" }, \ + { KVM_TRACE_HWR( 3, 0), "CCRes" }, \ + { KVM_TRACE_HWR(29, 0), "ULR" } + +TRACE_EVENT(kvm_hwr, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int op, unsigned int reg, + unsigned long val), + TP_ARGS(vcpu, op, reg, val), + TP_STRUCT__entry( + __field(unsigned long, val) + __field(u16, reg) + __field(u8, op) + ), + + TP_fast_assign( + __entry->val = val; + __entry->reg = reg; + __entry->op = op; + ), + + TP_printk("%s %s (%s:%u:%u) 0x%08lx", + __print_symbolic(__entry->op, + kvm_trace_symbol_hwr_ops), + __print_symbolic(__entry->reg, + kvm_trace_symbol_hwr_regs), + __print_symbolic(__entry->reg >> 8, + kvm_trace_symbol_hwr_cop), + (__entry->reg >> 3) & 0x1f, + __entry->reg & 0x7, + __entry->val) +); + +#define KVM_TRACE_AUX_RESTORE 0 +#define KVM_TRACE_AUX_SAVE 1 +#define KVM_TRACE_AUX_ENABLE 2 +#define KVM_TRACE_AUX_DISABLE 3 +#define KVM_TRACE_AUX_DISCARD 4 + +#define KVM_TRACE_AUX_FPU 1 +#define KVM_TRACE_AUX_MSA 2 +#define KVM_TRACE_AUX_FPU_MSA 3 + +#define kvm_trace_symbol_aux_op \ + { KVM_TRACE_AUX_RESTORE, "restore" }, \ + { KVM_TRACE_AUX_SAVE, "save" }, \ + { KVM_TRACE_AUX_ENABLE, "enable" }, \ + { KVM_TRACE_AUX_DISABLE, "disable" }, \ + { KVM_TRACE_AUX_DISCARD, "discard" } + +#define kvm_trace_symbol_aux_state \ + { KVM_TRACE_AUX_FPU, "FPU" }, \ + { KVM_TRACE_AUX_MSA, "MSA" }, \ + { KVM_TRACE_AUX_FPU_MSA, "FPU & MSA" } + +TRACE_EVENT(kvm_aux, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int op, + unsigned int state), + TP_ARGS(vcpu, op, state), + TP_STRUCT__entry( + __field(unsigned long, pc) + __field(u8, op) + __field(u8, state) + ), + + TP_fast_assign( + __entry->pc = vcpu->arch.pc; + __entry->op = op; + __entry->state = state; + ), + + TP_printk("%s %s PC: 0x%08lx", + __print_symbolic(__entry->op, + kvm_trace_symbol_aux_op), + __print_symbolic(__entry->state, + kvm_trace_symbol_aux_state), + __entry->pc) +); + +TRACE_EVENT(kvm_asid_change, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int old_asid, + unsigned int new_asid), + TP_ARGS(vcpu, old_asid, new_asid), + TP_STRUCT__entry( + __field(unsigned long, pc) + __field(u8, old_asid) + __field(u8, new_asid) + ), + + TP_fast_assign( + __entry->pc = vcpu->arch.pc; + __entry->old_asid = old_asid; + __entry->new_asid = new_asid; + ), + + TP_printk("PC: 0x%08lx old: 0x%02x new: 0x%02x", + __entry->pc, + __entry->old_asid, + __entry->new_asid) +); + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index 6ba0fafcecbc..091553942bcb 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -21,7 +21,7 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva) { gpa_t gpa; - uint32_t kseg = KSEGX(gva); + gva_t kseg = KSEGX(gva); if ((kseg == CKSEG0) || (kseg == CKSEG1)) gpa = CPHYSADDR(gva); @@ -40,8 +40,8 @@ static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_run *run = vcpu->run; - uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc; - unsigned long cause = vcpu->arch.host_cp0_cause; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; @@ -87,15 +87,15 @@ static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu) static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; - unsigned long cause = vcpu->arch.host_cp0_cause; + u32 cause = vcpu->arch.host_cp0_cause; enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0 || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) { - kvm_debug("USER/KSEG23 ADDR TLB MOD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n", + kvm_debug("USER/KSEG23 ADDR TLB MOD fault: cause %#x, PC: %p, BadVaddr: %#lx\n", cause, opc, badvaddr); er = kvm_mips_handle_tlbmod(cause, opc, run, vcpu); @@ -111,14 +111,14 @@ static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu) * when we are not using HIGHMEM. Need to address this in a * HIGHMEM kernel */ - kvm_err("TLB MOD fault not handled, cause %#lx, PC: %p, BadVaddr: %#lx\n", + kvm_err("TLB MOD fault not handled, cause %#x, PC: %p, BadVaddr: %#lx\n", cause, opc, badvaddr); kvm_mips_dump_host_tlbs(); kvm_arch_vcpu_dump_regs(vcpu); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = RESUME_HOST; } else { - kvm_err("Illegal TLB Mod fault address , cause %#lx, PC: %p, BadVaddr: %#lx\n", + kvm_err("Illegal TLB Mod fault address , cause %#x, PC: %p, BadVaddr: %#lx\n", cause, opc, badvaddr); kvm_mips_dump_host_tlbs(); kvm_arch_vcpu_dump_regs(vcpu); @@ -128,59 +128,12 @@ static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu) return ret; } -static int kvm_trap_emul_handle_tlb_st_miss(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc; - unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; - unsigned long cause = vcpu->arch.host_cp0_cause; - enum emulation_result er = EMULATE_DONE; - int ret = RESUME_GUEST; - - if (((badvaddr & PAGE_MASK) == KVM_GUEST_COMMPAGE_ADDR) - && KVM_GUEST_KERNEL_MODE(vcpu)) { - if (kvm_mips_handle_commpage_tlb_fault(badvaddr, vcpu) < 0) { - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - ret = RESUME_HOST; - } - } else if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0 - || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) { - kvm_debug("USER ADDR TLB LD fault: cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); - er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu); - if (er == EMULATE_DONE) - ret = RESUME_GUEST; - else { - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - ret = RESUME_HOST; - } - } else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) { - /* - * All KSEG0 faults are handled by KVM, as the guest kernel does - * not expect to ever get them - */ - if (kvm_mips_handle_kseg0_tlb_fault - (vcpu->arch.host_cp0_badvaddr, vcpu) < 0) { - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - ret = RESUME_HOST; - } - } else { - kvm_err("Illegal TLB LD fault address , cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); - kvm_mips_dump_host_tlbs(); - kvm_arch_vcpu_dump_regs(vcpu); - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - ret = RESUME_HOST; - } - return ret; -} - -static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu) +static int kvm_trap_emul_handle_tlb_miss(struct kvm_vcpu *vcpu, bool store) { struct kvm_run *run = vcpu->run; - uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; - unsigned long cause = vcpu->arch.host_cp0_cause; + u32 cause = vcpu->arch.host_cp0_cause; enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; @@ -192,8 +145,8 @@ static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu) } } else if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0 || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) { - kvm_debug("USER ADDR TLB ST fault: PC: %#lx, BadVaddr: %#lx\n", - vcpu->arch.pc, badvaddr); + kvm_debug("USER ADDR TLB %s fault: cause %#x, PC: %p, BadVaddr: %#lx\n", + store ? "ST" : "LD", cause, opc, badvaddr); /* * User Address (UA) fault, this could happen if @@ -213,14 +166,18 @@ static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu) ret = RESUME_HOST; } } else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) { + /* + * All KSEG0 faults are handled by KVM, as the guest kernel does + * not expect to ever get them + */ if (kvm_mips_handle_kseg0_tlb_fault (vcpu->arch.host_cp0_badvaddr, vcpu) < 0) { run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = RESUME_HOST; } } else { - kvm_err("Illegal TLB ST fault address , cause %#lx, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); + kvm_err("Illegal TLB %s fault address , cause %#x, PC: %p, BadVaddr: %#lx\n", + store ? "ST" : "LD", cause, opc, badvaddr); kvm_mips_dump_host_tlbs(); kvm_arch_vcpu_dump_regs(vcpu); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -229,12 +186,22 @@ static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu) return ret; } +static int kvm_trap_emul_handle_tlb_st_miss(struct kvm_vcpu *vcpu) +{ + return kvm_trap_emul_handle_tlb_miss(vcpu, true); +} + +static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu) +{ + return kvm_trap_emul_handle_tlb_miss(vcpu, false); +} + static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; - unsigned long cause = vcpu->arch.host_cp0_cause; + u32 cause = vcpu->arch.host_cp0_cause; enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; @@ -251,7 +218,7 @@ static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu) ret = RESUME_HOST; } } else { - kvm_err("Address Error (STORE): cause %#lx, PC: %p, BadVaddr: %#lx\n", + kvm_err("Address Error (STORE): cause %#x, PC: %p, BadVaddr: %#lx\n", cause, opc, badvaddr); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = RESUME_HOST; @@ -262,9 +229,9 @@ static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu) static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; - unsigned long cause = vcpu->arch.host_cp0_cause; + u32 cause = vcpu->arch.host_cp0_cause; enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; @@ -280,7 +247,7 @@ static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu) ret = RESUME_HOST; } } else { - kvm_err("Address Error (LOAD): cause %#lx, PC: %p, BadVaddr: %#lx\n", + kvm_err("Address Error (LOAD): cause %#x, PC: %p, BadVaddr: %#lx\n", cause, opc, badvaddr); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = RESUME_HOST; @@ -292,8 +259,8 @@ static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu) static int kvm_trap_emul_handle_syscall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc; - unsigned long cause = vcpu->arch.host_cp0_cause; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; @@ -310,8 +277,8 @@ static int kvm_trap_emul_handle_syscall(struct kvm_vcpu *vcpu) static int kvm_trap_emul_handle_res_inst(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc; - unsigned long cause = vcpu->arch.host_cp0_cause; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; @@ -328,8 +295,8 @@ static int kvm_trap_emul_handle_res_inst(struct kvm_vcpu *vcpu) static int kvm_trap_emul_handle_break(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc; - unsigned long cause = vcpu->arch.host_cp0_cause; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; @@ -346,8 +313,8 @@ static int kvm_trap_emul_handle_break(struct kvm_vcpu *vcpu) static int kvm_trap_emul_handle_trap(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc; - unsigned long cause = vcpu->arch.host_cp0_cause; + u32 __user *opc = (u32 __user *)vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; @@ -364,8 +331,8 @@ static int kvm_trap_emul_handle_trap(struct kvm_vcpu *vcpu) static int kvm_trap_emul_handle_msa_fpe(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc; - unsigned long cause = vcpu->arch.host_cp0_cause; + u32 __user *opc = (u32 __user *)vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; @@ -382,8 +349,8 @@ static int kvm_trap_emul_handle_msa_fpe(struct kvm_vcpu *vcpu) static int kvm_trap_emul_handle_fpe(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc; - unsigned long cause = vcpu->arch.host_cp0_cause; + u32 __user *opc = (u32 __user *)vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; @@ -407,8 +374,8 @@ static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_run *run = vcpu->run; - uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc; - unsigned long cause = vcpu->arch.host_cp0_cause; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; @@ -451,24 +418,41 @@ static int kvm_trap_emul_vm_init(struct kvm *kvm) static int kvm_trap_emul_vcpu_init(struct kvm_vcpu *vcpu) { + vcpu->arch.kscratch_enabled = 0xfc; + return 0; } static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; - uint32_t config1; + u32 config, config1; int vcpu_id = vcpu->vcpu_id; /* * Arch specific stuff, set up config registers properly so that the - * guest will come up as expected, for now we simulate a MIPS 24kc + * guest will come up as expected */ +#ifndef CONFIG_CPU_MIPSR6 + /* r2-r5, simulate a MIPS 24kc */ kvm_write_c0_guest_prid(cop0, 0x00019300); - /* Have config1, Cacheable, noncoherent, write-back, write allocate */ - kvm_write_c0_guest_config(cop0, MIPS_CONF_M | (0x3 << CP0C0_K0) | - (0x1 << CP0C0_AR) | - (MMU_TYPE_R4000 << CP0C0_MT)); +#else + /* r6+, simulate a generic QEMU machine */ + kvm_write_c0_guest_prid(cop0, 0x00010000); +#endif + /* + * Have config1, Cacheable, noncoherent, write-back, write allocate. + * Endianness, arch revision & virtually tagged icache should match + * host. + */ + config = read_c0_config() & MIPS_CONF_AR; + config |= MIPS_CONF_M | CONF_CM_CACHABLE_NONCOHERENT | MIPS_CONF_MT_TLB; +#ifdef CONFIG_CPU_BIG_ENDIAN + config |= CONF_BE; +#endif + if (cpu_has_vtag_icache) + config |= MIPS_CONF_VI; + kvm_write_c0_guest_config(cop0, config); /* Read the cache characteristics from the host Config1 Register */ config1 = (read_c0_config1() & ~0x7f); @@ -478,9 +462,8 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) config1 |= ((KVM_MIPS_GUEST_TLB_SIZE - 1) << 25); /* We unset some bits that we aren't emulating */ - config1 &= - ~((1 << CP0C1_C2) | (1 << CP0C1_MD) | (1 << CP0C1_PC) | - (1 << CP0C1_WR) | (1 << CP0C1_CA)); + config1 &= ~(MIPS_CONF1_C2 | MIPS_CONF1_MD | MIPS_CONF1_PC | + MIPS_CONF1_WR | MIPS_CONF1_CA); kvm_write_c0_guest_config1(cop0, config1); /* Have config3, no tertiary/secondary caches implemented */ @@ -511,6 +494,17 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) return 0; } +static unsigned long kvm_trap_emul_num_regs(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static int kvm_trap_emul_copy_reg_indices(struct kvm_vcpu *vcpu, + u64 __user *indices) +{ + return 0; +} + static int kvm_trap_emul_get_one_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, s64 *v) @@ -660,6 +654,8 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { .dequeue_io_int = kvm_mips_dequeue_io_int_cb, .irq_deliver = kvm_mips_irq_deliver_cb, .irq_clear = kvm_mips_irq_clear_cb, + .num_regs = kvm_trap_emul_num_regs, + .copy_reg_indices = kvm_trap_emul_copy_reg_indices, .get_one_reg = kvm_trap_emul_get_one_reg, .set_one_reg = kvm_trap_emul_set_one_reg, .vcpu_get_regs = kvm_trap_emul_vcpu_get_regs, diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index ff17669e30a3..8ac0e5994ed2 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -66,7 +66,7 @@ int gic_present; #endif static int exin_avail; -static struct resource ltq_eiu_irq[MAX_EIU]; +static u32 ltq_eiu_irq[MAX_EIU]; static void __iomem *ltq_icu_membase[MAX_IM]; static void __iomem *ltq_eiu_membase; static struct irq_domain *ltq_domain; @@ -75,7 +75,7 @@ static int ltq_perfcount_irq; int ltq_eiu_get_irq(int exin) { if (exin < exin_avail) - return ltq_eiu_irq[exin].start; + return ltq_eiu_irq[exin]; return -1; } @@ -125,8 +125,8 @@ static int ltq_eiu_settype(struct irq_data *d, unsigned int type) { int i; - for (i = 0; i < MAX_EIU; i++) { - if (d->hwirq == ltq_eiu_irq[i].start) { + for (i = 0; i < exin_avail; i++) { + if (d->hwirq == ltq_eiu_irq[i]) { int val = 0; int edge = 0; @@ -173,8 +173,8 @@ static unsigned int ltq_startup_eiu_irq(struct irq_data *d) int i; ltq_enable_irq(d); - for (i = 0; i < MAX_EIU; i++) { - if (d->hwirq == ltq_eiu_irq[i].start) { + for (i = 0; i < exin_avail; i++) { + if (d->hwirq == ltq_eiu_irq[i]) { /* by default we are low level triggered */ ltq_eiu_settype(d, IRQF_TRIGGER_LOW); /* clear all pending */ @@ -195,8 +195,8 @@ static void ltq_shutdown_eiu_irq(struct irq_data *d) int i; ltq_disable_irq(d); - for (i = 0; i < MAX_EIU; i++) { - if (d->hwirq == ltq_eiu_irq[i].start) { + for (i = 0; i < exin_avail; i++) { + if (d->hwirq == ltq_eiu_irq[i]) { /* disable */ ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_INEN) & ~BIT(i), LTQ_EIU_EXIN_INEN); @@ -206,7 +206,7 @@ static void ltq_shutdown_eiu_irq(struct irq_data *d) } static struct irq_chip ltq_irq_type = { - "icu", + .name = "icu", .irq_enable = ltq_enable_irq, .irq_disable = ltq_disable_irq, .irq_unmask = ltq_enable_irq, @@ -216,7 +216,7 @@ static struct irq_chip ltq_irq_type = { }; static struct irq_chip ltq_eiu_type = { - "eiu", + .name = "eiu", .irq_startup = ltq_startup_eiu_irq, .irq_shutdown = ltq_shutdown_eiu_irq, .irq_enable = ltq_enable_irq, @@ -341,10 +341,10 @@ static int icu_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) return 0; for (i = 0; i < exin_avail; i++) - if (hw == ltq_eiu_irq[i].start) + if (hw == ltq_eiu_irq[i]) chip = <q_eiu_type; - irq_set_chip_and_handler(hw, chip, handle_level_irq); + irq_set_chip_and_handler(irq, chip, handle_level_irq); return 0; } @@ -439,14 +439,15 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent) eiu_node = of_find_compatible_node(NULL, NULL, "lantiq,eiu-xway"); if (eiu_node && !of_address_to_resource(eiu_node, 0, &res)) { /* find out how many external irq sources we have */ - exin_avail = of_irq_count(eiu_node); + exin_avail = of_property_count_u32_elems(eiu_node, + "lantiq,eiu-irqs"); if (exin_avail > MAX_EIU) exin_avail = MAX_EIU; - ret = of_irq_to_resource_table(eiu_node, + ret = of_property_read_u32_array(eiu_node, "lantiq,eiu-irqs", ltq_eiu_irq, exin_avail); - if (ret != exin_avail) + if (ret) panic("failed to load external irq resources"); if (!request_mem_region(res.start, resource_size(&res), diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c index 5f693ac77a0d..4cbb000e778e 100644 --- a/arch/mips/lantiq/prom.c +++ b/arch/mips/lantiq/prom.c @@ -74,8 +74,8 @@ void __init plat_mem_setup(void) set_io_port_base((unsigned long) KSEG1); - if (fw_arg0 == -2) /* UHI interface */ - dtb = (void *)fw_arg1; + if (fw_passed_dtb) /* UHI interface */ + dtb = (void *)fw_passed_dtb; else if (__dtb_start != __dtb_end) dtb = (void *)__dtb_start; else diff --git a/arch/mips/loongson64/common/dma-swiotlb.c b/arch/mips/loongson64/common/dma-swiotlb.c index 4ffa6fc81c8f..1a80b6f73ab2 100644 --- a/arch/mips/loongson64/common/dma-swiotlb.c +++ b/arch/mips/loongson64/common/dma-swiotlb.c @@ -10,7 +10,7 @@ #include <dma-coherence.h> static void *loongson_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs) + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { void *ret; @@ -41,7 +41,7 @@ static void *loongson_dma_alloc_coherent(struct device *dev, size_t size, } static void loongson_dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs) + void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { swiotlb_free_coherent(dev, size, vaddr, dma_handle); } @@ -49,7 +49,7 @@ static void loongson_dma_free_coherent(struct device *dev, size_t size, static dma_addr_t loongson_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { dma_addr_t daddr = swiotlb_map_page(dev, page, offset, size, dir, attrs); @@ -59,9 +59,9 @@ static dma_addr_t loongson_dma_map_page(struct device *dev, struct page *page, static int loongson_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { - int r = swiotlb_map_sg_attrs(dev, sg, nents, dir, NULL); + int r = swiotlb_map_sg_attrs(dev, sg, nents, dir, 0); mb(); return r; diff --git a/arch/mips/loongson64/loongson-3/hpet.c b/arch/mips/loongson64/loongson-3/hpet.c index 249039af66c4..4788bea62a6a 100644 --- a/arch/mips/loongson64/loongson-3/hpet.c +++ b/arch/mips/loongson64/loongson-3/hpet.c @@ -13,8 +13,8 @@ #define SMBUS_PCI_REG64 0x64 #define SMBUS_PCI_REGB4 0xb4 -#define HPET_MIN_CYCLES 64 -#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) +#define HPET_MIN_CYCLES 16 +#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES * 12) static DEFINE_SPINLOCK(hpet_lock); DEFINE_PER_CPU(struct clock_event_device, hpet_clockevent_device); @@ -157,14 +157,14 @@ static int hpet_tick_resume(struct clock_event_device *evt) static int hpet_next_event(unsigned long delta, struct clock_event_device *evt) { - unsigned int cnt; - int res; + u32 cnt; + s32 res; cnt = hpet_read(HPET_COUNTER); - cnt += delta; + cnt += (u32) delta; hpet_write(HPET_T0_CMP, cnt); - res = (int)(cnt - hpet_read(HPET_COUNTER)); + res = (s32)(cnt - hpet_read(HPET_COUNTER)); return res < HPET_MIN_CYCLES ? -ETIME : 0; } @@ -230,7 +230,7 @@ void __init setup_hpet_timer(void) cd = &per_cpu(hpet_clockevent_device, cpu); cd->name = "hpet"; - cd->rating = 320; + cd->rating = 100; cd->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; cd->set_state_shutdown = hpet_set_state_shutdown; cd->set_state_periodic = hpet_set_state_periodic; diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c index e59759af63d9..2fec6f753a35 100644 --- a/arch/mips/loongson64/loongson-3/smp.c +++ b/arch/mips/loongson64/loongson-3/smp.c @@ -417,6 +417,7 @@ static int loongson3_cpu_disable(void) return -EBUSY; set_cpu_online(cpu, false); + calculate_cpu_foreign_map(); cpumask_clear_cpu(cpu, &cpu_callin_map); local_irq_save(flags); fixup_irqs(); diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index d96e912b9d44..36775d20b0e7 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -434,8 +434,8 @@ static int microMIPS32_to_MIPS32(union mips_instruction *insn_ptr) * a single subroutine should be used across both * modules. */ -static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, - unsigned long *contpc) +int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, + unsigned long *contpc) { union mips_instruction insn = (union mips_instruction)dec_insn.insn; unsigned int fcr31; @@ -627,8 +627,8 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - case cbcond0_op: - case cbcond1_op: + case pop10_op: + case pop30_op: if (!cpu_has_mips_r6) break; if (insn.i_format.rt && !insn.i_format.rs) @@ -683,14 +683,14 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.next_pc_inc; return 1; - case beqzcjic_op: + case pop66_op: if (!cpu_has_mips_r6) break; *contpc = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - case bnezcjialc_op: + case pop76_op: if (!cpu_has_mips_r6) break; if (!insn.i_format.rs) @@ -784,10 +784,10 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, */ static inline int cop1_64bit(struct pt_regs *xcp) { - if (config_enabled(CONFIG_64BIT) && !config_enabled(CONFIG_MIPS32_O32)) + if (IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_MIPS32_O32)) return 1; - else if (config_enabled(CONFIG_32BIT) && - !config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT)) + else if (IS_ENABLED(CONFIG_32BIT) && + !IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT)) return 0; return !test_thread_flag(TIF_32BIT_FPREGS); @@ -1268,7 +1268,7 @@ branch_common: * instruction in the dslot. */ sig = mips_dsemul(xcp, ir, - contpc); + bcpc, contpc); if (sig < 0) break; if (sig) @@ -1323,7 +1323,7 @@ branch_common: * Single step the non-cp1 * instruction in the dslot */ - sig = mips_dsemul(xcp, ir, contpc); + sig = mips_dsemul(xcp, ir, bcpc, contpc); if (sig < 0) break; if (sig) diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c index 47074887e64c..72a4642eee2c 100644 --- a/arch/mips/math-emu/dsemul.c +++ b/arch/mips/math-emu/dsemul.c @@ -1,3 +1,6 @@ +#include <linux/err.h> +#include <linux/slab.h> + #include <asm/branch.h> #include <asm/cacheflush.h> #include <asm/fpu_emulator.h> @@ -5,43 +8,211 @@ #include <asm/mipsregs.h> #include <asm/uaccess.h> -#include "ieee754.h" - -/* - * Emulate the arbitrary instruction ir at xcp->cp0_epc. Required when - * we have to emulate the instruction in a COP1 branch delay slot. Do - * not change cp0_epc due to the instruction +/** + * struct emuframe - The 'emulation' frame structure + * @emul: The instruction to 'emulate'. + * @badinst: A break instruction to cause a return to the kernel. * - * According to the spec: - * 1) it shouldn't be a branch :-) - * 2) it can be a COP instruction :-( - * 3) if we are tring to run a protected memory space we must take - * special care on memory access instructions :-( - */ - -/* - * "Trampoline" return routine to catch exception following - * execution of delay-slot instruction execution. + * This structure defines the frames placed within the delay slot emulation + * page in response to a call to mips_dsemul(). Each thread may be allocated + * only one frame at any given time. The kernel stores within it the + * instruction to be 'emulated' followed by a break instruction, then + * executes the frame in user mode. The break causes a trap to the kernel + * which leads to do_dsemulret() being called unless the instruction in + * @emul causes a trap itself, is a branch, or a signal is delivered to + * the thread. In these cases the allocated frame will either be reused by + * a subsequent delay slot 'emulation', or be freed during signal delivery or + * upon thread exit. + * + * This approach is used because: + * + * - Actually emulating all instructions isn't feasible. We would need to + * be able to handle instructions from all revisions of the MIPS ISA, + * all ASEs & all vendor instruction set extensions. This would be a + * whole lot of work & continual maintenance burden as new instructions + * are introduced, and in the case of some vendor extensions may not + * even be possible. Thus we need to take the approach of actually + * executing the instruction. + * + * - We must execute the instruction within user context. If we were to + * execute the instruction in kernel mode then it would have access to + * kernel resources without very careful checks, leaving us with a + * high potential for security or stability issues to arise. + * + * - We used to place the frame on the users stack, but this requires + * that the stack be executable. This is bad for security so the + * per-process page is now used instead. + * + * - The instruction in @emul may be something entirely invalid for a + * delay slot. The user may (intentionally or otherwise) place a branch + * in a delay slot, or a kernel mode instruction, or something else + * which generates an exception. Thus we can't rely upon the break in + * @badinst always being hit. For this reason we track the index of the + * frame allocated to each thread, allowing us to clean it up at later + * points such as signal delivery or thread exit. + * + * - The user may generate a fake struct emuframe if they wish, invoking + * the BRK_MEMU break instruction themselves. We must therefore not + * trust that BRK_MEMU means there's actually a valid frame allocated + * to the thread, and must not allow the user to do anything they + * couldn't already. */ - struct emuframe { mips_instruction emul; mips_instruction badinst; - mips_instruction cookie; - unsigned long epc; }; -/* - * Set up an emulation frame for instruction IR, from a delay slot of - * a branch jumping to CPC. Return 0 if successful, -1 if no emulation - * required, otherwise a signal number causing a frame setup failure. - */ -int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc) +static const int emupage_frame_count = PAGE_SIZE / sizeof(struct emuframe); + +static inline __user struct emuframe *dsemul_page(void) +{ + return (__user struct emuframe *)STACK_TOP; +} + +static int alloc_emuframe(void) +{ + mm_context_t *mm_ctx = ¤t->mm->context; + int idx; + +retry: + spin_lock(&mm_ctx->bd_emupage_lock); + + /* Ensure we have an allocation bitmap */ + if (!mm_ctx->bd_emupage_allocmap) { + mm_ctx->bd_emupage_allocmap = + kcalloc(BITS_TO_LONGS(emupage_frame_count), + sizeof(unsigned long), + GFP_ATOMIC); + + if (!mm_ctx->bd_emupage_allocmap) { + idx = BD_EMUFRAME_NONE; + goto out_unlock; + } + } + + /* Attempt to allocate a single bit/frame */ + idx = bitmap_find_free_region(mm_ctx->bd_emupage_allocmap, + emupage_frame_count, 0); + if (idx < 0) { + /* + * Failed to allocate a frame. We'll wait until one becomes + * available. We unlock the page so that other threads actually + * get the opportunity to free their frames, which means + * technically the result of bitmap_full may be incorrect. + * However the worst case is that we repeat all this and end up + * back here again. + */ + spin_unlock(&mm_ctx->bd_emupage_lock); + if (!wait_event_killable(mm_ctx->bd_emupage_queue, + !bitmap_full(mm_ctx->bd_emupage_allocmap, + emupage_frame_count))) + goto retry; + + /* Received a fatal signal - just give in */ + return BD_EMUFRAME_NONE; + } + + /* Success! */ + pr_debug("allocate emuframe %d to %d\n", idx, current->pid); +out_unlock: + spin_unlock(&mm_ctx->bd_emupage_lock); + return idx; +} + +static void free_emuframe(int idx, struct mm_struct *mm) +{ + mm_context_t *mm_ctx = &mm->context; + + spin_lock(&mm_ctx->bd_emupage_lock); + + pr_debug("free emuframe %d from %d\n", idx, current->pid); + bitmap_clear(mm_ctx->bd_emupage_allocmap, idx, 1); + + /* If some thread is waiting for a frame, now's its chance */ + wake_up(&mm_ctx->bd_emupage_queue); + + spin_unlock(&mm_ctx->bd_emupage_lock); +} + +static bool within_emuframe(struct pt_regs *regs) +{ + unsigned long base = (unsigned long)dsemul_page(); + + if (regs->cp0_epc < base) + return false; + if (regs->cp0_epc >= (base + PAGE_SIZE)) + return false; + + return true; +} + +bool dsemul_thread_cleanup(struct task_struct *tsk) +{ + int fr_idx; + + /* Clear any allocated frame, retrieving its index */ + fr_idx = atomic_xchg(&tsk->thread.bd_emu_frame, BD_EMUFRAME_NONE); + + /* If no frame was allocated, we're done */ + if (fr_idx == BD_EMUFRAME_NONE) + return false; + + task_lock(tsk); + + /* Free the frame that this thread had allocated */ + if (tsk->mm) + free_emuframe(fr_idx, tsk->mm); + + task_unlock(tsk); + return true; +} + +bool dsemul_thread_rollback(struct pt_regs *regs) +{ + struct emuframe __user *fr; + int fr_idx; + + /* Do nothing if we're not executing from a frame */ + if (!within_emuframe(regs)) + return false; + + /* Find the frame being executed */ + fr_idx = atomic_read(¤t->thread.bd_emu_frame); + if (fr_idx == BD_EMUFRAME_NONE) + return false; + fr = &dsemul_page()[fr_idx]; + + /* + * If the PC is at the emul instruction, roll back to the branch. If + * PC is at the badinst (break) instruction, we've already emulated the + * instruction so progress to the continue PC. If it's anything else + * then something is amiss & the user has branched into some other area + * of the emupage - we'll free the allocated frame anyway. + */ + if (msk_isa16_mode(regs->cp0_epc) == (unsigned long)&fr->emul) + regs->cp0_epc = current->thread.bd_emu_branch_pc; + else if (msk_isa16_mode(regs->cp0_epc) == (unsigned long)&fr->badinst) + regs->cp0_epc = current->thread.bd_emu_cont_pc; + + atomic_set(¤t->thread.bd_emu_frame, BD_EMUFRAME_NONE); + free_emuframe(fr_idx, current->mm); + return true; +} + +void dsemul_mm_cleanup(struct mm_struct *mm) +{ + mm_context_t *mm_ctx = &mm->context; + + kfree(mm_ctx->bd_emupage_allocmap); +} + +int mips_dsemul(struct pt_regs *regs, mips_instruction ir, + unsigned long branch_pc, unsigned long cont_pc) { int isa16 = get_isa16_mode(regs->cp0_epc); mips_instruction break_math; struct emuframe __user *fr; - int err; + int err, fr_idx; /* NOP is easy */ if (ir == 0) @@ -68,30 +239,20 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc) } } - pr_debug("dsemul %lx %lx\n", regs->cp0_epc, cpc); + pr_debug("dsemul 0x%08lx cont at 0x%08lx\n", regs->cp0_epc, cont_pc); - /* - * The strategy is to push the instruction onto the user stack - * and put a trap after it which we can catch and jump to - * the required address any alternative apart from full - * instruction emulation!!. - * - * Algorithmics used a system call instruction, and - * borrowed that vector. MIPS/Linux version is a bit - * more heavyweight in the interests of portability and - * multiprocessor support. For Linux we use a BREAK 514 - * instruction causing a breakpoint exception. - */ - break_math = BREAK_MATH(isa16); - - /* Ensure that the two instructions are in the same cache line */ - fr = (struct emuframe __user *) - ((regs->regs[29] - sizeof(struct emuframe)) & ~0x7); - - /* Verify that the stack pointer is not completely insane */ - if (unlikely(!access_ok(VERIFY_WRITE, fr, sizeof(struct emuframe)))) + /* Allocate a frame if we don't already have one */ + fr_idx = atomic_read(¤t->thread.bd_emu_frame); + if (fr_idx == BD_EMUFRAME_NONE) + fr_idx = alloc_emuframe(); + if (fr_idx == BD_EMUFRAME_NONE) return SIGBUS; + fr = &dsemul_page()[fr_idx]; + + /* Retrieve the appropriately encoded break instruction */ + break_math = BREAK_MATH(isa16); + /* Write the instructions to the frame */ if (isa16) { err = __put_user(ir >> 16, (u16 __user *)(&fr->emul)); @@ -106,84 +267,36 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc) err |= __put_user(break_math, &fr->badinst); } - err |= __put_user((mips_instruction)BD_COOKIE, &fr->cookie); - err |= __put_user(cpc, &fr->epc); - if (unlikely(err)) { MIPS_FPU_EMU_INC_STATS(errors); + free_emuframe(fr_idx, current->mm); return SIGBUS; } + /* Record the PC of the branch, PC to continue from & frame index */ + current->thread.bd_emu_branch_pc = branch_pc; + current->thread.bd_emu_cont_pc = cont_pc; + atomic_set(¤t->thread.bd_emu_frame, fr_idx); + + /* Change user register context to execute the frame */ regs->cp0_epc = (unsigned long)&fr->emul | isa16; + /* Ensure the icache observes our newly written frame */ flush_cache_sigtramp((unsigned long)&fr->emul); return 0; } -int do_dsemulret(struct pt_regs *xcp) +bool do_dsemulret(struct pt_regs *xcp) { - int isa16 = get_isa16_mode(xcp->cp0_epc); - struct emuframe __user *fr; - unsigned long epc; - u32 insn, cookie; - int err = 0; - u16 instr[2]; - - fr = (struct emuframe __user *) - (msk_isa16_mode(xcp->cp0_epc) - sizeof(mips_instruction)); - - /* - * If we can't even access the area, something is very wrong, but we'll - * leave that to the default handling - */ - if (!access_ok(VERIFY_READ, fr, sizeof(struct emuframe))) - return 0; - - /* - * Do some sanity checking on the stackframe: - * - * - Is the instruction pointed to by the EPC an BREAK_MATH? - * - Is the following memory word the BD_COOKIE? - */ - if (isa16) { - err = __get_user(instr[0], - (u16 __user *)(&fr->badinst)); - err |= __get_user(instr[1], - (u16 __user *)((long)(&fr->badinst) + 2)); - insn = (instr[0] << 16) | instr[1]; - } else { - err = __get_user(insn, &fr->badinst); - } - err |= __get_user(cookie, &fr->cookie); - - if (unlikely(err || - insn != BREAK_MATH(isa16) || cookie != BD_COOKIE)) { + /* Cleanup the allocated frame, returning if there wasn't one */ + if (!dsemul_thread_cleanup(current)) { MIPS_FPU_EMU_INC_STATS(errors); - return 0; - } - - /* - * At this point, we are satisfied that it's a BD emulation trap. Yes, - * a user might have deliberately put two malformed and useless - * instructions in a row in his program, in which case he's in for a - * nasty surprise - the next instruction will be treated as a - * continuation address! Alas, this seems to be the only way that we - * can handle signals, recursion, and longjmps() in the context of - * emulating the branch delay instruction. - */ - - pr_debug("dsemulret\n"); - - if (__get_user(epc, &fr->epc)) { /* Saved EPC */ - /* This is not a good situation to be in */ - force_sig(SIGBUS, current); - - return 0; + return false; } /* Set EPC to return to post-branch instruction */ - xcp->cp0_epc = epc; - MIPS_FPU_EMU_INC_STATS(ds_emul); - return 1; + xcp->cp0_epc = current->thread.bd_emu_cont_pc; + pr_debug("dsemulret to 0x%08lx\n", xcp->cp0_epc); + return true; } diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index ef7f925dd1b0..cd72805b64a7 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -40,6 +40,51 @@ #include <asm/mips-cm.h> /* + * Bits describing what cache ops an SMP callback function may perform. + * + * R4K_HIT - Virtual user or kernel address based cache operations. The + * active_mm must be checked before using user addresses, falling + * back to kmap. + * R4K_INDEX - Index based cache operations. + */ + +#define R4K_HIT BIT(0) +#define R4K_INDEX BIT(1) + +/** + * r4k_op_needs_ipi() - Decide if a cache op needs to be done on every core. + * @type: Type of cache operations (R4K_HIT or R4K_INDEX). + * + * Decides whether a cache op needs to be performed on every core in the system. + * This may change depending on the @type of cache operation, as well as the set + * of online CPUs, so preemption should be disabled by the caller to prevent CPU + * hotplug from changing the result. + * + * Returns: 1 if the cache operation @type should be done on every core in + * the system. + * 0 if the cache operation @type is globalized and only needs to + * be performed on a simple CPU. + */ +static inline bool r4k_op_needs_ipi(unsigned int type) +{ + /* The MIPS Coherence Manager (CM) globalizes address-based cache ops */ + if (type == R4K_HIT && mips_cm_present()) + return false; + + /* + * Hardware doesn't globalize the required cache ops, so SMP calls may + * be needed, but only if there are foreign CPUs (non-siblings with + * separate caches). + */ + /* cpu_foreign_map[] undeclared when !CONFIG_SMP */ +#ifdef CONFIG_SMP + return !cpumask_empty(&cpu_foreign_map[0]); +#else + return false; +#endif +} + +/* * Special Variant of smp_call_function for use by cache functions: * * o No return value @@ -48,30 +93,17 @@ * primary cache. * o doesn't disable interrupts on the local CPU */ -static inline void r4k_on_each_cpu(void (*func) (void *info), void *info) +static inline void r4k_on_each_cpu(unsigned int type, + void (*func)(void *info), void *info) { preempt_disable(); - - /* - * The Coherent Manager propagates address-based cache ops to other - * cores but not index-based ops. However, r4k_on_each_cpu is used - * in both cases so there is no easy way to tell what kind of op is - * executed to the other cores. The best we can probably do is - * to restrict that call when a CM is not present because both - * CM-based SMP protocols (CMP & CPS) restrict index-based cache ops. - */ - if (!mips_cm_present()) - smp_call_function_many(&cpu_foreign_map, func, info, 1); + if (r4k_op_needs_ipi(type)) + smp_call_function_many(&cpu_foreign_map[smp_processor_id()], + func, info, 1); func(info); preempt_enable(); } -#if defined(CONFIG_MIPS_CMP) || defined(CONFIG_MIPS_CPS) -#define cpu_has_safe_index_cacheops 0 -#else -#define cpu_has_safe_index_cacheops 1 -#endif - /* * Must die. */ @@ -462,22 +494,44 @@ static inline void local_r4k___flush_cache_all(void * args) static void r4k___flush_cache_all(void) { - r4k_on_each_cpu(local_r4k___flush_cache_all, NULL); + r4k_on_each_cpu(R4K_INDEX, local_r4k___flush_cache_all, NULL); } -static inline int has_valid_asid(const struct mm_struct *mm) +/** + * has_valid_asid() - Determine if an mm already has an ASID. + * @mm: Memory map. + * @type: R4K_HIT or R4K_INDEX, type of cache op. + * + * Determines whether @mm already has an ASID on any of the CPUs which cache ops + * of type @type within an r4k_on_each_cpu() call will affect. If + * r4k_on_each_cpu() does an SMP call to a single VPE in each core, then the + * scope of the operation is confined to sibling CPUs, otherwise all online CPUs + * will need to be checked. + * + * Must be called in non-preemptive context. + * + * Returns: 1 if the CPUs affected by @type cache ops have an ASID for @mm. + * 0 otherwise. + */ +static inline int has_valid_asid(const struct mm_struct *mm, unsigned int type) { -#ifdef CONFIG_MIPS_MT_SMP - int i; + unsigned int i; + const cpumask_t *mask = cpu_present_mask; - for_each_online_cpu(i) + /* cpu_sibling_map[] undeclared when !CONFIG_SMP */ +#ifdef CONFIG_SMP + /* + * If r4k_on_each_cpu does SMP calls, it does them to a single VPE in + * each foreign core, so we only need to worry about siblings. + * Otherwise we need to worry about all present CPUs. + */ + if (r4k_op_needs_ipi(type)) + mask = &cpu_sibling_map[smp_processor_id()]; +#endif + for_each_cpu(i, mask) if (cpu_context(i, mm)) return 1; - return 0; -#else - return cpu_context(smp_processor_id(), mm); -#endif } static void r4k__flush_cache_vmap(void) @@ -490,12 +544,16 @@ static void r4k__flush_cache_vunmap(void) r4k_blast_dcache(); } +/* + * Note: flush_tlb_range() assumes flush_cache_range() sufficiently flushes + * whole caches when vma is executable. + */ static inline void local_r4k_flush_cache_range(void * args) { struct vm_area_struct *vma = args; int exec = vma->vm_flags & VM_EXEC; - if (!(has_valid_asid(vma->vm_mm))) + if (!has_valid_asid(vma->vm_mm, R4K_INDEX)) return; /* @@ -516,14 +574,14 @@ static void r4k_flush_cache_range(struct vm_area_struct *vma, int exec = vma->vm_flags & VM_EXEC; if (cpu_has_dc_aliases || exec) - r4k_on_each_cpu(local_r4k_flush_cache_range, vma); + r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_range, vma); } static inline void local_r4k_flush_cache_mm(void * args) { struct mm_struct *mm = args; - if (!has_valid_asid(mm)) + if (!has_valid_asid(mm, R4K_INDEX)) return; /* @@ -548,7 +606,7 @@ static void r4k_flush_cache_mm(struct mm_struct *mm) if (!cpu_has_dc_aliases) return; - r4k_on_each_cpu(local_r4k_flush_cache_mm, mm); + r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_mm, mm); } struct flush_cache_page_args { @@ -573,10 +631,10 @@ static inline void local_r4k_flush_cache_page(void *args) void *vaddr; /* - * If ownes no valid ASID yet, cannot possibly have gotten + * If owns no valid ASID yet, cannot possibly have gotten * this page into the cache. */ - if (!has_valid_asid(mm)) + if (!has_valid_asid(mm, R4K_HIT)) return; addr &= PAGE_MASK; @@ -643,7 +701,7 @@ static void r4k_flush_cache_page(struct vm_area_struct *vma, args.addr = addr; args.pfn = pfn; - r4k_on_each_cpu(local_r4k_flush_cache_page, &args); + r4k_on_each_cpu(R4K_HIT, local_r4k_flush_cache_page, &args); } static inline void local_r4k_flush_data_cache_page(void * addr) @@ -656,18 +714,23 @@ static void r4k_flush_data_cache_page(unsigned long addr) if (in_atomic()) local_r4k_flush_data_cache_page((void *)addr); else - r4k_on_each_cpu(local_r4k_flush_data_cache_page, (void *) addr); + r4k_on_each_cpu(R4K_HIT, local_r4k_flush_data_cache_page, + (void *) addr); } struct flush_icache_range_args { unsigned long start; unsigned long end; + unsigned int type; }; -static inline void local_r4k_flush_icache_range(unsigned long start, unsigned long end) +static inline void __local_r4k_flush_icache_range(unsigned long start, + unsigned long end, + unsigned int type) { if (!cpu_has_ic_fills_f_dc) { - if (end - start >= dcache_size) { + if (type == R4K_INDEX || + (type & R4K_INDEX && end - start >= dcache_size)) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; @@ -675,7 +738,8 @@ static inline void local_r4k_flush_icache_range(unsigned long start, unsigned lo } } - if (end - start > icache_size) + if (type == R4K_INDEX || + (type & R4K_INDEX && end - start > icache_size)) r4k_blast_icache(); else { switch (boot_cpu_type()) { @@ -701,23 +765,52 @@ static inline void local_r4k_flush_icache_range(unsigned long start, unsigned lo #endif } +static inline void local_r4k_flush_icache_range(unsigned long start, + unsigned long end) +{ + __local_r4k_flush_icache_range(start, end, R4K_HIT | R4K_INDEX); +} + static inline void local_r4k_flush_icache_range_ipi(void *args) { struct flush_icache_range_args *fir_args = args; unsigned long start = fir_args->start; unsigned long end = fir_args->end; + unsigned int type = fir_args->type; - local_r4k_flush_icache_range(start, end); + __local_r4k_flush_icache_range(start, end, type); } static void r4k_flush_icache_range(unsigned long start, unsigned long end) { struct flush_icache_range_args args; + unsigned long size, cache_size; args.start = start; args.end = end; + args.type = R4K_HIT | R4K_INDEX; - r4k_on_each_cpu(local_r4k_flush_icache_range_ipi, &args); + /* + * Indexed cache ops require an SMP call. + * Consider if that can or should be avoided. + */ + preempt_disable(); + if (r4k_op_needs_ipi(R4K_INDEX) && !r4k_op_needs_ipi(R4K_HIT)) { + /* + * If address-based cache ops don't require an SMP call, then + * use them exclusively for small flushes. + */ + size = start - end; + cache_size = icache_size; + if (!cpu_has_ic_fills_f_dc) { + size *= 2; + cache_size += dcache_size; + } + if (size <= cache_size) + args.type &= ~R4K_INDEX; + } + r4k_on_each_cpu(args.type, local_r4k_flush_icache_range_ipi, &args); + preempt_enable(); instruction_hazard(); } @@ -744,7 +837,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) * subset property so we have to flush the primary caches * explicitly */ - if (cpu_has_safe_index_cacheops && size >= dcache_size) { + if (size >= dcache_size) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; @@ -781,7 +874,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) return; } - if (cpu_has_safe_index_cacheops && size >= dcache_size) { + if (size >= dcache_size) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; @@ -794,25 +887,76 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) } #endif /* CONFIG_DMA_NONCOHERENT || CONFIG_DMA_MAYBE_COHERENT */ +struct flush_cache_sigtramp_args { + struct mm_struct *mm; + struct page *page; + unsigned long addr; +}; + /* * While we're protected against bad userland addresses we don't care * very much about what happens in that case. Usually a segmentation * fault will dump the process later on anyway ... */ -static void local_r4k_flush_cache_sigtramp(void * arg) +static void local_r4k_flush_cache_sigtramp(void *args) { + struct flush_cache_sigtramp_args *fcs_args = args; + unsigned long addr = fcs_args->addr; + struct page *page = fcs_args->page; + struct mm_struct *mm = fcs_args->mm; + int map_coherent = 0; + void *vaddr; + unsigned long ic_lsize = cpu_icache_line_size(); unsigned long dc_lsize = cpu_dcache_line_size(); unsigned long sc_lsize = cpu_scache_line_size(); - unsigned long addr = (unsigned long) arg; + + /* + * If owns no valid ASID yet, cannot possibly have gotten + * this page into the cache. + */ + if (!has_valid_asid(mm, R4K_HIT)) + return; + + if (mm == current->active_mm) { + vaddr = NULL; + } else { + /* + * Use kmap_coherent or kmap_atomic to do flushes for + * another ASID than the current one. + */ + map_coherent = (cpu_has_dc_aliases && + page_mapcount(page) && + !Page_dcache_dirty(page)); + if (map_coherent) + vaddr = kmap_coherent(page, addr); + else + vaddr = kmap_atomic(page); + addr = (unsigned long)vaddr + (addr & ~PAGE_MASK); + } R4600_HIT_CACHEOP_WAR_IMPL; - if (dc_lsize) - protected_writeback_dcache_line(addr & ~(dc_lsize - 1)); - if (!cpu_icache_snoops_remote_store && scache_size) - protected_writeback_scache_line(addr & ~(sc_lsize - 1)); + if (!cpu_has_ic_fills_f_dc) { + if (dc_lsize) + vaddr ? flush_dcache_line(addr & ~(dc_lsize - 1)) + : protected_writeback_dcache_line( + addr & ~(dc_lsize - 1)); + if (!cpu_icache_snoops_remote_store && scache_size) + vaddr ? flush_scache_line(addr & ~(sc_lsize - 1)) + : protected_writeback_scache_line( + addr & ~(sc_lsize - 1)); + } if (ic_lsize) - protected_flush_icache_line(addr & ~(ic_lsize - 1)); + vaddr ? flush_icache_line(addr & ~(ic_lsize - 1)) + : protected_flush_icache_line(addr & ~(ic_lsize - 1)); + + if (vaddr) { + if (map_coherent) + kunmap_coherent(); + else + kunmap_atomic(vaddr); + } + if (MIPS4K_ICACHE_REFILL_WAR) { __asm__ __volatile__ ( ".set push\n\t" @@ -837,7 +981,23 @@ static void local_r4k_flush_cache_sigtramp(void * arg) static void r4k_flush_cache_sigtramp(unsigned long addr) { - r4k_on_each_cpu(local_r4k_flush_cache_sigtramp, (void *) addr); + struct flush_cache_sigtramp_args args; + int npages; + + down_read(¤t->mm->mmap_sem); + + npages = get_user_pages_fast(addr, 1, 0, &args.page); + if (npages < 1) + goto out; + + args.mm = current->mm; + args.addr = addr; + + r4k_on_each_cpu(R4K_HIT, local_r4k_flush_cache_sigtramp, &args); + + put_page(args.page); +out: + up_read(¤t->mm->mmap_sem); } static void r4k_flush_icache_all(void) @@ -851,6 +1011,15 @@ struct flush_kernel_vmap_range_args { int size; }; +static inline void local_r4k_flush_kernel_vmap_range_index(void *args) +{ + /* + * Aliases only affect the primary caches so don't bother with + * S-caches or T-caches. + */ + r4k_blast_dcache(); +} + static inline void local_r4k_flush_kernel_vmap_range(void *args) { struct flush_kernel_vmap_range_args *vmra = args; @@ -861,12 +1030,8 @@ static inline void local_r4k_flush_kernel_vmap_range(void *args) * Aliases only affect the primary caches so don't bother with * S-caches or T-caches. */ - if (cpu_has_safe_index_cacheops && size >= dcache_size) - r4k_blast_dcache(); - else { - R4600_HIT_CACHEOP_WAR_IMPL; - blast_dcache_range(vaddr, vaddr + size); - } + R4600_HIT_CACHEOP_WAR_IMPL; + blast_dcache_range(vaddr, vaddr + size); } static void r4k_flush_kernel_vmap_range(unsigned long vaddr, int size) @@ -876,7 +1041,12 @@ static void r4k_flush_kernel_vmap_range(unsigned long vaddr, int size) args.vaddr = (unsigned long) vaddr; args.size = size; - r4k_on_each_cpu(local_r4k_flush_kernel_vmap_range, &args); + if (size >= dcache_size) + r4k_on_each_cpu(R4K_INDEX, + local_r4k_flush_kernel_vmap_range_index, NULL); + else + r4k_on_each_cpu(R4K_HIT, local_r4k_flush_kernel_vmap_range, + &args); } static inline void rm7k_erratum31(void) @@ -1206,7 +1376,7 @@ static void probe_pcache(void) c->icache.linesz; c->icache.waybit = __ffs(icache_size/c->icache.ways); - if (config & 0x8) /* VI bit */ + if (config & MIPS_CONF_VI) c->icache.flags |= MIPS_CACHE_VTAG; /* diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index cb557d28cb21..b2eadd6fa9a1 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -131,7 +131,7 @@ static void *mips_dma_alloc_noncoherent(struct device *dev, size_t size, } static void *mips_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t * dma_handle, gfp_t gfp, struct dma_attrs *attrs) + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { void *ret; struct page *page = NULL; @@ -141,7 +141,7 @@ static void *mips_dma_alloc_coherent(struct device *dev, size_t size, * XXX: seems like the coherent and non-coherent implementations could * be consolidated. */ - if (dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs)) + if (attrs & DMA_ATTR_NON_CONSISTENT) return mips_dma_alloc_noncoherent(dev, size, dma_handle, gfp); gfp = massage_gfp_flags(dev, gfp); @@ -176,13 +176,13 @@ static void mips_dma_free_noncoherent(struct device *dev, size_t size, } static void mips_dma_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { unsigned long addr = (unsigned long) vaddr; unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; struct page *page = NULL; - if (dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs)) { + if (attrs & DMA_ATTR_NON_CONSISTENT) { mips_dma_free_noncoherent(dev, size, vaddr, dma_handle); return; } @@ -200,7 +200,7 @@ static void mips_dma_free_coherent(struct device *dev, size_t size, void *vaddr, static int mips_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; @@ -214,7 +214,7 @@ static int mips_dma_mmap(struct device *dev, struct vm_area_struct *vma, pfn = page_to_pfn(virt_to_page((void *)addr)); - if (dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs)) + if (attrs & DMA_ATTR_WRITE_COMBINE) vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); else vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); @@ -291,7 +291,7 @@ static inline void __dma_sync(struct page *page, } static void mips_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction direction, struct dma_attrs *attrs) + size_t size, enum dma_data_direction direction, unsigned long attrs) { if (cpu_needs_post_dma_flush(dev)) __dma_sync(dma_addr_to_page(dev, dma_addr), @@ -301,7 +301,7 @@ static void mips_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, } static int mips_dma_map_sg(struct device *dev, struct scatterlist *sglist, - int nents, enum dma_data_direction direction, struct dma_attrs *attrs) + int nents, enum dma_data_direction direction, unsigned long attrs) { int i; struct scatterlist *sg; @@ -322,7 +322,7 @@ static int mips_dma_map_sg(struct device *dev, struct scatterlist *sglist, static dma_addr_t mips_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { if (!plat_device_is_coherent(dev)) __dma_sync(page, offset, size, direction); @@ -332,7 +332,7 @@ static dma_addr_t mips_dma_map_page(struct device *dev, struct page *page, static void mips_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nhwentries, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { int i; struct scatterlist *sg; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 9b58eb5fd0d5..a5509e7dcad2 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -504,7 +504,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) void (*free_init_pages_eva)(void *begin, void *end) = NULL; -void __init_refok free_initmem(void) +void __ref free_initmem(void) { prom_free_prom_memory(); /* diff --git a/arch/mips/mm/sc-debugfs.c b/arch/mips/mm/sc-debugfs.c index 5eefe3281b24..01f1154cdb0c 100644 --- a/arch/mips/mm/sc-debugfs.c +++ b/arch/mips/mm/sc-debugfs.c @@ -73,8 +73,8 @@ static int __init sc_debugfs_init(void) file = debugfs_create_file("prefetch", S_IRUGO | S_IWUSR, dir, NULL, &sc_prefetch_fops); - if (IS_ERR(file)) - return PTR_ERR(file); + if (!file) + return -ENOMEM; return 0; } diff --git a/arch/mips/mm/sc-rm7k.c b/arch/mips/mm/sc-rm7k.c index 9ac1efcfbcc7..78f900c59276 100644 --- a/arch/mips/mm/sc-rm7k.c +++ b/arch/mips/mm/sc-rm7k.c @@ -161,7 +161,7 @@ static void rm7k_tc_disable(void) local_irq_save(flags); blast_rm7k_tcache(); clear_c0_config(RM7K_CONF_TE); - local_irq_save(flags); + local_irq_restore(flags); } static void rm7k_sc_disable(void) diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 4004b659ce50..55ce39606cb8 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -888,7 +888,7 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, } } if (!did_vmalloc_branch) { - if (uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd)) { + if (single_insn_swpd) { uasm_il_b(p, r, label_vmalloc_done); uasm_i_lui(p, ptr, uasm_rel_hi(swpd)); } else { @@ -1025,7 +1025,7 @@ static void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep) pte_off_odd += offsetof(pte_t, pte_high); #endif - if (config_enabled(CONFIG_XPA)) { + if (IS_ENABLED(CONFIG_XPA)) { uasm_i_lw(p, tmp, pte_off_even, ptep); /* even pte */ UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); UASM_i_MTC0(p, tmp, C0_ENTRYLO0); @@ -1643,7 +1643,7 @@ iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr, unsigned int hwmode = mode & (_PAGE_VALID | _PAGE_DIRTY); unsigned int swmode = mode & ~hwmode; - if (config_enabled(CONFIG_XPA) && !cpu_has_64bits) { + if (IS_ENABLED(CONFIG_XPA) && !cpu_has_64bits) { uasm_i_lui(p, scratch, swmode >> 16); uasm_i_or(p, pte, pte, scratch); BUG_ON(swmode & 0xffff); @@ -2432,7 +2432,7 @@ static void config_htw_params(void) pwsize |= ilog2(PTRS_PER_PMD) << MIPS_PWSIZE_MDW_SHIFT; /* Set pointer size to size of directory pointers */ - if (config_enabled(CONFIG_64BIT)) + if (IS_ENABLED(CONFIG_64BIT)) pwsize |= MIPS_PWSIZE_PS_MASK; /* PTEs may be multiple pointers long (e.g. with XPA) */ pwsize |= ((PTE_T_LOG2 - PGD_T_LOG2) << MIPS_PWSIZE_PTEW_SHIFT) @@ -2448,7 +2448,7 @@ static void config_htw_params(void) * the pwctl fields. */ config = 1 << MIPS_PWCTL_PWEN_SHIFT; - if (config_enabled(CONFIG_64BIT)) + if (IS_ENABLED(CONFIG_64BIT)) config |= MIPS_PWCTL_XU_MASK; write_c0_pwctl(config); pr_info("Hardware Page Table Walker enabled\n"); @@ -2522,7 +2522,7 @@ void build_tlb_refill_handler(void) */ static int run_once = 0; - if (config_enabled(CONFIG_XPA) && !cpu_has_rixi) + if (IS_ENABLED(CONFIG_XPA) && !cpu_has_rixi) panic("Kernels supporting XPA currently require CPUs with RIXI"); output_pgtable_bits_defines(); diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c index d78178daea4b..277cf52d80e1 100644 --- a/arch/mips/mm/uasm-micromips.c +++ b/arch/mips/mm/uasm-micromips.c @@ -53,8 +53,13 @@ static struct insn insn_table_MM[] = { { insn_bltzl, 0, 0 }, { insn_bne, M(mm_bne32_op, 0, 0, 0, 0, 0), RT | RS | BIMM }, { insn_cache, M(mm_pool32b_op, 0, 0, mm_cache_func, 0, 0), RT | RS | SIMM }, + { insn_cfc1, M(mm_pool32f_op, 0, 0, 0, mm_cfc1_op, mm_32f_73_op), RT | RS }, + { insn_cfcmsa, M(mm_pool32s_op, 0, msa_cfc_op, 0, 0, mm_32s_elm_op), RD | RE }, + { insn_ctc1, M(mm_pool32f_op, 0, 0, 0, mm_ctc1_op, mm_32f_73_op), RT | RS }, + { insn_ctcmsa, M(mm_pool32s_op, 0, msa_ctc_op, 0, 0, mm_32s_elm_op), RD | RE }, { insn_daddu, 0, 0 }, { insn_daddiu, 0, 0 }, + { insn_di, M(mm_pool32a_op, 0, 0, 0, mm_di_op, mm_pool32axf_op), RS }, { insn_divu, M(mm_pool32a_op, 0, 0, 0, mm_divu_op, mm_pool32axf_op), RT | RS }, { insn_dmfc0, 0, 0 }, { insn_dmtc0, 0, 0 }, @@ -84,6 +89,8 @@ static struct insn insn_table_MM[] = { { insn_mfhi, M(mm_pool32a_op, 0, 0, 0, mm_mfhi32_op, mm_pool32axf_op), RS }, { insn_mflo, M(mm_pool32a_op, 0, 0, 0, mm_mflo32_op, mm_pool32axf_op), RS }, { insn_mtc0, M(mm_pool32a_op, 0, 0, 0, mm_mtc0_op, mm_pool32axf_op), RT | RS | RD }, + { insn_mthi, M(mm_pool32a_op, 0, 0, 0, mm_mthi32_op, mm_pool32axf_op), RS }, + { insn_mtlo, M(mm_pool32a_op, 0, 0, 0, mm_mtlo32_op, mm_pool32axf_op), RS }, { insn_mul, M(mm_pool32a_op, 0, 0, 0, 0, mm_mul_op), RT | RS | RD }, { insn_or, M(mm_pool32a_op, 0, 0, 0, 0, mm_or32_op), RT | RS | RD }, { insn_ori, M(mm_ori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM }, @@ -166,13 +173,15 @@ static void build_insn(u32 **buf, enum opcode opc, ...) op = ip->match; va_start(ap, opc); if (ip->fields & RS) { - if (opc == insn_mfc0 || opc == insn_mtc0) + if (opc == insn_mfc0 || opc == insn_mtc0 || + opc == insn_cfc1 || opc == insn_ctc1) op |= build_rt(va_arg(ap, u32)); else op |= build_rs(va_arg(ap, u32)); } if (ip->fields & RT) { - if (opc == insn_mfc0 || opc == insn_mtc0) + if (opc == insn_mfc0 || opc == insn_mtc0 || + opc == insn_cfc1 || opc == insn_ctc1) op |= build_rs(va_arg(ap, u32)); else op |= build_rt(va_arg(ap, u32)); diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c index 9c2220a45189..763d3f1edb8a 100644 --- a/arch/mips/mm/uasm-mips.c +++ b/arch/mips/mm/uasm-mips.c @@ -65,11 +65,16 @@ static struct insn insn_table[] = { #ifndef CONFIG_CPU_MIPSR6 { insn_cache, M(cache_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, #else - { insn_cache, M6(cache_op, 0, 0, 0, cache6_op), RS | RT | SIMM9 }, + { insn_cache, M6(spec3_op, 0, 0, 0, cache6_op), RS | RT | SIMM9 }, #endif + { insn_cfc1, M(cop1_op, cfc_op, 0, 0, 0, 0), RT | RD }, + { insn_cfcmsa, M(msa_op, 0, msa_cfc_op, 0, 0, msa_elm_op), RD | RE }, + { insn_ctc1, M(cop1_op, ctc_op, 0, 0, 0, 0), RT | RD }, + { insn_ctcmsa, M(msa_op, 0, msa_ctc_op, 0, 0, msa_elm_op), RD | RE }, { insn_daddiu, M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, { insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD }, { insn_dinsm, M(spec3_op, 0, 0, 0, 0, dinsm_op), RS | RT | RD | RE }, + { insn_di, M(cop0_op, mfmc0_op, 0, 12, 0, 0), RT }, { insn_dins, M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE }, { insn_divu, M(spec_op, 0, 0, 0, 0, divu_op), RS | RT }, { insn_dmfc0, M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET}, @@ -114,7 +119,13 @@ static struct insn insn_table[] = { { insn_mflo, M(spec_op, 0, 0, 0, 0, mflo_op), RD }, { insn_mtc0, M(cop0_op, mtc_op, 0, 0, 0, 0), RT | RD | SET}, { insn_mthc0, M(cop0_op, mthc0_op, 0, 0, 0, 0), RT | RD | SET}, + { insn_mthi, M(spec_op, 0, 0, 0, 0, mthi_op), RS }, + { insn_mtlo, M(spec_op, 0, 0, 0, 0, mtlo_op), RS }, +#ifndef CONFIG_CPU_MIPSR6 { insn_mul, M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD}, +#else + { insn_mul, M(spec_op, 0, 0, 0, mult_mul_op, mult_op), RS | RT | RD}, +#endif { insn_ori, M(ori_op, 0, 0, 0, 0, 0), RS | RT | UIMM }, { insn_or, M(spec_op, 0, 0, 0, 0, or_op), RS | RT | RD }, #ifndef CONFIG_CPU_MIPSR6 diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c index ad718debc35a..a82970442b8a 100644 --- a/arch/mips/mm/uasm.c +++ b/arch/mips/mm/uasm.c @@ -49,18 +49,19 @@ enum opcode { insn_invalid, insn_addiu, insn_addu, insn_and, insn_andi, insn_bbit0, insn_bbit1, insn_beq, insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl, - insn_bne, insn_cache, insn_daddiu, insn_daddu, insn_dins, insn_dinsm, - insn_divu, insn_dmfc0, insn_dmtc0, insn_drotr, insn_drotr32, insn_dsll, + insn_bne, insn_cache, insn_cfc1, insn_cfcmsa, insn_ctc1, insn_ctcmsa, + insn_daddiu, insn_daddu, insn_di, insn_dins, insn_dinsm, insn_divu, + insn_dmfc0, insn_dmtc0, insn_drotr, insn_drotr32, insn_dsll, insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32, insn_dsubu, insn_eret, insn_ext, insn_ins, insn_j, insn_jal, insn_jalr, insn_jr, insn_lb, insn_ld, insn_ldx, insn_lh, insn_ll, insn_lld, insn_lui, insn_lw, insn_lwx, insn_mfc0, insn_mfhc0, insn_mfhi, insn_mflo, insn_mtc0, - insn_mthc0, insn_mul, insn_or, insn_ori, insn_pref, insn_rfe, - insn_rotr, insn_sc, insn_scd, insn_sd, insn_sll, insn_sllv, insn_slt, - insn_sltiu, insn_sltu, insn_sra, insn_srl, insn_srlv, insn_subu, - insn_sw, insn_sync, insn_syscall, insn_tlbp, insn_tlbr, insn_tlbwi, - insn_tlbwr, insn_wait, insn_wsbh, insn_xor, insn_xori, insn_yield, - insn_lddir, insn_ldpte, + insn_mthc0, insn_mthi, insn_mtlo, insn_mul, insn_or, insn_ori, + insn_pref, insn_rfe, insn_rotr, insn_sc, insn_scd, insn_sd, insn_sll, + insn_sllv, insn_slt, insn_sltiu, insn_sltu, insn_sra, insn_srl, + insn_srlv, insn_subu, insn_sw, insn_sync, insn_syscall, insn_tlbp, + insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait, insn_wsbh, insn_xor, + insn_xori, insn_yield, insn_lddir, insn_ldpte, }; struct insn { @@ -268,10 +269,15 @@ I_u1s2(_bltz) I_u1s2(_bltzl) I_u1u2s3(_bne) I_u2s3u1(_cache) +I_u1u2(_cfc1) +I_u2u1(_cfcmsa) +I_u1u2(_ctc1) +I_u2u1(_ctcmsa) I_u1u2u3(_dmfc0) I_u1u2u3(_dmtc0) I_u2u1s3(_daddiu) I_u3u1u2(_daddu) +I_u1(_di); I_u1u2(_divu) I_u2u1u3(_dsll) I_u2u1u3(_dsll32) @@ -301,6 +307,8 @@ I_u1(_mfhi) I_u1(_mflo) I_u1u2u3(_mtc0) I_u1u2u3(_mthc0) +I_u1(_mthi) +I_u1(_mtlo) I_u3u1u2(_mul) I_u2u1u3(_ori) I_u3u1u2(_or) @@ -370,11 +378,7 @@ UASM_EXPORT_SYMBOL(ISAFUNC(uasm_build_label)); int ISAFUNC(uasm_in_compat_space_p)(long addr) { /* Is this address in 32bit compat space? */ -#ifdef CONFIG_64BIT - return (((addr) & 0xffffffff00000000L) == 0xffffffff00000000L); -#else - return 1; -#endif + return addr == (int)addr; } UASM_EXPORT_SYMBOL(ISAFUNC(uasm_in_compat_space_p)); diff --git a/arch/mips/mti-malta/malta-dtshim.c b/arch/mips/mti-malta/malta-dtshim.c index f7133efc5843..151f4882ec8a 100644 --- a/arch/mips/mti-malta/malta-dtshim.c +++ b/arch/mips/mti-malta/malta-dtshim.c @@ -31,7 +31,7 @@ static unsigned __init gen_fdt_mem_array(__be32 *mem_array, unsigned long size) entries = 1; mem_array[0] = cpu_to_be32(PHYS_OFFSET); - if (config_enabled(CONFIG_EVA)) { + if (IS_ENABLED(CONFIG_EVA)) { /* * The current Malta EVA configuration is "special" in that it * always makes use of addresses in the upper half of the 32 bit @@ -82,7 +82,7 @@ static void __init append_memory(void *fdt, int root_off) physical_memsize = 32 << 20; } - if (config_enabled(CONFIG_CPU_BIG_ENDIAN)) { + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) { /* * SOC-it swaps, or perhaps doesn't swap, when DMA'ing * the last word of physical memory. diff --git a/arch/mips/mti-malta/malta-memory.c b/arch/mips/mti-malta/malta-memory.c index d5f8dae6a797..a47556723b85 100644 --- a/arch/mips/mti-malta/malta-memory.c +++ b/arch/mips/mti-malta/malta-memory.c @@ -32,7 +32,7 @@ static void free_init_pages_eva_malta(void *begin, void *end) void __init fw_meminit(void) { - bool eva = config_enabled(CONFIG_EVA); + bool eva = IS_ENABLED(CONFIG_EVA); free_init_pages_eva = eva ? free_init_pages_eva_malta : NULL; } diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c index 33d5ff5069e5..ec5b21678fad 100644 --- a/arch/mips/mti-malta/malta-setup.c +++ b/arch/mips/mti-malta/malta-setup.c @@ -261,7 +261,7 @@ void __init plat_mem_setup(void) fdt = malta_dt_shim(fdt); __dt_setup_arch(fdt); - if (config_enabled(CONFIG_EVA)) + if (IS_ENABLED(CONFIG_EVA)) /* EVA has already been configured in mach-malta/kernel-init.h */ pr_info("Enhanced Virtual Addressing (EVA) activated\n"); diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 1a8c96035716..39e7b472f0d8 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -426,7 +426,7 @@ static inline void emit_load_ptr(unsigned int dst, unsigned int src, static inline void emit_load_func(unsigned int reg, ptr imm, struct jit_ctx *ctx) { - if (config_enabled(CONFIG_64BIT)) { + if (IS_ENABLED(CONFIG_64BIT)) { /* At this point imm is always 64-bit */ emit_load_imm(r_tmp, (u64)imm >> 32, ctx); emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */ @@ -516,7 +516,7 @@ static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx) static inline u16 align_sp(unsigned int num) { /* Double word alignment for 32-bit, quadword for 64-bit */ - unsigned int align = config_enabled(CONFIG_64BIT) ? 16 : 8; + unsigned int align = IS_ENABLED(CONFIG_64BIT) ? 16 : 8; num = (num + (align - 1)) & -align; return num; } @@ -1199,7 +1199,7 @@ void bpf_jit_compile(struct bpf_prog *fp) memset(&ctx, 0, sizeof(ctx)); - ctx.offsets = kcalloc(fp->len, sizeof(*ctx.offsets), GFP_KERNEL); + ctx.offsets = kcalloc(fp->len + 1, sizeof(*ctx.offsets), GFP_KERNEL); if (ctx.offsets == NULL) return; diff --git a/arch/mips/netlogic/common/nlm-dma.c b/arch/mips/netlogic/common/nlm-dma.c index 3758715d4ab6..0630693bec2a 100644 --- a/arch/mips/netlogic/common/nlm-dma.c +++ b/arch/mips/netlogic/common/nlm-dma.c @@ -45,7 +45,7 @@ static char *nlm_swiotlb; static void *nlm_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs) + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); @@ -62,7 +62,7 @@ static void *nlm_dma_alloc_coherent(struct device *dev, size_t size, } static void nlm_dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs) + void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { swiotlb_free_coherent(dev, size, vaddr, dma_handle); } diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index f1b11f0dea2d..b4c02f29663e 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -112,7 +112,14 @@ static void pcibios_scanbus(struct pci_controller *hose) need_domain_info = 1; } - if (!pci_has_flag(PCI_PROBE_ONLY)) { + /* + * We insert PCI resources into the iomem_resource and + * ioport_resource trees in either pci_bus_claim_resources() + * or pci_bus_assign_resources(). + */ + if (pci_has_flag(PCI_PROBE_ONLY)) { + pci_bus_claim_resources(bus); + } else { pci_bus_size_bridges(bus); pci_bus_assign_resources(bus); } @@ -319,6 +326,16 @@ void pcibios_fixup_bus(struct pci_bus *bus) EXPORT_SYMBOL(PCIBIOS_MIN_IO); EXPORT_SYMBOL(PCIBIOS_MIN_MEM); +void pci_resource_to_user(const struct pci_dev *dev, int bar, + const struct resource *rsrc, resource_size_t *start, + resource_size_t *end) +{ + phys_addr_t size = resource_size(rsrc); + + *start = fixup_bigphys_addr(rsrc->start, size); + *end = rsrc->start + size; +} + int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine) { diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c index 77ecf32ef3dc..51599710472b 100644 --- a/arch/mips/pic32/pic32mzda/init.c +++ b/arch/mips/pic32/pic32mzda/init.c @@ -33,8 +33,8 @@ static ulong get_fdtaddr(void) { ulong ftaddr = 0; - if ((fw_arg0 == -2) && fw_arg1 && !fw_arg2 && !fw_arg3) - return (ulong)fw_arg1; + if (fw_passed_dtb && !fw_arg2 && !fw_arg3) + return (ulong)fw_passed_dtb; if (__dtb_start < __dtb_end) ftaddr = (ulong)__dtb_start; diff --git a/arch/mips/pistachio/init.c b/arch/mips/pistachio/init.c index c50a670e60d2..1c91cad7988f 100644 --- a/arch/mips/pistachio/init.c +++ b/arch/mips/pistachio/init.c @@ -59,29 +59,6 @@ const char *get_system_type(void) return sys_type; } -static void __init plat_setup_iocoherency(void) -{ - /* - * Kernel has been configured with software coherency - * but we might choose to turn it off and use hardware - * coherency instead. - */ - if (mips_cm_numiocu() != 0) { - /* Nothing special needs to be done to enable coherency */ - pr_info("CMP IOCU detected\n"); - hw_coherentio = 1; - if (coherentio == 0) - pr_info("Hardware DMA cache coherency disabled\n"); - else - pr_info("Hardware DMA cache coherency enabled\n"); - } else { - if (coherentio == 1) - pr_info("Hardware DMA cache coherency unsupported, but enabled from command line!\n"); - else - pr_info("Software DMA cache coherency enabled\n"); - } -} - void __init *plat_get_fdt(void) { if (fw_arg0 != -2) @@ -92,8 +69,6 @@ void __init *plat_get_fdt(void) void __init plat_mem_setup(void) { __dt_setup_arch(plat_get_fdt()); - - plat_setup_iocoherency(); } #define DEFAULT_CPC_BASE_ADDR 0x1bde0000 diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c index d40edda0ca3b..3c7c9bf57bf3 100644 --- a/arch/mips/ralink/mt7620.c +++ b/arch/mips/ralink/mt7620.c @@ -175,7 +175,7 @@ static struct rt2880_pmx_func spi_cs1_grp_mt7628[] = { }; static struct rt2880_pmx_func spis_grp_mt7628[] = { - FUNC("pwm", 3, 14, 4), + FUNC("pwm_uart2", 3, 14, 4), FUNC("util", 2, 14, 4), FUNC("gpio", 1, 14, 4), FUNC("spis", 0, 14, 4), diff --git a/arch/mips/sgi-ip22/ip22-reset.c b/arch/mips/sgi-ip22/ip22-reset.c index 063c2dd31e72..2f45b0357021 100644 --- a/arch/mips/sgi-ip22/ip22-reset.c +++ b/arch/mips/sgi-ip22/ip22-reset.c @@ -7,7 +7,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> -#include <linux/ds1286.h> +#include <linux/rtc/ds1286.h> #include <linux/module.h> #include <linux/interrupt.h> #include <linux/kernel.h> diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c index fb4b3520cdc6..7ee14f41fc25 100644 --- a/arch/mips/sni/time.c +++ b/arch/mips/sni/time.c @@ -8,7 +8,6 @@ #include <asm/sni.h> #include <asm/time.h> -#include <asm-generic/rtc.h> #define SNI_CLOCK_TICK_RATE 3686400 #define SNI_COUNTER2_DIV 64 diff --git a/arch/mips/txx9/generic/pci.c b/arch/mips/txx9/generic/pci.c index a77698ff2b6f..1f6bc9a3036c 100644 --- a/arch/mips/txx9/generic/pci.c +++ b/arch/mips/txx9/generic/pci.c @@ -268,7 +268,7 @@ static int txx9_i8259_irq_setup(int irq) return err; } -static void __init_refok quirk_slc90e66_bridge(struct pci_dev *dev) +static void __ref quirk_slc90e66_bridge(struct pci_dev *dev) { int irq; /* PCI/ISA Bridge interrupt */ u8 reg_64; diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 9627e81a6cbb..38e3494bfb63 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -236,7 +236,9 @@ source "kernel/Kconfig.hz" config MN10300_RTC bool "Using MN10300 RTC" depends on MN10300_PROC_MN103E010 || MN10300_PROC_MN2WS0050 - select GENERIC_CMOS_UPDATE + select RTC_CLASS + select RTC_DRV_CMOS + select RTC_SYSTOHC default n help This option enables support for the RTC, thus enabling time to be diff --git a/arch/mn10300/include/asm/rtc-regs.h b/arch/mn10300/include/asm/rtc-regs.h index c42deefaec11..c81cacecb6e3 100644 --- a/arch/mn10300/include/asm/rtc-regs.h +++ b/arch/mn10300/include/asm/rtc-regs.h @@ -75,9 +75,9 @@ #define RTC_PORT(x) 0xd8600000 #define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */ -#define CMOS_READ(addr) __SYSREG(0xd8600000 + (addr), u8) +#define CMOS_READ(addr) __SYSREG(0xd8600000 + (u32)(addr), u8) #define CMOS_WRITE(val, addr) \ - do { __SYSREG(0xd8600000 + (addr), u8) = val; } while (0) + do { __SYSREG(0xd8600000 + (u32)(addr), u8) = val; } while (0) #define RTC_IRQ RTIRQ diff --git a/arch/mn10300/include/asm/rtc.h b/arch/mn10300/include/asm/rtc.h index 6c14bb1d0d9b..07dc87656197 100644 --- a/arch/mn10300/include/asm/rtc.h +++ b/arch/mn10300/include/asm/rtc.h @@ -25,6 +25,4 @@ static inline void calibrate_clock(void) #endif /* !CONFIG_MN10300_RTC */ -#include <asm-generic/rtc.h> - #endif /* _ASM_RTC_H */ diff --git a/arch/mn10300/kernel/rtc.c b/arch/mn10300/kernel/rtc.c index 48d7058b3295..f81f37025072 100644 --- a/arch/mn10300/kernel/rtc.c +++ b/arch/mn10300/kernel/rtc.c @@ -12,107 +12,19 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/mc146818rtc.h> -#include <linux/bcd.h> -#include <linux/timex.h> +#include <linux/ioport.h> +#include <linux/platform_device.h> + #include <asm/rtc-regs.h> #include <asm/rtc.h> DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); -/* - * Read the current RTC time - */ -void read_persistent_clock(struct timespec *ts) -{ - struct rtc_time tm; - - get_rtc_time(&tm); - - ts->tv_nsec = 0; - ts->tv_sec = mktime(tm.tm_year, tm.tm_mon, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec); - - /* if rtc is way off in the past, set something reasonable */ - if (ts->tv_sec < 0) - ts->tv_sec = mktime(2009, 1, 1, 12, 0, 0); -} - -/* - * In order to set the CMOS clock precisely, set_rtc_mmss has to be called 500 - * ms after the second nowtime has started, because when nowtime is written - * into the registers of the CMOS clock, it will jump to the next second - * precisely 500 ms later. Check the Motorola MC146818A or Dallas DS12887 data - * sheet for details. - * - * BUG: This routine does not handle hour overflow properly; it just - * sets the minutes. Usually you'll only notice that after reboot! - */ -static int set_rtc_mmss(unsigned long nowtime) -{ - unsigned char save_control, save_freq_select; - int retval = 0; - int real_seconds, real_minutes, cmos_minutes; - - /* gets recalled with irq locally disabled */ - spin_lock(&rtc_lock); - save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being - * set */ - CMOS_WRITE(save_control | RTC_SET, RTC_CONTROL); - - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); /* stop and reset - * prescaler */ - CMOS_WRITE(save_freq_select | RTC_DIV_RESET2, RTC_FREQ_SELECT); - - cmos_minutes = CMOS_READ(RTC_MINUTES); - if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) - cmos_minutes = bcd2bin(cmos_minutes); - - /* - * since we're only adjusting minutes and seconds, - * don't interfere with hour overflow. This avoids - * messing with unknown time zones but requires your - * RTC not to be off by more than 15 minutes - */ - real_seconds = nowtime % 60; - real_minutes = nowtime / 60; - if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1) - /* correct for half hour time zone */ - real_minutes += 30; - real_minutes %= 60; - - if (abs(real_minutes - cmos_minutes) < 30) { - if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { - real_seconds = bin2bcd(real_seconds); - real_minutes = bin2bcd(real_minutes); - } - CMOS_WRITE(real_seconds, RTC_SECONDS); - CMOS_WRITE(real_minutes, RTC_MINUTES); - } else { - printk_once(KERN_NOTICE - "set_rtc_mmss: can't update from %d to %d\n", - cmos_minutes, real_minutes); - retval = -1; - } - - /* The following flags have to be released exactly in this order, - * otherwise the DS12887 (popular MC146818A clone with integrated - * battery and quartz) will not reset the oscillator and will not - * update precisely 500 ms later. You won't find this mentioned in - * the Dallas Semiconductor data sheets, but who believes data - * sheets anyway ... -- Markus Kuhn - */ - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - spin_unlock(&rtc_lock); - - return retval; -} - -int update_persistent_clock(struct timespec now) -{ - return set_rtc_mmss(now.tv_sec); -} +static const __initdata struct resource res[] = { + DEFINE_RES_IO(RTC_PORT(0), RTC_IO_EXTENT), + DEFINE_RES_IRQ(RTC_IRQ), +}; /* * calibrate the TSC clock against the RTC @@ -129,4 +41,6 @@ void __init calibrate_clock(void) RTCRA |= RTCRA_DVR; RTCRA &= ~RTCRA_DVR; RTCRB &= ~RTCRB_SET; + + platform_device_register_simple("rtc_cmos", -1, res, ARRAY_SIZE(res)); } diff --git a/arch/mn10300/mm/dma-alloc.c b/arch/mn10300/mm/dma-alloc.c index 8842394cb49a..4f4b9029f0ea 100644 --- a/arch/mn10300/mm/dma-alloc.c +++ b/arch/mn10300/mm/dma-alloc.c @@ -21,7 +21,7 @@ static unsigned long pci_sram_allocated = 0xbc000000; static void *mn10300_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs) + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { unsigned long addr; void *ret; @@ -63,7 +63,7 @@ done: } static void mn10300_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { unsigned long addr = (unsigned long) vaddr & ~0x20000000; @@ -75,7 +75,7 @@ static void mn10300_dma_free(struct device *dev, size_t size, void *vaddr, static int mn10300_dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; @@ -92,7 +92,7 @@ static int mn10300_dma_map_sg(struct device *dev, struct scatterlist *sglist, static dma_addr_t mn10300_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, - enum dma_data_direction direction, struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { return page_to_bus(page) + offset; } diff --git a/arch/mn10300/proc-mn103e010/proc-init.c b/arch/mn10300/proc-mn103e010/proc-init.c index 27b97980dca4..102d86a6ae56 100644 --- a/arch/mn10300/proc-mn103e010/proc-init.c +++ b/arch/mn10300/proc-mn103e010/proc-init.c @@ -9,7 +9,10 @@ * 2 of the Licence, or (at your option) any later version. */ #include <linux/kernel.h> +#include <linux/irq.h> +#include <asm/cacheflush.h> #include <asm/fpu.h> +#include <asm/irq.h> #include <asm/rtc.h> #include <asm/busctl-regs.h> diff --git a/arch/mn10300/proc-mn2ws0050/proc-init.c b/arch/mn10300/proc-mn2ws0050/proc-init.c index ee6d03dbc8d8..950cc8dbb284 100644 --- a/arch/mn10300/proc-mn2ws0050/proc-init.c +++ b/arch/mn10300/proc-mn2ws0050/proc-init.c @@ -14,6 +14,7 @@ #include <linux/delay.h> #include <linux/interrupt.h> +#include <asm/cacheflush.h> #include <asm/processor.h> #include <asm/uaccess.h> #include <asm/io.h> diff --git a/arch/nios2/mm/dma-mapping.c b/arch/nios2/mm/dma-mapping.c index 90422c367ed3..d800fad87896 100644 --- a/arch/nios2/mm/dma-mapping.c +++ b/arch/nios2/mm/dma-mapping.c @@ -59,7 +59,7 @@ static inline void __dma_sync_for_cpu(void *vaddr, size_t size, } static void *nios2_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs) + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { void *ret; @@ -84,7 +84,7 @@ static void *nios2_dma_alloc(struct device *dev, size_t size, } static void nios2_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { unsigned long addr = (unsigned long) CAC_ADDR((unsigned long) vaddr); @@ -93,7 +93,7 @@ static void nios2_dma_free(struct device *dev, size_t size, void *vaddr, static int nios2_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { int i; @@ -113,7 +113,7 @@ static int nios2_dma_map_sg(struct device *dev, struct scatterlist *sg, static dma_addr_t nios2_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { void *addr = page_address(page) + offset; @@ -123,14 +123,14 @@ static dma_addr_t nios2_dma_map_page(struct device *dev, struct page *page, static void nios2_dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { __dma_sync_for_cpu(phys_to_virt(dma_address), size, direction); } static void nios2_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { void *addr; int i; diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c index e75c75d249d6..c92fe4234009 100644 --- a/arch/nios2/mm/init.c +++ b/arch/nios2/mm/init.c @@ -89,7 +89,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) } #endif -void __init_refok free_initmem(void) +void __ref free_initmem(void) { free_initmem_default(-1); } diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index 0b77ddb1ee07..140c99140649 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -22,7 +22,6 @@ #include <linux/dma-mapping.h> #include <linux/dma-debug.h> #include <linux/export.h> -#include <linux/dma-attrs.h> #include <asm/cpuinfo.h> #include <asm/spr_defs.h> @@ -83,7 +82,7 @@ page_clear_nocache(pte_t *pte, unsigned long addr, static void * or1k_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long va; void *page; @@ -101,7 +100,7 @@ or1k_dma_alloc(struct device *dev, size_t size, va = (unsigned long)page; - if (!dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs)) { + if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) { /* * We need to iterate through the pages, clearing the dcache for * them and setting the cache-inhibit bit. @@ -117,7 +116,7 @@ or1k_dma_alloc(struct device *dev, size_t size, static void or1k_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { unsigned long va = (unsigned long)vaddr; struct mm_walk walk = { @@ -125,7 +124,7 @@ or1k_dma_free(struct device *dev, size_t size, void *vaddr, .mm = &init_mm }; - if (!dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs)) { + if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) { /* walk_page_range shouldn't be able to fail here */ WARN_ON(walk_page_range(va, va + size, &walk)); } @@ -137,7 +136,7 @@ static dma_addr_t or1k_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long cl; dma_addr_t addr = page_to_phys(page) + offset; @@ -170,7 +169,7 @@ or1k_map_page(struct device *dev, struct page *page, static void or1k_unmap_page(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { /* Nothing special to do here... */ } @@ -178,14 +177,14 @@ or1k_unmap_page(struct device *dev, dma_addr_t dma_handle, static int or1k_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *s; int i; for_each_sg(sg, s, nents, i) { s->dma_address = or1k_map_page(dev, sg_page(s), s->offset, - s->length, dir, NULL); + s->length, dir, 0); } return nents; @@ -194,13 +193,13 @@ or1k_map_sg(struct device *dev, struct scatterlist *sg, static void or1k_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *s; int i; for_each_sg(sg, s, nents, i) { - or1k_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, NULL); + or1k_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, 0); } } diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c index 5b2a95116e8f..fa60b81aee3e 100644 --- a/arch/openrisc/mm/ioremap.c +++ b/arch/openrisc/mm/ioremap.c @@ -38,7 +38,7 @@ static unsigned int fixmaps_used __initdata; * have to convert them into an offset in a page-aligned mapping, but the * caller shouldn't need to know that small detail. */ -void __iomem *__init_refok +void __iomem *__ref __ioremap(phys_addr_t addr, unsigned long size, pgprot_t prot) { phys_addr_t p; @@ -116,7 +116,7 @@ void iounmap(void *addr) * the memblock infrastructure. */ -pte_t __init_refok *pte_alloc_one_kernel(struct mm_struct *mm, +pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pte_t *pte; diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index dc117385ce2e..cd8778103165 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -31,6 +31,7 @@ config PARISC select TTY # Needed for pdc_cons.c select HAVE_DEBUG_STACKOVERFLOW select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HASH select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_UNSTABLE_SCHED_CLOCK if (SMP || !64BIT) diff --git a/arch/parisc/include/asm/hash.h b/arch/parisc/include/asm/hash.h new file mode 100644 index 000000000000..dbe93311aa26 --- /dev/null +++ b/arch/parisc/include/asm/hash.h @@ -0,0 +1,146 @@ +#ifndef _ASM_HASH_H +#define _ASM_HASH_H + +/* + * HP-PA only implements integer multiply in the FPU. However, for + * integer multiplies by constant, it has a number of shift-and-add + * (but no shift-and-subtract, sigh!) instructions that a compiler + * can synthesize a code sequence with. + * + * Unfortunately, GCC isn't very efficient at using them. For example + * it uses three instructions for "x *= 21" when only two are needed. + * But we can find a sequence manually. + */ + +#define HAVE_ARCH__HASH_32 1 + +/* + * This is a multiply by GOLDEN_RATIO_32 = 0x61C88647 optimized for the + * PA7100 pairing rules. This is an in-order 2-way superscalar processor. + * Only one instruction in a pair may be a shift (by more than 3 bits), + * but other than that, simple ALU ops (including shift-and-add by up + * to 3 bits) may be paired arbitrarily. + * + * PA8xxx processors also dual-issue ALU instructions, although with + * fewer constraints, so this schedule is good for them, too. + * + * This 6-step sequence was found by Yevgen Voronenko's implementation + * of the Hcub algorithm at http://spiral.ece.cmu.edu/mcm/gen.html. + */ +static inline u32 __attribute_const__ __hash_32(u32 x) +{ + u32 a, b, c; + + /* + * Phase 1: Compute a = (x << 19) + x, + * b = (x << 9) + a, c = (x << 23) + b. + */ + a = x << 19; /* Two shifts can't be paired */ + b = x << 9; a += x; + c = x << 23; b += a; + c += b; + /* Phase 2: Return (b<<11) + (c<<6) + (a<<3) - c */ + b <<= 11; + a += c << 3; b -= c; + return (a << 3) + b; +} + +#if BITS_PER_LONG == 64 + +#define HAVE_ARCH_HASH_64 1 + +/* + * Finding a good shift-and-add chain for GOLDEN_RATIO_64 is tricky, + * because available software for the purpose chokes on constants this + * large. (It's mostly designed for compiling FIR filter coefficients + * into FPGAs.) + * + * However, Jason Thong pointed out a work-around. The Hcub software + * (http://spiral.ece.cmu.edu/mcm/gen.html) is designed for *multiple* + * constant multiplication, and is good at finding shift-and-add chains + * which share common terms. + * + * Looking at 0x0x61C8864680B583EB in binary: + * 0110000111001000100001100100011010000000101101011000001111101011 + * \______________/ \__________/ \_______/ \________/ + * \____________________________/ \____________________/ + * you can see the non-zero bits are divided into several well-separated + * blocks. Hcub can find algorithms for those terms separately, which + * can then be shifted and added together. + * + * Dividing the input into 2, 3 or 4 blocks, Hcub can find solutions + * with 10, 9 or 8 adds, respectively, making a total of 11 for the + * whole number. + * + * Using just two large blocks, 0xC3910C8D << 31 in the high bits, + * and 0xB583EB in the low bits, produces as good an algorithm as any, + * and with one more small shift than alternatives. + * + * The high bits are a larger number and more work to compute, as well + * as needing one extra cycle to shift left 31 bits before the final + * addition, so they are the critical path for scheduling. The low bits + * can fit into the scheduling slots left over. + */ + + +/* + * This _ASSIGN(dst, src) macro performs "dst = src", but prevents GCC + * from inferring anything about the value assigned to "dest". + * + * This prevents it from mis-optimizing certain sequences. + * In particular, gcc is annoyingly eager to combine consecutive shifts. + * Given "x <<= 19; y += x; z += x << 1;", GCC will turn this into + * "y += x << 19; z += x << 20;" even though the latter sequence needs + * an additional instruction and temporary register. + * + * Because no actual assembly code is generated, this construct is + * usefully portable across all GCC platforms, and so can be test-compiled + * on non-PA systems. + * + * In two places, additional unused input dependencies are added. This + * forces GCC's scheduling so it does not rearrange instructions too much. + * Because the PA-8xxx is out of order, I'm not sure how much this matters, + * but why make it more difficult for the processor than necessary? + */ +#define _ASSIGN(dst, src, ...) asm("" : "=r" (dst) : "0" (src), ##__VA_ARGS__) + +/* + * Multiply by GOLDEN_RATIO_64 = 0x0x61C8864680B583EB using a heavily + * optimized shift-and-add sequence. + * + * Without the final shift, the multiply proper is 19 instructions, + * 10 cycles and uses only 4 temporaries. Whew! + * + * You are not expected to understand this. + */ +static __always_inline u32 __attribute_const__ +hash_64(u64 a, unsigned int bits) +{ + u64 b, c, d; + + /* + * Encourage GCC to move a dynamic shift to %sar early, + * thereby freeing up an additional temporary register. + */ + if (!__builtin_constant_p(bits)) + asm("" : "=q" (bits) : "0" (64 - bits)); + else + bits = 64 - bits; + + _ASSIGN(b, a*5); c = a << 13; + b = (b << 2) + a; _ASSIGN(d, a << 17); + a = b + (a << 1); c += d; + d = a << 10; _ASSIGN(a, a << 19); + d = a - d; _ASSIGN(a, a << 4, "X" (d)); + c += b; a += b; + d -= c; c += a << 1; + a += c << 3; _ASSIGN(b, b << (7+31), "X" (c), "X" (d)); + a <<= 31; b += d; + a += b; + return a >> bits; +} +#undef _ASSIGN /* We're a widely-used header file, so don't litter! */ + +#endif /* BITS_PER_LONG == 64 */ + +#endif /* _ASM_HASH_H */ diff --git a/arch/parisc/include/asm/mc146818rtc.h b/arch/parisc/include/asm/mc146818rtc.h deleted file mode 100644 index adf41631449f..000000000000 --- a/arch/parisc/include/asm/mc146818rtc.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Machine dependent access functions for RTC registers. - */ -#ifndef _ASM_MC146818RTC_H -#define _ASM_MC146818RTC_H - -/* empty include file to satisfy the include in genrtc.c */ - -#endif /* _ASM_MC146818RTC_H */ diff --git a/arch/parisc/include/asm/rtc.h b/arch/parisc/include/asm/rtc.h deleted file mode 100644 index 099d641a42c2..000000000000 --- a/arch/parisc/include/asm/rtc.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * include/asm-parisc/rtc.h - * - * Copyright 2002 Randolph CHung <tausq@debian.org> - * - * Based on: include/asm-ppc/rtc.h and the genrtc driver in the - * 2.4 parisc linux tree - */ - -#ifndef __ASM_RTC_H__ -#define __ASM_RTC_H__ - -#ifdef __KERNEL__ - -#include <linux/rtc.h> - -#include <asm/pdc.h> - -#define SECS_PER_HOUR (60 * 60) -#define SECS_PER_DAY (SECS_PER_HOUR * 24) - - -#define RTC_PIE 0x40 /* periodic interrupt enable */ -#define RTC_AIE 0x20 /* alarm interrupt enable */ -#define RTC_UIE 0x10 /* update-finished interrupt enable */ - -#define RTC_BATT_BAD 0x100 /* battery bad */ - -/* some dummy definitions */ -#define RTC_SQWE 0x08 /* enable square-wave output */ -#define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */ -#define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */ -#define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */ - -# define __isleap(year) \ - ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0)) - -/* How many days come before each month (0-12). */ -static const unsigned short int __mon_yday[2][13] = -{ - /* Normal years. */ - { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }, - /* Leap years. */ - { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } -}; - -static inline unsigned int get_rtc_time(struct rtc_time *wtime) -{ - struct pdc_tod tod_data; - long int days, rem, y; - const unsigned short int *ip; - - memset(wtime, 0, sizeof(*wtime)); - if (pdc_tod_read(&tod_data) < 0) - return RTC_24H | RTC_BATT_BAD; - - // most of the remainder of this function is: -// Copyright (C) 1991, 1993, 1997, 1998 Free Software Foundation, Inc. -// This was originally a part of the GNU C Library. -// It is distributed under the GPL, and was swiped from offtime.c - - - days = tod_data.tod_sec / SECS_PER_DAY; - rem = tod_data.tod_sec % SECS_PER_DAY; - - wtime->tm_hour = rem / SECS_PER_HOUR; - rem %= SECS_PER_HOUR; - wtime->tm_min = rem / 60; - wtime->tm_sec = rem % 60; - - y = 1970; - -#define DIV(a, b) ((a) / (b) - ((a) % (b) < 0)) -#define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400)) - - while (days < 0 || days >= (__isleap (y) ? 366 : 365)) - { - /* Guess a corrected year, assuming 365 days per year. */ - long int yg = y + days / 365 - (days % 365 < 0); - - /* Adjust DAYS and Y to match the guessed year. */ - days -= ((yg - y) * 365 - + LEAPS_THRU_END_OF (yg - 1) - - LEAPS_THRU_END_OF (y - 1)); - y = yg; - } - wtime->tm_year = y - 1900; - - ip = __mon_yday[__isleap(y)]; - for (y = 11; days < (long int) ip[y]; --y) - continue; - days -= ip[y]; - wtime->tm_mon = y; - wtime->tm_mday = days + 1; - - return RTC_24H; -} - -static int set_rtc_time(struct rtc_time *wtime) -{ - u_int32_t secs; - - secs = mktime(wtime->tm_year + 1900, wtime->tm_mon + 1, wtime->tm_mday, - wtime->tm_hour, wtime->tm_min, wtime->tm_sec); - - if(pdc_tod_set(secs, 0) < 0) - return -1; - else - return 0; - -} - -static inline unsigned int get_rtc_ss(void) -{ - struct rtc_time h; - - get_rtc_time(&h); - return h.tm_sec; -} - -static inline int get_rtc_pll(struct rtc_pll_info *pll) -{ - return -EINVAL; -} -static inline int set_rtc_pll(struct rtc_pll_info *pll) -{ - return -EINVAL; -} - -#endif /* __KERNEL__ */ -#endif /* __ASM_RTC_H__ */ diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 22395901d47b..e5d71905cad5 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -1354,9 +1354,9 @@ int pdc_pat_io_pci_cfg_read(unsigned long pci_addr, int pci_size, u32 *mem_addr) retval = mem_pdc_call(PDC_PAT_IO, PDC_PAT_IO_PCI_CONFIG_READ, __pa(pdc_result), pci_addr, pci_size); switch(pci_size) { - case 1: *(u8 *) mem_addr = (u8) pdc_result[0]; - case 2: *(u16 *)mem_addr = (u16) pdc_result[0]; - case 4: *(u32 *)mem_addr = (u32) pdc_result[0]; + case 1: *(u8 *) mem_addr = (u8) pdc_result[0]; break; + case 2: *(u16 *)mem_addr = (u16) pdc_result[0]; break; + case 4: *(u32 *)mem_addr = (u32) pdc_result[0]; break; } spin_unlock_irqrestore(&pdc_lock, flags); diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index a27e4928bf73..02d9ed0f3949 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -414,7 +414,7 @@ pcxl_dma_init(void) __initcall(pcxl_dma_init); static void *pa11_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs) + dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs) { unsigned long vaddr; unsigned long paddr; @@ -441,7 +441,7 @@ static void *pa11_dma_alloc(struct device *dev, size_t size, } static void pa11_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { int order; @@ -454,7 +454,7 @@ static void pa11_dma_free(struct device *dev, size_t size, void *vaddr, static dma_addr_t pa11_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, - enum dma_data_direction direction, struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { void *addr = page_address(page) + offset; BUG_ON(direction == DMA_NONE); @@ -465,7 +465,7 @@ static dma_addr_t pa11_dma_map_page(struct device *dev, struct page *page, static void pa11_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { BUG_ON(direction == DMA_NONE); @@ -484,7 +484,7 @@ static void pa11_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, static int pa11_dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { int i; struct scatterlist *sg; @@ -503,7 +503,7 @@ static int pa11_dma_map_sg(struct device *dev, struct scatterlist *sglist, static void pa11_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { int i; struct scatterlist *sg; @@ -577,11 +577,11 @@ struct dma_map_ops pcxl_dma_ops = { }; static void *pcx_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs) + dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs) { void *addr; - if (!dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs)) + if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) return NULL; addr = (void *)__get_free_pages(flag, get_order(size)); @@ -592,7 +592,7 @@ static void *pcx_dma_alloc(struct device *dev, size_t size, } static void pcx_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t iova, struct dma_attrs *attrs) + dma_addr_t iova, unsigned long attrs) { free_pages((unsigned long)vaddr, get_order(size)); return; diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 31ec99a5f119..505cf1ac5af2 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -12,6 +12,7 @@ */ #include <linux/errno.h> #include <linux/module.h> +#include <linux/rtc.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/param.h> @@ -248,14 +249,47 @@ void __init start_cpu_itimer(void) per_cpu(cpu_data, cpu).it_value = next_tick; } +#if IS_ENABLED(CONFIG_RTC_DRV_GENERIC) +static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm) +{ + struct pdc_tod tod_data; + + memset(tm, 0, sizeof(*tm)); + if (pdc_tod_read(&tod_data) < 0) + return -EOPNOTSUPP; + + /* we treat tod_sec as unsigned, so this can work until year 2106 */ + rtc_time64_to_tm(tod_data.tod_sec, tm); + return rtc_valid_tm(tm); +} + +static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm) +{ + time64_t secs = rtc_tm_to_time64(tm); + + if (pdc_tod_set(secs, 0) < 0) + return -EOPNOTSUPP; + + return 0; +} + +static const struct rtc_class_ops rtc_generic_ops = { + .read_time = rtc_generic_get_time, + .set_time = rtc_generic_set_time, +}; + static int __init rtc_init(void) { struct platform_device *pdev; - pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0); + pdev = platform_device_register_data(NULL, "rtc-generic", -1, + &rtc_generic_ops, + sizeof(rtc_generic_ops)); + return PTR_ERR_OR_ZERO(pdev); } device_initcall(rtc_init); +#endif void read_persistent_clock(struct timespec *ts) { diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c index fb8e10a4fb39..eaffbb90aa14 100644 --- a/arch/parisc/lib/iomap.c +++ b/arch/parisc/lib/iomap.c @@ -125,22 +125,22 @@ static void ioport_write32r(void __iomem *addr, const void *s, unsigned long n) } static const struct iomap_ops ioport_ops = { - ioport_read8, - ioport_read16, - ioport_read16, - ioport_read32, - ioport_read32, - ioport_write8, - ioport_write16, - ioport_write16, - ioport_write32, - ioport_write32, - ioport_read8r, - ioport_read16r, - ioport_read32r, - ioport_write8r, - ioport_write16r, - ioport_write32r, + .read8 = ioport_read8, + .read16 = ioport_read16, + .read16be = ioport_read16, + .read32 = ioport_read32, + .read32be = ioport_read32, + .write8 = ioport_write8, + .write16 = ioport_write16, + .write16be = ioport_write16, + .write32 = ioport_write32, + .write32be = ioport_write32, + .read8r = ioport_read8r, + .read16r = ioport_read16r, + .read32r = ioport_read32r, + .write8r = ioport_write8r, + .write16r = ioport_write16r, + .write32r = ioport_write32r, }; /* Legacy I/O memory ops */ @@ -244,22 +244,22 @@ static void iomem_write32r(void __iomem *addr, const void *s, unsigned long n) } static const struct iomap_ops iomem_ops = { - iomem_read8, - iomem_read16, - iomem_read16be, - iomem_read32, - iomem_read32be, - iomem_write8, - iomem_write16, - iomem_write16be, - iomem_write32, - iomem_write32be, - iomem_read8r, - iomem_read16r, - iomem_read32r, - iomem_write8r, - iomem_write16r, - iomem_write32r, + .read8 = iomem_read8, + .read16 = iomem_read16, + .read16be = iomem_read16be, + .read32 = iomem_read32, + .read32be = iomem_read32be, + .write8 = iomem_write8, + .write16 = iomem_write16, + .write16be = iomem_write16be, + .write32 = iomem_write32, + .write32be = iomem_write32be, + .read8r = iomem_read8r, + .read16r = iomem_read16r, + .read32r = iomem_read32r, + .write8r = iomem_write8r, + .write16r = iomem_write16r, + .write32r = iomem_write32r, }; static const struct iomap_ops *iomap_ops[8] = { diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ec4047e170a0..927d2ab2ce08 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -166,6 +166,7 @@ config PPC select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select GENERIC_CPU_AUTOPROBE select HAVE_VIRT_CPU_ACCOUNTING + select HAVE_ARCH_HARDENED_USERCOPY config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 171047822b56..63292f64b25a 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -60,6 +60,25 @@ config CODE_PATCHING_SELFTEST depends on DEBUG_KERNEL default n +config JUMP_LABEL_FEATURE_CHECKS + bool "Enable use of jump label for cpu/mmu_has_feature()" + depends on JUMP_LABEL + default y + help + Selecting this options enables use of jump labels for some internal + feature checks. This should generate more optimal code for those + checks. + +config JUMP_LABEL_FEATURE_CHECK_DEBUG + bool "Do extra check on feature fixup calls" + depends on DEBUG_KERNEL && JUMP_LABEL_FEATURE_CHECKS + default n + help + This tries to catch incorrect usage of cpu_has_feature() and + mmu_has_feature() in the code. + + If you don't know what this means, say N. + config FTR_FIXUP_SELFTEST bool "Run self-tests of the feature-fixup code" depends on DEBUG_KERNEL diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 4cd612a6e272..1a2a6e8dc40d 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -43,7 +43,7 @@ ifeq ($(call cc-option-yn, -fstack-protector),y) BOOTCFLAGS += -fno-stack-protector endif -BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj) +BOOTCFLAGS += -I$(objtree)/$(obj) -I$(srctree)/$(obj) DTC_FLAGS ?= -p 1024 diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h b/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h index 60f47649306f..c45189aa7476 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h @@ -11,4 +11,19 @@ extern unsigned long radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); + +static inline int hstate_get_psize(struct hstate *hstate) +{ + unsigned long shift; + + shift = huge_page_shift(hstate); + if (shift == mmu_psize_defs[MMU_PAGE_2M].shift) + return MMU_PAGE_2M; + else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift) + return MMU_PAGE_1G; + else { + WARN(1, "Wrong huge page shift\n"); + return mmu_virtual_psize; + } +} #endif diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 5eaf86ac143d..287a656ceb57 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -24,6 +24,7 @@ #include <asm/book3s/64/pgtable.h> #include <asm/bug.h> #include <asm/processor.h> +#include <asm/cpu_has_feature.h> /* * SLB @@ -190,6 +191,15 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) BUG(); } +static inline unsigned long get_sllp_encoding(int psize) +{ + unsigned long sllp; + + sllp = ((mmu_psize_defs[psize].sllp & SLB_VSID_L) >> 6) | + ((mmu_psize_defs[psize].sllp & SLB_VSID_LP) >> 4); + return sllp; +} + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index d4eda6420523..8afb0e00f7d9 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -23,13 +23,6 @@ struct mmu_psize_def { }; extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; -#ifdef CONFIG_PPC_RADIX_MMU -#define radix_enabled() mmu_has_feature(MMU_FTR_RADIX) -#else -#define radix_enabled() (0) -#endif - - #endif /* __ASSEMBLY__ */ /* 64-bit classic hash table MMU */ @@ -107,6 +100,9 @@ extern int mmu_vmemmap_psize; extern int mmu_io_psize; /* MMU initialization */ +void mmu_early_init_devtree(void); +void hash__early_init_devtree(void); +void radix__early_init_devtree(void); extern void radix_init_native(void); extern void hash__early_init_mmu(void); extern void radix__early_init_mmu(void); @@ -132,11 +128,15 @@ extern void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) { - if (radix_enabled()) + if (early_radix_enabled()) return radix__setup_initial_memory_limit(first_memblock_base, first_memblock_size); return hash__setup_initial_memory_limit(first_memblock_base, first_memblock_size); } + +extern int (*register_process_table)(unsigned long base, unsigned long page_size, + unsigned long tbl_size); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */ diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h index f12ddf5e8de5..2f6373144e2c 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h @@ -75,11 +75,6 @@ static inline void hash__flush_tlb_page(struct vm_area_struct *vma, { } -static inline void hash__flush_tlb_page_nohash(struct vm_area_struct *vma, - unsigned long vmaddr) -{ -} - static inline void hash__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 00703e7e4c94..65037762b120 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -10,26 +10,32 @@ static inline int mmu_get_ap(int psize) return mmu_psize_defs[psize].ap; } +extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end); +extern void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, + unsigned long end, int psize); +extern void radix__flush_pmd_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end); extern void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end); extern void radix__local_flush_tlb_mm(struct mm_struct *mm); extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -extern void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, - unsigned long ap, int nid); extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); +extern void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, + int psize); extern void radix__tlb_flush(struct mmu_gather *tlb); #ifdef CONFIG_SMP extern void radix__flush_tlb_mm(struct mm_struct *mm); extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -extern void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, - unsigned long ap, int nid); extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); +extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, + int psize); #else #define radix__flush_tlb_mm(mm) radix__local_flush_tlb_mm(mm) #define radix__flush_tlb_page(vma,addr) radix__local_flush_tlb_page(vma,addr) -#define radix___flush_tlb_page(mm,addr,p,i) radix___local_flush_tlb_page(mm,addr,p,i) +#define radix__flush_tlb_page_psize(mm,addr,p) radix__local_flush_tlb_page_psize(mm,addr,p) #define radix__flush_tlb_pwc(tlb, addr) radix__local_flush_tlb_pwc(tlb, addr) #endif extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 96e5769b18b0..72b925f97bab 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -7,6 +7,25 @@ #include <asm/book3s/64/tlbflush-hash.h> #include <asm/book3s/64/tlbflush-radix.h> +#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE +static inline void flush_pmd_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + if (radix_enabled()) + return radix__flush_pmd_tlb_range(vma, start, end); + return hash__flush_tlb_range(vma, start, end); +} + +#define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE +static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma, + unsigned long start, + unsigned long end) +{ + if (radix_enabled()) + return radix__flush_hugetlb_tlb_range(vma, start, end); + return hash__flush_tlb_range(vma, start, end); +} + static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { @@ -38,14 +57,6 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, return hash__local_flush_tlb_page(vma, vmaddr); } -static inline void flush_tlb_page_nohash(struct vm_area_struct *vma, - unsigned long vmaddr) -{ - if (radix_enabled()) - return radix__flush_tlb_page(vma, vmaddr); - return hash__flush_tlb_page_nohash(vma, vmaddr); -} - static inline void tlb_flush(struct mmu_gather *tlb) { if (radix_enabled()) diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 69fb16d7a811..b77f0364df94 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -11,6 +11,7 @@ #include <linux/mm.h> #include <asm/cputable.h> +#include <asm/cpu_has_feature.h> /* * No cache flushing is required when address mappings are changed, diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h new file mode 100644 index 000000000000..2ef55f8968a2 --- /dev/null +++ b/arch/powerpc/include/asm/cpu_has_feature.h @@ -0,0 +1,53 @@ +#ifndef __ASM_POWERPC_CPUFEATURES_H +#define __ASM_POWERPC_CPUFEATURES_H + +#ifndef __ASSEMBLY__ + +#include <linux/bug.h> +#include <asm/cputable.h> + +static inline bool early_cpu_has_feature(unsigned long feature) +{ + return !!((CPU_FTRS_ALWAYS & feature) || + (CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature)); +} + +#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS +#include <linux/jump_label.h> + +#define NUM_CPU_FTR_KEYS 64 + +extern struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS]; + +static __always_inline bool cpu_has_feature(unsigned long feature) +{ + int i; + + BUILD_BUG_ON(!__builtin_constant_p(feature)); + +#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG + if (!static_key_initialized) { + printk("Warning! cpu_has_feature() used prior to jump label init!\n"); + dump_stack(); + return early_cpu_has_feature(feature); + } +#endif + + if (CPU_FTRS_ALWAYS & feature) + return true; + + if (!(CPU_FTRS_POSSIBLE & feature)) + return false; + + i = __builtin_ctzl(feature); + return static_branch_likely(&cpu_feature_keys[i]); +} +#else +static inline bool cpu_has_feature(unsigned long feature) +{ + return early_cpu_has_feature(feature); +} +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_POWERPC_CPUFEATURE_H */ diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index df4fb5faba43..82026b419341 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -2,6 +2,7 @@ #define __ASM_POWERPC_CPUTABLE_H +#include <linux/types.h> #include <asm/asm-compat.h> #include <asm/feature-fixups.h> #include <uapi/asm/cputable.h> @@ -122,6 +123,12 @@ extern void do_feature_fixups(unsigned long value, void *fixup_start, extern const char *powerpc_base_platform; +#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS +extern void cpu_feature_keys_init(void); +#else +static inline void cpu_feature_keys_init(void) { } +#endif + /* TLB flush actions. Used as argument to cpu_spec.flush_tlb() hook */ enum { TLB_INVAL_SCOPE_GLOBAL = 0, /* invalidate all TLBs */ @@ -576,14 +583,6 @@ enum { }; #endif /* __powerpc64__ */ -static inline int cpu_has_feature(unsigned long feature) -{ - return (CPU_FTRS_ALWAYS & feature) || - (CPU_FTRS_POSSIBLE - & cur_cpu_spec->cpu_features - & feature); -} - #define HBP_NUM 1 #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 2dfd4fc41f3e..4f60db074725 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -28,6 +28,7 @@ static inline void setup_cputime_one_jiffy(void) { } #include <asm/div64.h> #include <asm/time.h> #include <asm/param.h> +#include <asm/cpu_has_feature.h> typedef u64 __nocast cputime_t; typedef u64 __nocast cputime64_t; diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h index 5fa6b20eba10..378167377065 100644 --- a/arch/powerpc/include/asm/dbell.h +++ b/arch/powerpc/include/asm/dbell.h @@ -16,6 +16,7 @@ #include <linux/threads.h> #include <asm/ppc-opcode.h> +#include <asm/cpu_has_feature.h> #define PPC_DBELL_MSG_BRDCAST (0x04000000) #define PPC_DBELL_TYPE(x) (((x) & 0xf) << (63-36)) diff --git a/arch/powerpc/include/asm/dcr-native.h b/arch/powerpc/include/asm/dcr-native.h index 4efc11dacb98..4a2beef74277 100644 --- a/arch/powerpc/include/asm/dcr-native.h +++ b/arch/powerpc/include/asm/dcr-native.h @@ -24,6 +24,7 @@ #include <linux/spinlock.h> #include <asm/cputable.h> +#include <asm/cpu_has_feature.h> typedef struct { unsigned int base; diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 77816acd4fd9..84e3f8dd5e4f 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -13,7 +13,6 @@ /* need struct page definitions */ #include <linux/mm.h> #include <linux/scatterlist.h> -#include <linux/dma-attrs.h> #include <linux/dma-debug.h> #include <asm/io.h> #include <asm/swiotlb.h> @@ -25,14 +24,14 @@ /* Some dma direct funcs must be visible for use in other dma_ops */ extern void *__dma_direct_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs); + unsigned long attrs); extern void __dma_direct_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs); + unsigned long attrs); extern int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t handle, - size_t size, struct dma_attrs *attrs); + size_t size, unsigned long attrs); #ifdef CONFIG_NOT_COHERENT_CACHE /* diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h new file mode 100644 index 000000000000..88b4901ac4ee --- /dev/null +++ b/arch/powerpc/include/asm/hmi.h @@ -0,0 +1,45 @@ +/* + * Hypervisor Maintenance Interrupt header file. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. + * + * Copyright 2015 IBM Corporation + * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> + */ + +#ifndef __ASM_PPC64_HMI_H__ +#define __ASM_PPC64_HMI_H__ + +#ifdef CONFIG_PPC_BOOK3S_64 + +#define CORE_TB_RESYNC_REQ_BIT 63 +#define MAX_SUBCORE_PER_CORE 4 + +/* + * sibling_subcore_state structure is used to co-ordinate all threads + * during HMI to avoid TB corruption. This structure is allocated once + * per each core and shared by all threads on that core. + */ +struct sibling_subcore_state { + unsigned long flags; + u8 in_guest[MAX_SUBCORE_PER_CORE]; +}; + +extern void wait_for_subcore_guest_exit(void); +extern void wait_for_tb_resync(void); +#else +static inline void wait_for_subcore_guest_exit(void) { } +static inline void wait_for_tb_resync(void) { } +#endif +#endif /* __ASM_PPC64_HMI_H__ */ diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index e2d9f4996e5c..c5517f463ec7 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -147,7 +147,7 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, { pte_t pte; pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); - flush_tlb_page(vma, addr); + flush_hugetlb_page(vma, addr); } static inline int huge_pte_none(pte_t pte) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index f49a72a9062d..2c1d50792944 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -53,7 +53,7 @@ struct iommu_table_ops { long index, long npages, unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs); + unsigned long attrs); #ifdef CONFIG_IOMMU_API /* * Exchanges existing TCE with new TCE plus direction bits; @@ -248,12 +248,12 @@ extern int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, struct scatterlist *sglist, int nelems, unsigned long mask, enum dma_data_direction direction, - struct dma_attrs *attrs); + unsigned long attrs); extern void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs); + unsigned long attrs); extern void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, size_t size, dma_addr_t *dma_handle, @@ -264,10 +264,10 @@ extern dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, struct page *page, unsigned long offset, size_t size, unsigned long mask, enum dma_data_direction direction, - struct dma_attrs *attrs); + unsigned long attrs); extern void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs); + unsigned long attrs); extern void iommu_init_early_pSeries(void); extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops); diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 47e155f15433..9a287e0ac8b1 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -14,6 +14,7 @@ #include <linux/types.h> #include <asm/feature-fixups.h> +#include <asm/asm-compat.h> #define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG) #define JUMP_LABEL_NOP_SIZE 4 @@ -21,7 +22,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:\n\t" - "nop\n\t" + "nop # arch_static_branch\n\t" ".pushsection __jump_table, \"aw\"\n\t" JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" ".popsection \n\t" @@ -35,7 +36,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { asm_volatile_goto("1:\n\t" - "b %l[l_yes]\n\t" + "b %l[l_yes] # arch_static_branch_jump\n\t" ".pushsection __jump_table, \"aw\"\n\t" JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" ".popsection \n\t" diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 1f4497fb5b83..88d17b4ea9c8 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -181,8 +181,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, switch (b_psize) { case MMU_PAGE_4K: - sllp = ((mmu_psize_defs[a_psize].sllp & SLB_VSID_L) >> 6) | - ((mmu_psize_defs[a_psize].sllp & SLB_VSID_LP) >> 4); + sllp = get_sllp_encoding(a_psize); rb |= sllp << 5; /* AP field */ rb |= (va_low & 0x7ff) << 12; /* remaining 11 bits of AVA */ break; diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 76f5398e7152..0420b388dd83 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -219,8 +219,6 @@ struct machdep_calls { #ifdef CONFIG_ARCH_RANDOM int (*get_random_seed)(unsigned long *v); #endif - int (*register_process_table)(unsigned long base, unsigned long page_size, - unsigned long tbl_size); }; extern void e500_idle(void); diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index 2563c435a4b1..30922f699341 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -13,6 +13,7 @@ #include <asm/cputable.h> #include <linux/mm.h> +#include <asm/cpu_has_feature.h> /* * This file is included by linux/mman.h, so we can't use cacl_vm_prot_bits() @@ -31,13 +32,13 @@ static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags) } #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags) -static inline int arch_validate_prot(unsigned long prot) +static inline bool arch_validate_prot(unsigned long prot) { if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO)) - return 0; + return false; if ((prot & PROT_SAO) && !cpu_has_feature(CPU_FTR_SAO)) - return 0; - return 1; + return false; + return true; } #define arch_validate_prot(prot) arch_validate_prot(prot) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 54471228f7b8..e2fb408f8398 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -12,7 +12,7 @@ */ /* - * First half is MMU families + * MMU families */ #define MMU_FTR_HPTE_TABLE ASM_CONST(0x00000001) #define MMU_FTR_TYPE_8xx ASM_CONST(0x00000002) @@ -21,9 +21,13 @@ #define MMU_FTR_TYPE_FSL_E ASM_CONST(0x00000010) #define MMU_FTR_TYPE_47x ASM_CONST(0x00000020) +/* Radix page table supported and enabled */ +#define MMU_FTR_TYPE_RADIX ASM_CONST(0x00000040) + /* - * This is individual features + * Individual features below. */ + /* * We need to clear top 16bits of va (from the remaining 64 bits )in * tlbie* instructions @@ -93,11 +97,6 @@ */ #define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000) -/* - * Radix page table available - */ -#define MMU_FTR_RADIX ASM_CONST(0x80000000) - /* MMU feature bit sets for various CPUs */ #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \ MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2 @@ -113,6 +112,7 @@ #define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ MMU_FTR_CI_LARGE_PAGE | MMU_FTR_NO_SLBIE_B #ifndef __ASSEMBLY__ +#include <linux/bug.h> #include <asm/cputable.h> #ifdef CONFIG_PPC_FSL_BOOK3E @@ -131,20 +131,71 @@ enum { MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE | MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA | #ifdef CONFIG_PPC_RADIX_MMU - MMU_FTR_RADIX | + MMU_FTR_TYPE_RADIX | #endif 0, }; -static inline int mmu_has_feature(unsigned long feature) +static inline bool early_mmu_has_feature(unsigned long feature) { - return (MMU_FTRS_POSSIBLE & cur_cpu_spec->mmu_features & feature); + return !!(MMU_FTRS_POSSIBLE & cur_cpu_spec->mmu_features & feature); +} + +#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS +#include <linux/jump_label.h> + +#define NUM_MMU_FTR_KEYS 32 + +extern struct static_key_true mmu_feature_keys[NUM_MMU_FTR_KEYS]; + +extern void mmu_feature_keys_init(void); + +static __always_inline bool mmu_has_feature(unsigned long feature) +{ + int i; + + BUILD_BUG_ON(!__builtin_constant_p(feature)); + +#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG + if (!static_key_initialized) { + printk("Warning! mmu_has_feature() used prior to jump label init!\n"); + dump_stack(); + return early_mmu_has_feature(feature); + } +#endif + + if (!(MMU_FTRS_POSSIBLE & feature)) + return false; + + i = __builtin_ctzl(feature); + return static_branch_likely(&mmu_feature_keys[i]); } static inline void mmu_clear_feature(unsigned long feature) { + int i; + + i = __builtin_ctzl(feature); cur_cpu_spec->mmu_features &= ~feature; + static_branch_disable(&mmu_feature_keys[i]); } +#else + +static inline void mmu_feature_keys_init(void) +{ + +} + +static inline bool mmu_has_feature(unsigned long feature) +{ + return early_mmu_has_feature(feature); +} + +static inline void mmu_clear_feature(unsigned long feature) +{ + cur_cpu_spec->mmu_features &= ~feature; +} +#endif /* CONFIG_JUMP_LABEL */ extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup; @@ -164,6 +215,28 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr) } #endif /* !CONFIG_DEBUG_VM */ +#ifdef CONFIG_PPC_RADIX_MMU +static inline bool radix_enabled(void) +{ + return mmu_has_feature(MMU_FTR_TYPE_RADIX); +} + +static inline bool early_radix_enabled(void) +{ + return early_mmu_has_feature(MMU_FTR_TYPE_RADIX); +} +#else +static inline bool radix_enabled(void) +{ + return false; +} + +static inline bool early_radix_enabled(void) +{ + return false; +} +#endif + #endif /* !__ASSEMBLY__ */ /* The kernel use the constants below to index in the page sizes array. @@ -210,6 +283,7 @@ extern void early_init_mmu(void); extern void early_init_mmu_secondary(void); extern void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size); +static inline void mmu_early_init_devtree(void) { } #endif /* __ASSEMBLY__ */ #endif @@ -230,9 +304,5 @@ extern void setup_initial_memory_limit(phys_addr_t first_memblock_base, # include <asm/mmu-8xx.h> #endif -#ifndef radix_enabled -#define radix_enabled() (0) -#endif - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_MMU_H_ */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index ad171e979ab0..148303e7771f 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -26,6 +26,7 @@ #include <asm/kvm_book3s_asm.h> #endif #include <asm/accounting.h> +#include <asm/hmi.h> register struct paca_struct *local_paca asm("r13"); @@ -182,6 +183,11 @@ struct paca_struct { */ u16 in_mce; u8 hmi_event_available; /* HMI event is available */ + /* + * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for + * more details + */ + struct sibling_subcore_state *sibling_subcore_state; #endif /* Stuff for accurate time accounting */ diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index a6f3ac0d4602..e9bd6cf0212f 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -136,9 +136,6 @@ extern pgprot_t pci_phys_mem_access_prot(struct file *file, pgprot_t prot); #define HAVE_ARCH_PCI_RESOURCE_TO_USER -extern void pci_resource_to_user(const struct pci_dev *dev, int bar, - const struct resource *rsrc, - resource_size_t *start, resource_size_t *end); extern resource_size_t pcibios_io_space_offset(struct pci_controller *hose); extern void pcibios_setup_bus_devices(struct pci_bus *bus); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 40f3615bf940..f69f40f1519a 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1256,15 +1256,6 @@ static inline void msr_check_and_clear(unsigned long bits) __msr_check_and_clear(bits); } -static inline unsigned long mfvtb (void) -{ -#ifdef CONFIG_PPC_BOOK3S_64 - if (cpu_has_feature(CPU_FTR_ARCH_207S)) - return mfspr(SPRN_VTB); -#endif - return 0; -} - #ifdef __powerpc64__ #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E) #define mftb() ({unsigned long rval; \ diff --git a/arch/powerpc/include/asm/rtc.h b/arch/powerpc/include/asm/rtc.h deleted file mode 100644 index f5802926b6c0..000000000000 --- a/arch/powerpc/include/asm/rtc.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Real-time clock definitions and interfaces - * - * Author: Tom Rini <trini@mvista.com> - * - * 2002 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - * - * Based on: - * include/asm-m68k/rtc.h - * - * Copyright Richard Zidlicky - * implementation details for genrtc/q40rtc driver - * - * And the old drivers/macintosh/rtc.c which was heavily based on: - * Linux/SPARC Real Time Clock Driver - * Copyright (C) 1996 Thomas K. Dyas (tdyas@eden.rutgers.edu) - * - * With additional work by Paul Mackerras and Franz Sirl. - */ - -#ifndef __ASM_POWERPC_RTC_H__ -#define __ASM_POWERPC_RTC_H__ - -#ifdef __KERNEL__ - -#include <linux/rtc.h> - -#include <asm/machdep.h> -#include <asm/time.h> - -#define RTC_PIE 0x40 /* periodic interrupt enable */ -#define RTC_AIE 0x20 /* alarm interrupt enable */ -#define RTC_UIE 0x10 /* update-finished interrupt enable */ - -/* some dummy definitions */ -#define RTC_BATT_BAD 0x100 /* battery bad */ -#define RTC_SQWE 0x08 /* enable square-wave output */ -#define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */ -#define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */ -#define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */ - -static inline unsigned int get_rtc_time(struct rtc_time *time) -{ - if (ppc_md.get_rtc_time) - ppc_md.get_rtc_time(time); - return RTC_24H; -} - -/* Set the current date and time in the real time clock. */ -static inline int set_rtc_time(struct rtc_time *time) -{ - if (ppc_md.set_rtc_time) - return ppc_md.set_rtc_time(time); - return -EINVAL; -} - -static inline unsigned int get_rtc_ss(void) -{ - struct rtc_time h; - - get_rtc_time(&h); - return h.tm_sec; -} - -static inline int get_rtc_pll(struct rtc_pll_info *pll) -{ - return -EINVAL; -} -static inline int set_rtc_pll(struct rtc_pll_info *pll) -{ - return -EINVAL; -} - -#endif /* __KERNEL__ */ -#endif /* __ASM_POWERPC_RTC_H__ */ diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 17c8380673a6..0a74ebe934e1 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -75,6 +75,14 @@ static inline void disable_kernel_spe(void) static inline void __giveup_spe(struct task_struct *t) { } #endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +extern void flush_tmregs_to_thread(struct task_struct *); +#else +static inline void flush_tmregs_to_thread(struct task_struct *t) +{ +} +#endif + static inline void clear_task_ebb(struct task_struct *t) { #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index b21bb1f72314..87e4b2d8dcd4 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -138,40 +138,15 @@ static inline struct thread_info *current_thread_info(void) /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ #define TLF_NAPPING 0 /* idle thread enabled NAP mode */ #define TLF_SLEEPING 1 /* suspend code enabled SLEEP mode */ -#define TLF_RESTORE_SIGMASK 2 /* Restore signal mask in do_signal */ #define TLF_LAZY_MMU 3 /* tlb_batch is active */ #define TLF_RUNLATCH 4 /* Is the runlatch enabled? */ #define _TLF_NAPPING (1 << TLF_NAPPING) #define _TLF_SLEEPING (1 << TLF_SLEEPING) -#define _TLF_RESTORE_SIGMASK (1 << TLF_RESTORE_SIGMASK) #define _TLF_LAZY_MMU (1 << TLF_LAZY_MMU) #define _TLF_RUNLATCH (1 << TLF_RUNLATCH) #ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->local_flags |= _TLF_RESTORE_SIGMASK; - WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags)); -} -static inline void clear_restore_sigmask(void) -{ - current_thread_info()->local_flags &= ~_TLF_RESTORE_SIGMASK; -} -static inline bool test_restore_sigmask(void) -{ - return current_thread_info()->local_flags & _TLF_RESTORE_SIGMASK; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - if (!(ti->local_flags & _TLF_RESTORE_SIGMASK)) - return false; - ti->local_flags &= ~_TLF_RESTORE_SIGMASK; - return true; -} static inline bool test_thread_local_flags(unsigned int flags) { diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 09211640a0e0..b240666b7bc1 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -18,6 +18,7 @@ #include <linux/percpu.h> #include <asm/processor.h> +#include <asm/cpu_has_feature.h> /* time.c */ extern unsigned long tb_ticks_per_jiffy; @@ -103,7 +104,7 @@ static inline u64 get_vtb(void) { #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_ARCH_207S)) - return mfvtb(); + return mfspr(SPRN_VTB); #endif return 0; } diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index 20733fa518ae..f6f68f73e858 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -46,5 +46,18 @@ static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, #endif } +#ifdef CONFIG_SMP +static inline int mm_is_core_local(struct mm_struct *mm) +{ + return cpumask_subset(mm_cpumask(mm), + topology_sibling_cpumask(smp_processor_id())); +} +#else +static inline int mm_is_core_local(struct mm_struct *mm) +{ + return 1; +} +#endif + #endif /* __KERNEL__ */ #endif /* __ASM_POWERPC_TLB_H */ diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 1b38eea28e5a..13dbcd41885e 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -54,7 +54,6 @@ extern void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, #define flush_tlb_page(vma,addr) local_flush_tlb_page(vma,addr) #define __flush_tlb_page(mm,addr,p,i) __local_flush_tlb_page(mm,addr,p,i) #endif -#define flush_tlb_page_nohash(vma,addr) flush_tlb_page(vma,addr) #elif defined(CONFIG_PPC_STD_MMU_32) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index b7c20f0b8fbe..c1dc6c14deb8 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -310,10 +310,15 @@ static inline unsigned long copy_from_user(void *to, { unsigned long over; - if (access_ok(VERIFY_READ, from, n)) + if (access_ok(VERIFY_READ, from, n)) { + if (!__builtin_constant_p(n)) + check_object_size(to, n, false); return __copy_tofrom_user((__force void __user *)to, from, n); + } if ((unsigned long)from < TASK_SIZE) { over = (unsigned long)from + n - TASK_SIZE; + if (!__builtin_constant_p(n - over)) + check_object_size(to, n - over, false); return __copy_tofrom_user((__force void __user *)to, from, n - over) + over; } @@ -325,10 +330,15 @@ static inline unsigned long copy_to_user(void __user *to, { unsigned long over; - if (access_ok(VERIFY_WRITE, to, n)) + if (access_ok(VERIFY_WRITE, to, n)) { + if (!__builtin_constant_p(n)) + check_object_size(from, n, true); return __copy_tofrom_user(to, (__force void __user *)from, n); + } if ((unsigned long)to < TASK_SIZE) { over = (unsigned long)to + n - TASK_SIZE; + if (!__builtin_constant_p(n)) + check_object_size(from, n - over, true); return __copy_tofrom_user(to, (__force void __user *)from, n - over) + over; } @@ -372,6 +382,10 @@ static inline unsigned long __copy_from_user_inatomic(void *to, if (ret == 0) return 0; } + + if (!__builtin_constant_p(n)) + check_object_size(to, n, false); + return __copy_tofrom_user((__force void __user *)to, from, n); } @@ -398,6 +412,9 @@ static inline unsigned long __copy_to_user_inatomic(void __user *to, if (ret == 0) return 0; } + if (!__builtin_constant_p(n)) + check_object_size(from, n, true); + return __copy_tofrom_user(to, (__force const void __user *)from, n); } diff --git a/arch/powerpc/include/asm/xor.h b/arch/powerpc/include/asm/xor.h index 0abb97f3be10..a36c2069d8ed 100644 --- a/arch/powerpc/include/asm/xor.h +++ b/arch/powerpc/include/asm/xor.h @@ -23,6 +23,7 @@ #ifdef CONFIG_ALTIVEC #include <asm/cputable.h> +#include <asm/cpu_has_feature.h> void xor_altivec_2(unsigned long bytes, unsigned long *v1_in, unsigned long *v2_in); diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h index c2d21d11c2d2..3a9e44c45c78 100644 --- a/arch/powerpc/include/uapi/asm/elf.h +++ b/arch/powerpc/include/uapi/asm/elf.h @@ -91,6 +91,11 @@ #define ELF_NGREG 48 /* includes nip, msr, lr, etc. */ #define ELF_NFPREG 33 /* includes fpscr */ +#define ELF_NVMX 34 /* includes all vector registers */ +#define ELF_NVSX 32 /* includes all VSX registers */ +#define ELF_NTMSPRREG 3 /* include tfhar, tfiar, texasr */ +#define ELF_NEBB 3 /* includes ebbrr, ebbhr, bescr */ +#define ELF_NPMU 5 /* includes siar, sdar, sier, mmcr2, mmcr0 */ typedef unsigned long elf_greg_t64; typedef elf_greg_t64 elf_gregset_t64[ELF_NGREG]; diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index fe4c075bcf50..b2027a5cf508 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -41,7 +41,7 @@ obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o -obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o +obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o hmi.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index c7097f933114..033f3385fa49 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -26,6 +26,7 @@ #include <asm/emulated_ops.h> #include <asm/switch_to.h> #include <asm/disassemble.h> +#include <asm/cpu_has_feature.h> struct aligninfo { unsigned char len; diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index d81f826d1029..74248ab18e98 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -15,6 +15,7 @@ #include <linux/threads.h> #include <linux/init.h> #include <linux/export.h> +#include <linux/jump_label.h> #include <asm/oprofile_impl.h> #include <asm/cputable.h> @@ -2224,3 +2225,39 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr) return NULL; } + +#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS +struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS] = { + [0 ... NUM_CPU_FTR_KEYS - 1] = STATIC_KEY_TRUE_INIT +}; +EXPORT_SYMBOL_GPL(cpu_feature_keys); + +void __init cpu_feature_keys_init(void) +{ + int i; + + for (i = 0; i < NUM_CPU_FTR_KEYS; i++) { + unsigned long f = 1ul << i; + + if (!(cur_cpu_spec->cpu_features & f)) + static_branch_disable(&cpu_feature_keys[i]); + } +} + +struct static_key_true mmu_feature_keys[NUM_MMU_FTR_KEYS] = { + [0 ... NUM_MMU_FTR_KEYS - 1] = STATIC_KEY_TRUE_INIT +}; +EXPORT_SYMBOL_GPL(mmu_feature_keys); + +void __init mmu_feature_keys_init(void) +{ + int i; + + for (i = 0; i < NUM_MMU_FTR_KEYS; i++) { + unsigned long f = 1ul << i; + + if (!(cur_cpu_spec->mmu_features & f)) + static_branch_disable(&mmu_feature_keys[i]); + } +} +#endif diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 41a7d9d49a5a..fb7cbaa37658 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -18,7 +18,7 @@ */ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size, dma_handle, dev->coherent_dma_mask, flag, @@ -27,7 +27,7 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, static void dma_iommu_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle); } @@ -40,7 +40,7 @@ static void dma_iommu_free_coherent(struct device *dev, size_t size, static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { return iommu_map_page(dev, get_iommu_table_base(dev), page, offset, size, device_to_mask(dev), direction, attrs); @@ -49,7 +49,7 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page, static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction, attrs); @@ -58,7 +58,7 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems, device_to_mask(dev), direction, attrs); @@ -66,7 +66,7 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, direction, attrs); diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 3f1472a78f39..e64a6016fba7 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -64,7 +64,7 @@ static int dma_direct_dma_supported(struct device *dev, u64 mask) void *__dma_direct_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { void *ret; #ifdef CONFIG_NOT_COHERENT_CACHE @@ -121,7 +121,7 @@ void *__dma_direct_alloc_coherent(struct device *dev, size_t size, void __dma_direct_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { #ifdef CONFIG_NOT_COHERENT_CACHE __dma_free_coherent(size, vaddr); @@ -132,7 +132,7 @@ void __dma_direct_free_coherent(struct device *dev, size_t size, static void *dma_direct_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { struct iommu_table *iommu; @@ -156,7 +156,7 @@ static void *dma_direct_alloc_coherent(struct device *dev, size_t size, static void dma_direct_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { struct iommu_table *iommu; @@ -177,7 +177,7 @@ static void dma_direct_free_coherent(struct device *dev, size_t size, int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t handle, size_t size, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long pfn; @@ -195,7 +195,7 @@ int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma, static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; @@ -211,7 +211,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { } @@ -232,7 +232,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { BUG_ON(dir == DMA_NONE); __dma_sync_page(page, offset, size, dir); @@ -243,7 +243,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { } diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index fcb2887f5a33..6b8bc0dd09d4 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -532,7 +532,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) #ifdef CONFIG_PPC_STD_MMU_64 BEGIN_MMU_FTR_SECTION b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) BEGIN_FTR_SECTION clrrdi r6,r8,28 /* get its ESID */ clrrdi r9,r1,28 /* get current sp ESID */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6200e4925d26..41091fdf9bd8 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -671,6 +671,8 @@ BEGIN_FTR_SECTION beq h_doorbell_common cmpwi r3,0xea0 beq h_virt_irq_common + cmpwi r3,0xe60 + beq hmi_exception_common FTR_SECTION_ELSE cmpwi r3,0xa00 beq doorbell_super_common @@ -938,7 +940,7 @@ BEGIN_MMU_FTR_SECTION b do_hash_page /* Try to handle as hpte fault */ MMU_FTR_SECTION_ELSE b handle_page_fault -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX) +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) .align 7 .globl h_data_storage_common @@ -969,7 +971,7 @@ BEGIN_MMU_FTR_SECTION b do_hash_page /* Try to handle as hpte fault */ MMU_FTR_SECTION_ELSE b handle_page_fault -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX) +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) STD_EXCEPTION_COMMON(0xe20, h_instr_storage, unknown_exception) @@ -1172,7 +1174,7 @@ fwnmi_data_area: .globl hmi_exception_early hmi_exception_early: - EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60) + EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, 0xe62) mr r10,r1 /* Save r1 */ ld r1,PACAEMERGSP(r13) /* Use emergency stack */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ @@ -1390,7 +1392,7 @@ slb_miss_realmode: #ifdef CONFIG_PPC_STD_MMU_64 BEGIN_MMU_FTR_SECTION bl slb_allocate_realmode -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_RADIX) +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) #endif /* All done -- return from exception. */ @@ -1404,7 +1406,7 @@ BEGIN_MMU_FTR_SECTION beq- 2f FTR_SECTION_ELSE b 2f -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX) +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) .machine push .machine "power4" diff --git a/arch/powerpc/kernel/hmi.c b/arch/powerpc/kernel/hmi.c new file mode 100644 index 000000000000..e3f738eb1cac --- /dev/null +++ b/arch/powerpc/kernel/hmi.c @@ -0,0 +1,56 @@ +/* + * Hypervisor Maintenance Interrupt (HMI) handling. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. + * + * Copyright 2015 IBM Corporation + * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> + */ + +#undef DEBUG + +#include <linux/types.h> +#include <linux/compiler.h> +#include <asm/paca.h> +#include <asm/hmi.h> + +void wait_for_subcore_guest_exit(void) +{ + int i; + + /* + * NULL bitmap pointer indicates that KVM module hasn't + * been loaded yet and hence no guests are running. + * If no KVM is in use, no need to co-ordinate among threads + * as all of them will always be in host and no one is going + * to modify TB other than the opal hmi handler. + * Hence, just return from here. + */ + if (!local_paca->sibling_subcore_state) + return; + + for (i = 0; i < MAX_SUBCORE_PER_CORE; i++) + while (local_paca->sibling_subcore_state->in_guest[i]) + cpu_relax(); +} + +void wait_for_tb_resync(void) +{ + if (!local_paca->sibling_subcore_state) + return; + + while (test_bit(CORE_TB_RESYNC_REQ_BIT, + &local_paca->sibling_subcore_state->flags)) + cpu_relax(); +} diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index a89f4f7a66bd..c1ca9282f4a0 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -65,7 +65,7 @@ static void *ibmebus_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { void *mem; @@ -78,7 +78,7 @@ static void *ibmebus_alloc_coherent(struct device *dev, static void ibmebus_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { kfree(vaddr); } @@ -88,7 +88,7 @@ static dma_addr_t ibmebus_map_page(struct device *dev, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { return (dma_addr_t)(page_address(page) + offset); } @@ -97,7 +97,7 @@ static void ibmebus_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { return; } @@ -105,7 +105,7 @@ static void ibmebus_unmap_page(struct device *dev, static int ibmebus_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; @@ -121,7 +121,7 @@ static int ibmebus_map_sg(struct device *dev, static void ibmebus_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { return; } diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 335eb6cedae5..ba79d15f4ddd 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -336,7 +336,9 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ ld r2,PACATOC(r13); \ ld r1,PACAR1(r13); \ std r3,ORIG_GPR3(r1); /* Save original r3 */ \ - bl opal_rm_handle_hmi; \ + li r3,0; /* NULL argument */ \ + bl hmi_exception_realmode; \ + nop; \ ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ 20: nop; @@ -570,7 +572,7 @@ common_exit: BEGIN_MMU_FTR_SECTION b no_segments -END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) /* Restore SLB from PACA */ ld r8,PACA_SLBSHADOWPTR(r13) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index a8e3490b54e3..37d6e741be82 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -307,7 +307,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, void *page, unsigned int npages, enum dma_data_direction direction, unsigned long mask, unsigned int align_order, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long entry; dma_addr_t ret = DMA_ERROR_CODE; @@ -431,7 +431,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, struct scatterlist *sglist, int nelems, unsigned long mask, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { dma_addr_t dma_next = 0, dma_addr; struct scatterlist *s, *outs, *segstart; @@ -574,7 +574,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; @@ -753,7 +753,7 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, struct page *page, unsigned long offset, size_t size, unsigned long mask, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { dma_addr_t dma_handle = DMA_ERROR_CODE; void *vaddr; @@ -790,7 +790,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned int npages; @@ -845,7 +845,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, nio_pages = size >> tbl->it_page_shift; io_order = get_iommu_order(size, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, - mask >> tbl->it_page_shift, io_order, NULL); + mask >> tbl->it_page_shift, io_order, 0); if (mapping == DMA_ERROR_CODE) { free_pages((unsigned long)ret, order); return NULL; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index ac910d9982df..08887cf2b20e 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -75,6 +75,7 @@ #endif #define CREATE_TRACE_POINTS #include <asm/trace.h> +#include <asm/cpu_has_feature.h> DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 93dae296b6be..fa20060ff7a5 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -184,7 +184,7 @@ void setup_paca(struct paca_struct *new_paca) * if we do a GET_PACA() before the feature fixups have been * applied */ - if (cpu_has_feature(CPU_FTR_HVMODE)) + if (early_cpu_has_feature(CPU_FTR_HVMODE)) mtspr(SPRN_SPRG_HPACA, local_paca); #endif mtspr(SPRN_SPRG_PACA, local_paca); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index f93942b4b6a6..a5c0153ede37 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -412,36 +412,6 @@ static struct resource *__pci_mmap_make_offset(struct pci_dev *dev, } /* - * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci - * device mapping. - */ -static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, - pgprot_t protection, - enum pci_mmap_state mmap_state, - int write_combine) -{ - - /* Write combine is always 0 on non-memory space mappings. On - * memory space, if the user didn't pass 1, we check for a - * "prefetchable" resource. This is a bit hackish, but we use - * this to workaround the inability of /sysfs to provide a write - * combine bit - */ - if (mmap_state != pci_mmap_mem) - write_combine = 0; - else if (write_combine == 0) { - if (rp->flags & IORESOURCE_PREFETCH) - write_combine = 1; - } - - /* XXX would be nice to have a way to ask for write-through */ - if (write_combine) - return pgprot_noncached_wc(protection); - else - return pgprot_noncached(protection); -} - -/* * This one is used by /dev/mem and fbdev who have no clue about the * PCI device, it tries to find the PCI device first and calls the * above routine @@ -514,9 +484,10 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, return -EINVAL; vma->vm_pgoff = offset >> PAGE_SHIFT; - vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp, - vma->vm_page_prot, - mmap_state, write_combine); + if (write_combine) + vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); + else + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, vma->vm_end - vma->vm_start, vma->vm_page_prot); @@ -666,39 +637,25 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar, const struct resource *rsrc, resource_size_t *start, resource_size_t *end) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - resource_size_t offset = 0; + struct pci_bus_region region; - if (hose == NULL) + if (rsrc->flags & IORESOURCE_IO) { + pcibios_resource_to_bus(dev->bus, ®ion, + (struct resource *) rsrc); + *start = region.start; + *end = region.end; return; + } - if (rsrc->flags & IORESOURCE_IO) - offset = (unsigned long)hose->io_base_virt - _IO_BASE; - - /* We pass a fully fixed up address to userland for MMIO instead of - * a BAR value because X is lame and expects to be able to use that - * to pass to /dev/mem ! - * - * That means that we'll have potentially 64 bits values where some - * userland apps only expect 32 (like X itself since it thinks only - * Sparc has 64 bits MMIO) but if we don't do that, we break it on - * 32 bits CHRPs :-( - * - * Hopefully, the sysfs insterface is immune to that gunk. Once X - * has been fixed (and the fix spread enough), we can re-enable the - * 2 lines below and pass down a BAR value to userland. In that case - * we'll also have to re-enable the matching code in - * __pci_mmap_make_offset(). + /* We pass a CPU physical address to userland for MMIO instead of a + * BAR value because X is lame and expects to be able to use that + * to pass to /dev/mem! * - * BenH. + * That means we may have 64-bit values where some apps only expect + * 32 (like X itself since it thinks only Sparc has 64-bit MMIO). */ -#if 0 - else if (rsrc->flags & IORESOURCE_MEM) - offset = hose->pci_mem_offset; -#endif - - *start = rsrc->start - offset; - *end = rsrc->end - offset; + *start = rsrc->start; + *end = rsrc->end; } /** diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a8cca88e972f..58ccf86415b4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -58,6 +58,7 @@ #include <asm/code-patching.h> #include <asm/exec.h> #include <asm/livepatch.h> +#include <asm/cpu_has_feature.h> #include <linux/kprobes.h> #include <linux/kdebug.h> @@ -1073,6 +1074,26 @@ static inline void restore_sprs(struct thread_struct *old_thread, #endif } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +void flush_tmregs_to_thread(struct task_struct *tsk) +{ + /* + * Process self tracing is not yet supported through + * ptrace interface. Ptrace generic code should have + * prevented this from happening in the first place. + * Warn once here with the message, if some how it + * is attempted. + */ + WARN_ONCE(tsk == current, + "Not expecting ptrace on self: TM regs may be incorrect\n"); + + /* + * If task is not current, it should have been flushed + * already to it's thread_struct during __switch_to(). + */ +} +#endif + struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *new) { diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index bae3db791150..b0245bed6f54 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -170,7 +170,7 @@ static struct ibm_pa_feature { */ {CPU_FTR_TM_COMP, 0, 0, PPC_FEATURE2_HTM_COMP|PPC_FEATURE2_HTM_NOSC_COMP, 22, 0, 0}, - {0, MMU_FTR_RADIX, 0, 0, 40, 0, 0}, + {0, MMU_FTR_TYPE_RADIX, 0, 0, 40, 0, 0}, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, @@ -647,14 +647,6 @@ static void __init early_reserve_mem(void) #endif } -static bool disable_radix; -static int __init parse_disable_radix(char *p) -{ - disable_radix = true; - return 0; -} -early_param("disable_radix", parse_disable_radix); - void __init early_init_devtree(void *params) { phys_addr_t limit; @@ -744,11 +736,8 @@ void __init early_init_devtree(void *params) */ spinning_secondaries = boot_cpu_count - 1; #endif - /* - * now fixup radix MMU mode based on kernel command line - */ - if (disable_radix) - cur_cpu_spec->mmu_features &= ~MMU_FTR_RADIX; + + mmu_early_init_devtree(); #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 134bee9ac664..4f3c5756cc09 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -64,6 +64,10 @@ struct pt_regs_offset { {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])} #define REG_OFFSET_END {.name = NULL, .offset = 0} +#define TVSO(f) (offsetof(struct thread_vr_state, f)) +#define TFSO(f) (offsetof(struct thread_fp_state, f)) +#define TSO(f) (offsetof(struct thread_struct, f)) + static const struct pt_regs_offset regoffset_table[] = { GPR_OFFSET_NAME(0), GPR_OFFSET_NAME(1), @@ -181,6 +185,26 @@ static int set_user_msr(struct task_struct *task, unsigned long msr) return 0; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static unsigned long get_user_ckpt_msr(struct task_struct *task) +{ + return task->thread.ckpt_regs.msr | task->thread.fpexc_mode; +} + +static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr) +{ + task->thread.ckpt_regs.msr &= ~MSR_DEBUGCHANGE; + task->thread.ckpt_regs.msr |= msr & MSR_DEBUGCHANGE; + return 0; +} + +static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap) +{ + task->thread.ckpt_regs.trap = trap & 0xfff0; + return 0; +} +#endif + #ifdef CONFIG_PPC64 static int get_user_dscr(struct task_struct *task, unsigned long *data) { @@ -358,6 +382,29 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, return ret; } +/* + * When the transaction is active, 'transact_fp' holds the current running + * value of all FPR registers and 'fp_state' holds the last checkpointed + * value of all FPR registers for the current transaction. When transaction + * is not active 'fp_state' holds the current running state of all the FPR + * registers. So this function which returns the current running values of + * all the FPR registers, needs to know whether any transaction is active + * or not. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + * }; + * + * There are two config options CONFIG_VSX and CONFIG_PPC_TRANSACTIONAL_MEM + * which determines the final code in this function. All the combinations of + * these two config options are possible except the one below as transactional + * memory config pulls in CONFIG_VSX automatically. + * + * !defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM) + */ static int fpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) @@ -368,14 +415,31 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, #endif flush_fp_to_thread(target); -#ifdef CONFIG_VSX +#if defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM) + /* copy to local buffer then write that out */ + if (MSR_TM_ACTIVE(target->thread.regs->msr)) { + flush_altivec_to_thread(target); + flush_tmregs_to_thread(target); + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_TRANS_FPR(i); + buf[32] = target->thread.transact_fp.fpscr; + } else { + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_FPR(i); + buf[32] = target->thread.fp_state.fpscr; + } + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); +#endif + +#if defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM) /* copy to local buffer then write that out */ for (i = 0; i < 32 ; i++) buf[i] = target->thread.TS_FPR(i); buf[32] = target->thread.fp_state.fpscr; return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); +#endif -#else +#if !defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM) BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != offsetof(struct thread_fp_state, fpr[32])); @@ -384,6 +448,29 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, #endif } +/* + * When the transaction is active, 'transact_fp' holds the current running + * value of all FPR registers and 'fp_state' holds the last checkpointed + * value of all FPR registers for the current transaction. When transaction + * is not active 'fp_state' holds the current running state of all the FPR + * registers. So this function which setss the current running values of + * all the FPR registers, needs to know whether any transaction is active + * or not. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + * }; + * + * There are two config options CONFIG_VSX and CONFIG_PPC_TRANSACTIONAL_MEM + * which determines the final code in this function. All the combinations of + * these two config options are possible except the one below as transactional + * memory config pulls in CONFIG_VSX automatically. + * + * !defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM) + */ static int fpr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) @@ -394,7 +481,27 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, #endif flush_fp_to_thread(target); -#ifdef CONFIG_VSX +#if defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM) + /* copy to local buffer then write that out */ + i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); + if (i) + return i; + + if (MSR_TM_ACTIVE(target->thread.regs->msr)) { + flush_altivec_to_thread(target); + flush_tmregs_to_thread(target); + for (i = 0; i < 32 ; i++) + target->thread.TS_TRANS_FPR(i) = buf[i]; + target->thread.transact_fp.fpscr = buf[32]; + } else { + for (i = 0; i < 32 ; i++) + target->thread.TS_FPR(i) = buf[i]; + target->thread.fp_state.fpscr = buf[32]; + } + return 0; +#endif + +#if defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM) /* copy to local buffer then write that out */ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); if (i) @@ -403,7 +510,9 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, target->thread.TS_FPR(i) = buf[i]; target->thread.fp_state.fpscr = buf[32]; return 0; -#else +#endif + +#if !defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM) BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != offsetof(struct thread_fp_state, fpr[32])); @@ -433,10 +542,28 @@ static int vr_active(struct task_struct *target, return target->thread.used_vr ? regset->n : 0; } +/* + * When the transaction is active, 'transact_vr' holds the current running + * value of all the VMX registers and 'vr_state' holds the last checkpointed + * value of all the VMX registers for the current transaction to fall back + * on in case it aborts. When transaction is not active 'vr_state' holds + * the current running state of all the VMX registers. So this function which + * gets the current running values of all the VMX registers, needs to know + * whether any transaction is active or not. + * + * Userspace interface buffer layout: + * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + * }; + */ static int vr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { + struct thread_vr_state *addr; int ret; flush_altivec_to_thread(target); @@ -444,8 +571,19 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset, BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != offsetof(struct thread_vr_state, vr[32])); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(target->thread.regs->msr)) { + flush_fp_to_thread(target); + flush_tmregs_to_thread(target); + addr = &target->thread.transact_vr; + } else { + addr = &target->thread.vr_state; + } +#else + addr = &target->thread.vr_state; +#endif ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.vr_state, 0, + addr, 0, 33 * sizeof(vector128)); if (!ret) { /* @@ -456,7 +594,16 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset, u32 word; } vrsave; memset(&vrsave, 0, sizeof(vrsave)); + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(target->thread.regs->msr)) + vrsave.word = target->thread.transact_vrsave; + else + vrsave.word = target->thread.vrsave; +#else vrsave.word = target->thread.vrsave; +#endif + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, 33 * sizeof(vector128), -1); } @@ -464,10 +611,28 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset, return ret; } +/* + * When the transaction is active, 'transact_vr' holds the current running + * value of all the VMX registers and 'vr_state' holds the last checkpointed + * value of all the VMX registers for the current transaction to fall back + * on in case it aborts. When transaction is not active 'vr_state' holds + * the current running state of all the VMX registers. So this function which + * sets the current running values of all the VMX registers, needs to know + * whether any transaction is active or not. + * + * Userspace interface buffer layout: + * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + * }; + */ static int vr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { + struct thread_vr_state *addr; int ret; flush_altivec_to_thread(target); @@ -475,8 +640,19 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset, BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != offsetof(struct thread_vr_state, vr[32])); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(target->thread.regs->msr)) { + flush_fp_to_thread(target); + flush_tmregs_to_thread(target); + addr = &target->thread.transact_vr; + } else { + addr = &target->thread.vr_state; + } +#else + addr = &target->thread.vr_state; +#endif ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.vr_state, 0, + addr, 0, 33 * sizeof(vector128)); if (!ret && count > 0) { /* @@ -487,11 +663,28 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset, u32 word; } vrsave; memset(&vrsave, 0, sizeof(vrsave)); + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(target->thread.regs->msr)) + vrsave.word = target->thread.transact_vrsave; + else + vrsave.word = target->thread.vrsave; +#else vrsave.word = target->thread.vrsave; +#endif ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, 33 * sizeof(vector128), -1); - if (!ret) + if (!ret) { + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(target->thread.regs->msr)) + target->thread.transact_vrsave = vrsave.word; + else + target->thread.vrsave = vrsave.word; +#else target->thread.vrsave = vrsave.word; +#endif + } } return ret; @@ -512,6 +705,21 @@ static int vsr_active(struct task_struct *target, return target->thread.used_vsr ? regset->n : 0; } +/* + * When the transaction is active, 'transact_fp' holds the current running + * value of all FPR registers and 'fp_state' holds the last checkpointed + * value of all FPR registers for the current transaction. When transaction + * is not active 'fp_state' holds the current running state of all the FPR + * registers. So this function which returns the current running values of + * all the FPR registers, needs to know whether any transaction is active + * or not. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 vsx[32]; + * }; + */ static int vsr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) @@ -519,16 +727,47 @@ static int vsr_get(struct task_struct *target, const struct user_regset *regset, u64 buf[32]; int ret, i; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_tmregs_to_thread(target); +#endif flush_vsx_to_thread(target); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(target->thread.regs->msr)) { + for (i = 0; i < 32 ; i++) + buf[i] = target->thread. + transact_fp.fpr[i][TS_VSRLOWOFFSET]; + } else { + for (i = 0; i < 32 ; i++) + buf[i] = target->thread. + fp_state.fpr[i][TS_VSRLOWOFFSET]; + } +#else for (i = 0; i < 32 ; i++) buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; +#endif ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); return ret; } +/* + * When the transaction is active, 'transact_fp' holds the current running + * value of all FPR registers and 'fp_state' holds the last checkpointed + * value of all FPR registers for the current transaction. When transaction + * is not active 'fp_state' holds the current running state of all the FPR + * registers. So this function which sets the current running values of all + * the FPR registers, needs to know whether any transaction is active or not. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 vsx[32]; + * }; + */ static int vsr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) @@ -536,12 +775,30 @@ static int vsr_set(struct task_struct *target, const struct user_regset *regset, u64 buf[32]; int ret,i; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_tmregs_to_thread(target); +#endif flush_vsx_to_thread(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(target->thread.regs->msr)) { + for (i = 0; i < 32 ; i++) + target->thread.transact_fp. + fpr[i][TS_VSRLOWOFFSET] = buf[i]; + } else { + for (i = 0; i < 32 ; i++) + target->thread.fp_state. + fpr[i][TS_VSRLOWOFFSET] = buf[i]; + } +#else for (i = 0; i < 32 ; i++) target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; +#endif return ret; @@ -614,8 +871,1030 @@ static int evr_set(struct task_struct *target, const struct user_regset *regset, } #endif /* CONFIG_SPE */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/** + * tm_cgpr_active - get active number of registers in CGPR + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in transaction checkpointed GPR category. + */ +static int tm_cgpr_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + return regset->n; +} + +/** + * tm_cgpr_get - get CGPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets transaction checkpointed GPR registers. + * + * When the transaction is active, 'ckpt_regs' holds all the checkpointed + * GPR register values for the current transaction to fall back on if it + * aborts in between. This function gets those checkpointed GPR registers. + * The userspace interface buffer layout is as follows. + * + * struct data { + * struct pt_regs ckpt_regs; + * }; + */ +static int tm_cgpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_tmregs_to_thread(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs, + 0, offsetof(struct pt_regs, msr)); + if (!ret) { + unsigned long msr = get_user_ckpt_msr(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, + offsetof(struct pt_regs, msr), + offsetof(struct pt_regs, msr) + + sizeof(msr)); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct pt_regs, msr) + sizeof(long)); + + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs.orig_gpr3, + offsetof(struct pt_regs, orig_gpr3), + sizeof(struct pt_regs)); + if (!ret) + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + sizeof(struct pt_regs), -1); + + return ret; +} /* + * tm_cgpr_set - set the CGPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed GPR registers. + * + * When the transaction is active, 'ckpt_regs' holds the checkpointed + * GPR register values for the current transaction to fall back on if it + * aborts in between. This function sets those checkpointed GPR registers. + * The userspace interface buffer layout is as follows. + * + * struct data { + * struct pt_regs ckpt_regs; + * }; + */ +static int tm_cgpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned long reg; + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_tmregs_to_thread(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs, + 0, PT_MSR * sizeof(reg)); + + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, + PT_MSR * sizeof(reg), + (PT_MSR + 1) * sizeof(reg)); + if (!ret) + ret = set_user_ckpt_msr(target, reg); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct pt_regs, msr) + sizeof(long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs.orig_gpr3, + PT_ORIG_R3 * sizeof(reg), + (PT_MAX_PUT_REG + 1) * sizeof(reg)); + + if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret) + ret = user_regset_copyin_ignore( + &pos, &count, &kbuf, &ubuf, + (PT_MAX_PUT_REG + 1) * sizeof(reg), + PT_TRAP * sizeof(reg)); + + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, + PT_TRAP * sizeof(reg), + (PT_TRAP + 1) * sizeof(reg)); + if (!ret) + ret = set_user_ckpt_trap(target, reg); + } + + if (!ret) + ret = user_regset_copyin_ignore( + &pos, &count, &kbuf, &ubuf, + (PT_TRAP + 1) * sizeof(reg), -1); + + return ret; +} + +/** + * tm_cfpr_active - get active number of registers in CFPR + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in transaction checkpointed FPR category. + */ +static int tm_cfpr_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + return regset->n; +} + +/** + * tm_cfpr_get - get CFPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets in transaction checkpointed FPR registers. + * + * When the transaction is active 'fp_state' holds the checkpointed + * values for the current transaction to fall back on if it aborts + * in between. This function gets those checkpointed FPR registers. + * The userspace interface buffer layout is as follows. + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + *}; + */ +static int tm_cfpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + u64 buf[33]; + int i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_tmregs_to_thread(target); + + /* copy to local buffer then write that out */ + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_FPR(i); + buf[32] = target->thread.fp_state.fpscr; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); +} + +/** + * tm_cfpr_set - set CFPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed FPR registers. + * + * When the transaction is active 'fp_state' holds the checkpointed + * FPR register values for the current transaction to fall back on + * if it aborts in between. This function sets these checkpointed + * FPR registers. The userspace interface buffer layout is as follows. + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + *}; + */ +static int tm_cfpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 buf[33]; + int i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_tmregs_to_thread(target); + + /* copy to local buffer then write that out */ + i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); + if (i) + return i; + for (i = 0; i < 32 ; i++) + target->thread.TS_FPR(i) = buf[i]; + target->thread.fp_state.fpscr = buf[32]; + return 0; +} + +/** + * tm_cvmx_active - get active number of registers in CVMX + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in checkpointed VMX category. + */ +static int tm_cvmx_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + return regset->n; +} + +/** + * tm_cvmx_get - get CMVX registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets in transaction checkpointed VMX registers. + * + * When the transaction is active 'vr_state' and 'vr_save' hold + * the checkpointed values for the current transaction to fall + * back on if it aborts in between. The userspace interface buffer + * layout is as follows. + * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + *}; + */ +static int tm_cvmx_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32])); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + /* Flush the state */ + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_tmregs_to_thread(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.vr_state, 0, + 33 * sizeof(vector128)); + if (!ret) { + /* + * Copy out only the low-order word of vrsave. + */ + union { + elf_vrreg_t reg; + u32 word; + } vrsave; + memset(&vrsave, 0, sizeof(vrsave)); + vrsave.word = target->thread.vrsave; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, + 33 * sizeof(vector128), -1); + } + + return ret; +} + +/** + * tm_cvmx_set - set CMVX registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed VMX registers. + * + * When the transaction is active 'vr_state' and 'vr_save' hold + * the checkpointed values for the current transaction to fall + * back on if it aborts in between. The userspace interface buffer + * layout is as follows. + * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + *}; + */ +static int tm_cvmx_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32])); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_tmregs_to_thread(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.vr_state, 0, + 33 * sizeof(vector128)); + if (!ret && count > 0) { + /* + * We use only the low-order word of vrsave. + */ + union { + elf_vrreg_t reg; + u32 word; + } vrsave; + memset(&vrsave, 0, sizeof(vrsave)); + vrsave.word = target->thread.vrsave; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, + 33 * sizeof(vector128), -1); + if (!ret) + target->thread.vrsave = vrsave.word; + } + + return ret; +} + +/** + * tm_cvsx_active - get active number of registers in CVSX + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in transaction checkpointed VSX category. + */ +static int tm_cvsx_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + flush_vsx_to_thread(target); + return target->thread.used_vsr ? regset->n : 0; +} + +/** + * tm_cvsx_get - get CVSX registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets in transaction checkpointed VSX registers. + * + * When the transaction is active 'fp_state' holds the checkpointed + * values for the current transaction to fall back on if it aborts + * in between. This function gets those checkpointed VSX registers. + * The userspace interface buffer layout is as follows. + * + * struct data { + * u64 vsx[32]; + *}; + */ +static int tm_cvsx_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + u64 buf[32]; + int ret, i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + /* Flush the state */ + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_tmregs_to_thread(target); + flush_vsx_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + + return ret; +} + +/** + * tm_cvsx_set - set CFPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed VSX registers. + * + * When the transaction is active 'fp_state' holds the checkpointed + * VSX register values for the current transaction to fall back on + * if it aborts in between. This function sets these checkpointed + * FPR registers. The userspace interface buffer layout is as follows. + * + * struct data { + * u64 vsx[32]; + *}; + */ +static int tm_cvsx_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 buf[32]; + int ret, i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + /* Flush the state */ + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_tmregs_to_thread(target); + flush_vsx_to_thread(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + for (i = 0; i < 32 ; i++) + target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + + return ret; +} + +/** + * tm_spr_active - get active number of registers in TM SPR + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks the active number of available + * regisers in the transactional memory SPR category. + */ +static int tm_spr_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + return regset->n; +} + +/** + * tm_spr_get - get the TM related SPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets transactional memory related SPR registers. + * The userspace interface buffer layout is as follows. + * + * struct { + * u64 tm_tfhar; + * u64 tm_texasr; + * u64 tm_tfiar; + * }; + */ +static int tm_spr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + /* Build tests */ + BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr)); + BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar)); + BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs)); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + /* Flush the states */ + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_tmregs_to_thread(target); + + /* TFHAR register */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfhar, 0, sizeof(u64)); + + /* TEXASR register */ + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_texasr, sizeof(u64), + 2 * sizeof(u64)); + + /* TFIAR register */ + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfiar, + 2 * sizeof(u64), 3 * sizeof(u64)); + return ret; +} + +/** + * tm_spr_set - set the TM related SPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets transactional memory related SPR registers. + * The userspace interface buffer layout is as follows. + * + * struct { + * u64 tm_tfhar; + * u64 tm_texasr; + * u64 tm_tfiar; + * }; + */ +static int tm_spr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + /* Build tests */ + BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr)); + BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar)); + BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs)); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + /* Flush the states */ + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_tmregs_to_thread(target); + + /* TFHAR register */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfhar, 0, sizeof(u64)); + + /* TEXASR register */ + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_texasr, sizeof(u64), + 2 * sizeof(u64)); + + /* TFIAR register */ + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfiar, + 2 * sizeof(u64), 3 * sizeof(u64)); + return ret; +} + +static int tm_tar_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (MSR_TM_ACTIVE(target->thread.regs->msr)) + return regset->n; + + return 0; +} + +static int tm_tar_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tar, 0, sizeof(u64)); + return ret; +} + +static int tm_tar_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tar, 0, sizeof(u64)); + return ret; +} + +static int tm_ppr_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (MSR_TM_ACTIVE(target->thread.regs->msr)) + return regset->n; + + return 0; +} + + +static int tm_ppr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_ppr, 0, sizeof(u64)); + return ret; +} + +static int tm_ppr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_ppr, 0, sizeof(u64)); + return ret; +} + +static int tm_dscr_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (MSR_TM_ACTIVE(target->thread.regs->msr)) + return regset->n; + + return 0; +} + +static int tm_dscr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_dscr, 0, sizeof(u64)); + return ret; +} + +static int tm_dscr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_dscr, 0, sizeof(u64)); + return ret; +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + +#ifdef CONFIG_PPC64 +static int ppr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.ppr, 0, sizeof(u64)); + return ret; +} + +static int ppr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ppr, 0, sizeof(u64)); + return ret; +} + +static int dscr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.dscr, 0, sizeof(u64)); + return ret; +} +static int dscr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.dscr, 0, sizeof(u64)); + return ret; +} +#endif +#ifdef CONFIG_PPC_BOOK3S_64 +static int tar_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tar, 0, sizeof(u64)); + return ret; +} +static int tar_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tar, 0, sizeof(u64)); + return ret; +} + +static int ebb_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + if (target->thread.used_ebb) + return regset->n; + + return 0; +} + +static int ebb_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + /* Build tests */ + BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr)); + BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + if (!target->thread.used_ebb) + return -ENODATA; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.ebbrr, 0, 3 * sizeof(unsigned long)); +} + +static int ebb_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret = 0; + + /* Build tests */ + BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr)); + BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + if (target->thread.used_ebb) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ebbrr, 0, sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ebbhr, sizeof(unsigned long), + 2 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.bescr, + 2 * sizeof(unsigned long), 3 * sizeof(unsigned long)); + + return ret; +} +static int pmu_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + return regset->n; +} + +static int pmu_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + /* Build tests */ + BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar)); + BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier)); + BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2)); + BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.siar, 0, + 5 * sizeof(unsigned long)); +} + +static int pmu_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret = 0; + + /* Build tests */ + BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar)); + BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier)); + BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2)); + BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.siar, 0, + sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.sdar, sizeof(unsigned long), + 2 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.sier, 2 * sizeof(unsigned long), + 3 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.mmcr2, 3 * sizeof(unsigned long), + 4 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.mmcr0, 4 * sizeof(unsigned long), + 5 * sizeof(unsigned long)); + return ret; +} +#endif +/* * These are our native regset flavors. */ enum powerpc_regset { @@ -630,6 +1909,25 @@ enum powerpc_regset { #ifdef CONFIG_SPE REGSET_SPE, #endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + REGSET_TM_CGPR, /* TM checkpointed GPR registers */ + REGSET_TM_CFPR, /* TM checkpointed FPR registers */ + REGSET_TM_CVMX, /* TM checkpointed VMX registers */ + REGSET_TM_CVSX, /* TM checkpointed VSX registers */ + REGSET_TM_SPR, /* TM specific SPR registers */ + REGSET_TM_CTAR, /* TM checkpointed TAR register */ + REGSET_TM_CPPR, /* TM checkpointed PPR register */ + REGSET_TM_CDSCR, /* TM checkpointed DSCR register */ +#endif +#ifdef CONFIG_PPC64 + REGSET_PPR, /* PPR register */ + REGSET_DSCR, /* DSCR register */ +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + REGSET_TAR, /* TAR register */ + REGSET_EBB, /* EBB registers */ + REGSET_PMR, /* Performance Monitor Registers */ +#endif }; static const struct user_regset native_regsets[] = { @@ -664,6 +1962,77 @@ static const struct user_regset native_regsets[] = { .active = evr_active, .get = evr_get, .set = evr_set }, #endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + [REGSET_TM_CGPR] = { + .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, + .size = sizeof(long), .align = sizeof(long), + .active = tm_cgpr_active, .get = tm_cgpr_get, .set = tm_cgpr_set + }, + [REGSET_TM_CFPR] = { + .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set + }, + [REGSET_TM_CVMX] = { + .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, + .size = sizeof(vector128), .align = sizeof(vector128), + .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set + }, + [REGSET_TM_CVSX] = { + .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set + }, + [REGSET_TM_SPR] = { + .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set + }, + [REGSET_TM_CTAR] = { + .core_note_type = NT_PPC_TM_CTAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set + }, + [REGSET_TM_CPPR] = { + .core_note_type = NT_PPC_TM_CPPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set + }, + [REGSET_TM_CDSCR] = { + .core_note_type = NT_PPC_TM_CDSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set + }, +#endif +#ifdef CONFIG_PPC64 + [REGSET_PPR] = { + .core_note_type = NT_PPC_PPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = ppr_get, .set = ppr_set + }, + [REGSET_DSCR] = { + .core_note_type = NT_PPC_DSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = dscr_get, .set = dscr_set + }, +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + [REGSET_TAR] = { + .core_note_type = NT_PPC_TAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = tar_get, .set = tar_set + }, + [REGSET_EBB] = { + .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, + .size = sizeof(u64), .align = sizeof(u64), + .active = ebb_active, .get = ebb_get, .set = ebb_set + }, + [REGSET_PMR] = { + .core_note_type = NT_PPC_PMU, .n = ELF_NPMU, + .size = sizeof(u64), .align = sizeof(u64), + .active = pmu_active, .get = pmu_get, .set = pmu_set + }, +#endif }; static const struct user_regset_view user_ppc_native_view = { @@ -674,24 +2043,35 @@ static const struct user_regset_view user_ppc_native_view = { #ifdef CONFIG_PPC64 #include <linux/compat.h> -static int gpr32_get(struct task_struct *target, +static int gpr32_get_common(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + void *kbuf, void __user *ubuf, bool tm_active) { const unsigned long *regs = &target->thread.regs->gpr[0]; + const unsigned long *ckpt_regs; compat_ulong_t *k = kbuf; compat_ulong_t __user *u = ubuf; compat_ulong_t reg; int i; - if (target->thread.regs == NULL) - return -EIO; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + ckpt_regs = &target->thread.ckpt_regs.gpr[0]; +#endif + if (tm_active) { + regs = ckpt_regs; + } else { + if (target->thread.regs == NULL) + return -EIO; - if (!FULL_REGS(target->thread.regs)) { - /* We have a partial register set. Fill 14-31 with bogus values */ - for (i = 14; i < 32; i++) - target->thread.regs->gpr[i] = NV_REG_POISON; + if (!FULL_REGS(target->thread.regs)) { + /* + * We have a partial register set. + * Fill 14-31 with bogus values. + */ + for (i = 14; i < 32; i++) + target->thread.regs->gpr[i] = NV_REG_POISON; + } } pos /= sizeof(reg); @@ -731,20 +2111,31 @@ static int gpr32_get(struct task_struct *target, PT_REGS_COUNT * sizeof(reg), -1); } -static int gpr32_set(struct task_struct *target, +static int gpr32_set_common(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) + const void *kbuf, const void __user *ubuf, bool tm_active) { unsigned long *regs = &target->thread.regs->gpr[0]; + unsigned long *ckpt_regs; const compat_ulong_t *k = kbuf; const compat_ulong_t __user *u = ubuf; compat_ulong_t reg; - if (target->thread.regs == NULL) - return -EIO; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + ckpt_regs = &target->thread.ckpt_regs.gpr[0]; +#endif - CHECK_FULL_REGS(target->thread.regs); + if (tm_active) { + regs = ckpt_regs; + } else { + regs = &target->thread.regs->gpr[0]; + + if (target->thread.regs == NULL) + return -EIO; + + CHECK_FULL_REGS(target->thread.regs); + } pos /= sizeof(reg); count /= sizeof(reg); @@ -804,6 +2195,40 @@ static int gpr32_set(struct task_struct *target, (PT_TRAP + 1) * sizeof(reg), -1); } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static int tm_cgpr32_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, 1); +} + +static int tm_cgpr32_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, 1); +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + +static int gpr32_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, 0); +} + +static int gpr32_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, 0); +} + /* * These are the regset flavors matching the CONFIG_PPC32 native set. */ @@ -832,6 +2257,73 @@ static const struct user_regset compat_regsets[] = { .active = evr_active, .get = evr_get, .set = evr_set }, #endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + [REGSET_TM_CGPR] = { + .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, + .size = sizeof(long), .align = sizeof(long), + .active = tm_cgpr_active, + .get = tm_cgpr32_get, .set = tm_cgpr32_set + }, + [REGSET_TM_CFPR] = { + .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set + }, + [REGSET_TM_CVMX] = { + .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, + .size = sizeof(vector128), .align = sizeof(vector128), + .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set + }, + [REGSET_TM_CVSX] = { + .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set + }, + [REGSET_TM_SPR] = { + .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set + }, + [REGSET_TM_CTAR] = { + .core_note_type = NT_PPC_TM_CTAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set + }, + [REGSET_TM_CPPR] = { + .core_note_type = NT_PPC_TM_CPPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set + }, + [REGSET_TM_CDSCR] = { + .core_note_type = NT_PPC_TM_CDSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set + }, +#endif +#ifdef CONFIG_PPC64 + [REGSET_PPR] = { + .core_note_type = NT_PPC_PPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = ppr_get, .set = ppr_set + }, + [REGSET_DSCR] = { + .core_note_type = NT_PPC_DSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = dscr_get, .set = dscr_set + }, +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + [REGSET_TAR] = { + .core_note_type = NT_PPC_TAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = tar_get, .set = tar_set + }, + [REGSET_EBB] = { + .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, + .size = sizeof(u64), .align = sizeof(u64), + .active = ebb_active, .get = ebb_get, .set = ebb_set + }, +#endif }; static const struct user_regset_view user_ppc_compat_view = { diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 714b4ba7ab86..dba265c586df 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -66,6 +66,7 @@ #include <asm/hugetlb.h> #include <asm/livepatch.h> #include <asm/mmu_context.h> +#include <asm/cpu_has_feature.h> #include "setup.h" diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 00f57754407e..c3e861df4b20 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -37,6 +37,7 @@ #include <asm/serial.h> #include <asm/udbg.h> #include <asm/code-patching.h> +#include <asm/cpu_has_feature.h> #define DBG(fmt...) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index d8216aed22b7..eafb9a79e011 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -227,8 +227,8 @@ static void __init configure_exceptions(void) opal_configure_cores(); /* Enable AIL if supported, and we are in hypervisor mode */ - if (cpu_has_feature(CPU_FTR_HVMODE) && - cpu_has_feature(CPU_FTR_ARCH_207S)) { + if (early_cpu_has_feature(CPU_FTR_HVMODE) && + early_cpu_has_feature(CPU_FTR_ARCH_207S)) { unsigned long lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); } @@ -298,12 +298,12 @@ void __init early_setup(unsigned long dt_ptr) */ configure_exceptions(); - /* Initialize the hash table or TLB handling */ - early_init_mmu(); - /* Apply all the dynamic patching */ apply_feature_fixups(); + /* Initialize the hash table or TLB handling */ + early_init_mmu(); + /* * At this point, we can let interrupts switch to virtual mode * (the MMU has been setup), so adjust the MSR in the PACA to diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 5a1f015ea9f3..25a39052bf6b 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -55,6 +55,7 @@ #include <asm/debug.h> #include <asm/kexec.h> #include <asm/asm-prototypes.h> +#include <asm/cpu_has_feature.h> #ifdef DEBUG #include <asm/udbg.h> diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 4e7759c8ca30..3efbedefba6a 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -56,6 +56,7 @@ #include <linux/irq_work.h> #include <linux/clk-provider.h> #include <linux/suspend.h> +#include <linux/rtc.h> #include <asm/trace.h> #include <asm/io.h> @@ -1159,6 +1160,29 @@ void calibrate_delay(void) loops_per_jiffy = tb_ticks_per_jiffy; } +#if IS_ENABLED(CONFIG_RTC_DRV_GENERIC) +static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm) +{ + ppc_md.get_rtc_time(tm); + return rtc_valid_tm(tm); +} + +static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm) +{ + if (!ppc_md.set_rtc_time) + return -EOPNOTSUPP; + + if (ppc_md.set_rtc_time(tm) < 0) + return -EOPNOTSUPP; + + return 0; +} + +static const struct rtc_class_ops rtc_generic_ops = { + .read_time = rtc_generic_get_time, + .set_time = rtc_generic_set_time, +}; + static int __init rtc_init(void) { struct platform_device *pdev; @@ -1166,9 +1190,12 @@ static int __init rtc_init(void) if (!ppc_md.get_rtc_time) return -ENODEV; - pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0); + pdev = platform_device_register_data(NULL, "rtc-generic", -1, + &rtc_generic_ops, + sizeof(rtc_generic_ops)); return PTR_ERR_OR_ZERO(pdev); } device_initcall(rtc_init); +#endif diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index f7e2f2e318bd..2cb589264cb7 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -61,6 +61,7 @@ #include <asm/tm.h> #include <asm/debug.h> #include <asm/asm-prototypes.h> +#include <asm/hmi.h> #include <sysdev/fsl_pci.h> #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) @@ -308,9 +309,13 @@ long hmi_exception_realmode(struct pt_regs *regs) { __this_cpu_inc(irq_stat.hmi_exceptions); + wait_for_subcore_guest_exit(); + if (ppc_md.hmi_exception_early) ppc_md.hmi_exception_early(regs); + wait_for_tb_resync(); + return 0; } diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 8d7358f3a273..b3813ddb2fb4 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -482,7 +482,7 @@ static void vio_cmo_balance(struct work_struct *work) static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { struct vio_dev *viodev = to_vio_dev(dev); void *ret; @@ -503,7 +503,7 @@ static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size, static void vio_dma_iommu_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { struct vio_dev *viodev = to_vio_dev(dev); @@ -515,7 +515,7 @@ static void vio_dma_iommu_free_coherent(struct device *dev, size_t size, static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct vio_dev *viodev = to_vio_dev(dev); struct iommu_table *tbl; @@ -539,7 +539,7 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page, static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct vio_dev *viodev = to_vio_dev(dev); struct iommu_table *tbl; @@ -552,7 +552,7 @@ static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct vio_dev *viodev = to_vio_dev(dev); struct iommu_table *tbl; @@ -588,7 +588,7 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, static void vio_dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct vio_dev *viodev = to_vio_dev(dev); struct iommu_table *tbl; diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index eba0bea6e032..1f9e5529e692 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -20,7 +20,7 @@ common-objs-y += powerpc.o emulate.o emulate_loadstore.o obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o -AFLAGS_booke_interrupts.o := -I$(obj) +AFLAGS_booke_interrupts.o := -I$(objtree)/$(obj) kvm-e500-objs := \ $(common-objs-y) \ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e20beae5ca7a..2fd5580c8f6e 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -52,6 +52,7 @@ #include <asm/switch_to.h> #include <asm/smp.h> #include <asm/dbell.h> +#include <asm/hmi.h> #include <linux/gfp.h> #include <linux/vmalloc.h> #include <linux/highmem.h> @@ -2522,7 +2523,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) spin_unlock(&pvc->lock); - kvm_guest_enter(); + guest_enter(); srcu_idx = srcu_read_lock(&vc->kvm->srcu); @@ -2570,7 +2571,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) /* make sure updates to secondary vcpu structs are visible now */ smp_mb(); - kvm_guest_exit(); + guest_exit(); for (sub = 0; sub < core_info.n_subcores; ++sub) list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub], @@ -3401,6 +3402,38 @@ static struct kvmppc_ops kvm_ops_hv = { .hcall_implemented = kvmppc_hcall_impl_hv, }; +static int kvm_init_subcore_bitmap(void) +{ + int i, j; + int nr_cores = cpu_nr_cores(); + struct sibling_subcore_state *sibling_subcore_state; + + for (i = 0; i < nr_cores; i++) { + int first_cpu = i * threads_per_core; + int node = cpu_to_node(first_cpu); + + /* Ignore if it is already allocated. */ + if (paca[first_cpu].sibling_subcore_state) + continue; + + sibling_subcore_state = + kmalloc_node(sizeof(struct sibling_subcore_state), + GFP_KERNEL, node); + if (!sibling_subcore_state) + return -ENOMEM; + + memset(sibling_subcore_state, 0, + sizeof(struct sibling_subcore_state)); + + for (j = 0; j < threads_per_core; j++) { + int cpu = first_cpu + j; + + paca[cpu].sibling_subcore_state = sibling_subcore_state; + } + } + return 0; +} + static int kvmppc_book3s_init_hv(void) { int r; @@ -3411,6 +3444,10 @@ static int kvmppc_book3s_init_hv(void) if (r < 0) return -ENODEV; + r = kvm_init_subcore_bitmap(); + if (r) + return r; + kvm_ops_hv.owner = THIS_MODULE; kvmppc_hv_ops = &kvm_ops_hv; diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 93b5f5c9b445..0fa70a9618d7 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -13,6 +13,9 @@ #include <linux/kernel.h> #include <asm/opal.h> #include <asm/mce.h> +#include <asm/machdep.h> +#include <asm/cputhreads.h> +#include <asm/hmi.h> /* SRR1 bits for machine check on POWER7 */ #define SRR1_MC_LDSTERR (1ul << (63-42)) @@ -140,3 +143,176 @@ long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) { return kvmppc_realmode_mc_power7(vcpu); } + +/* Check if dynamic split is in force and return subcore size accordingly. */ +static inline int kvmppc_cur_subcore_size(void) +{ + if (local_paca->kvm_hstate.kvm_split_mode) + return local_paca->kvm_hstate.kvm_split_mode->subcore_size; + + return threads_per_subcore; +} + +void kvmppc_subcore_enter_guest(void) +{ + int thread_id, subcore_id; + + thread_id = cpu_thread_in_core(local_paca->paca_index); + subcore_id = thread_id / kvmppc_cur_subcore_size(); + + local_paca->sibling_subcore_state->in_guest[subcore_id] = 1; +} + +void kvmppc_subcore_exit_guest(void) +{ + int thread_id, subcore_id; + + thread_id = cpu_thread_in_core(local_paca->paca_index); + subcore_id = thread_id / kvmppc_cur_subcore_size(); + + local_paca->sibling_subcore_state->in_guest[subcore_id] = 0; +} + +static bool kvmppc_tb_resync_required(void) +{ + if (test_and_set_bit(CORE_TB_RESYNC_REQ_BIT, + &local_paca->sibling_subcore_state->flags)) + return false; + + return true; +} + +static void kvmppc_tb_resync_done(void) +{ + clear_bit(CORE_TB_RESYNC_REQ_BIT, + &local_paca->sibling_subcore_state->flags); +} + +/* + * kvmppc_realmode_hmi_handler() is called only by primary thread during + * guest exit path. + * + * There are multiple reasons why HMI could occur, one of them is + * Timebase (TB) error. If this HMI is due to TB error, then TB would + * have been in stopped state. The opal hmi handler Will fix it and + * restore the TB value with host timebase value. For HMI caused due + * to non-TB errors, opal hmi handler will not touch/restore TB register + * and hence there won't be any change in TB value. + * + * Since we are not sure about the cause of this HMI, we can't be sure + * about the content of TB register whether it holds guest or host timebase + * value. Hence the idea is to resync the TB on every HMI, so that we + * know about the exact state of the TB value. Resync TB call will + * restore TB to host timebase. + * + * Things to consider: + * - On TB error, HMI interrupt is reported on all the threads of the core + * that has encountered TB error irrespective of split-core mode. + * - The very first thread on the core that get chance to fix TB error + * would rsync the TB with local chipTOD value. + * - The resync TB is a core level action i.e. it will sync all the TBs + * in that core independent of split-core mode. This means if we trigger + * TB sync from a thread from one subcore, it would affect TB values of + * sibling subcores of the same core. + * + * All threads need to co-ordinate before making opal hmi handler. + * All threads will use sibling_subcore_state->in_guest[] (shared by all + * threads in the core) in paca which holds information about whether + * sibling subcores are in Guest mode or host mode. The in_guest[] array + * is of size MAX_SUBCORE_PER_CORE=4, indexed using subcore id to set/unset + * subcore status. Only primary threads from each subcore is responsible + * to set/unset its designated array element while entering/exiting the + * guset. + * + * After invoking opal hmi handler call, one of the thread (of entire core) + * will need to resync the TB. Bit 63 from subcore state bitmap flags + * (sibling_subcore_state->flags) will be used to co-ordinate between + * primary threads to decide who takes up the responsibility. + * + * This is what we do: + * - Primary thread from each subcore tries to set resync required bit[63] + * of paca->sibling_subcore_state->flags. + * - The first primary thread that is able to set the flag takes the + * responsibility of TB resync. (Let us call it as thread leader) + * - All other threads which are in host will call + * wait_for_subcore_guest_exit() and wait for in_guest[0-3] from + * paca->sibling_subcore_state to get cleared. + * - All the primary thread will clear its subcore status from subcore + * state in_guest[] array respectively. + * - Once all primary threads clear in_guest[0-3], all of them will invoke + * opal hmi handler. + * - Now all threads will wait for TB resync to complete by invoking + * wait_for_tb_resync() except the thread leader. + * - Thread leader will do a TB resync by invoking opal_resync_timebase() + * call and the it will clear the resync required bit. + * - All other threads will now come out of resync wait loop and proceed + * with individual execution. + * - On return of this function, primary thread will signal all + * secondary threads to proceed. + * - All secondary threads will eventually call opal hmi handler on + * their exit path. + */ + +long kvmppc_realmode_hmi_handler(void) +{ + int ptid = local_paca->kvm_hstate.ptid; + bool resync_req; + + /* This is only called on primary thread. */ + BUG_ON(ptid != 0); + __this_cpu_inc(irq_stat.hmi_exceptions); + + /* + * By now primary thread has already completed guest->host + * partition switch but haven't signaled secondaries yet. + * All the secondary threads on this subcore is waiting + * for primary thread to signal them to go ahead. + * + * For threads from subcore which isn't in guest, they all will + * wait until all other subcores on this core exit the guest. + * + * Now set the resync required bit. If you are the first to + * set this bit then kvmppc_tb_resync_required() function will + * return true. For rest all other subcores + * kvmppc_tb_resync_required() will return false. + * + * If resync_req == true, then this thread is responsible to + * initiate TB resync after hmi handler has completed. + * All other threads on this core will wait until this thread + * clears the resync required bit flag. + */ + resync_req = kvmppc_tb_resync_required(); + + /* Reset the subcore status to indicate it has exited guest */ + kvmppc_subcore_exit_guest(); + + /* + * Wait for other subcores on this core to exit the guest. + * All the primary threads and threads from subcore that are + * not in guest will wait here until all subcores are out + * of guest context. + */ + wait_for_subcore_guest_exit(); + + /* + * At this point we are sure that primary threads from each + * subcore on this core have completed guest->host partition + * switch. Now it is safe to call HMI handler. + */ + if (ppc_md.hmi_exception_early) + ppc_md.hmi_exception_early(NULL); + + /* + * Check if this thread is responsible to resync TB. + * All other threads will wait until this thread completes the + * TB resync. + */ + if (resync_req) { + opal_resync_timebase(); + /* Reset TB resync req bit */ + kvmppc_tb_resync_done(); + } else { + wait_for_tb_resync(); + } + return 0; +} diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 86f0cae37a85..975655573844 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -29,6 +29,7 @@ #include <asm/kvm_book3s_asm.h> #include <asm/book3s/64/mmu-hash.h> #include <asm/tm.h> +#include <asm/opal.h> #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) @@ -373,6 +374,18 @@ kvm_secondary_got_guest: lwsync std r0, HSTATE_KVM_VCORE(r13) + /* + * All secondaries exiting guest will fall through this path. + * Before proceeding, just check for HMI interrupt and + * invoke opal hmi handler. By now we are sure that the + * primary thread on this core/subcore has already made partition + * switch/TB resync and we are good to call opal hmi handler. + */ + cmpwi r12, BOOK3S_INTERRUPT_HMI + bne kvm_no_guest + + li r3,0 /* NULL argument */ + bl hmi_exception_realmode /* * At this point we have finished executing in the guest. * We need to wait for hwthread_req to become zero, since @@ -428,6 +441,22 @@ kvm_no_guest: */ kvm_unsplit_nap: /* + * When secondaries are napping in kvm_unsplit_nap() with + * hwthread_req = 1, HMI goes ignored even though subcores are + * already exited the guest. Hence HMI keeps waking up secondaries + * from nap in a loop and secondaries always go back to nap since + * no vcore is assigned to them. This makes impossible for primary + * thread to get hold of secondary threads resulting into a soft + * lockup in KVM path. + * + * Let us check if HMI is pending and handle it before we go to nap. + */ + cmpwi r12, BOOK3S_INTERRUPT_HMI + bne 55f + li r3, 0 /* NULL argument */ + bl hmi_exception_realmode +55: + /* * Ensure that secondary doesn't nap when it has * its vcore pointer set. */ @@ -601,6 +630,11 @@ BEGIN_FTR_SECTION mtspr SPRN_DPDES, r8 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + /* Mark the subcore state as inside guest */ + bl kvmppc_subcore_enter_guest + nop + ld r5, HSTATE_KVM_VCORE(r13) + ld r4, HSTATE_KVM_VCPU(r13) li r0,1 stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */ @@ -655,112 +689,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BEGIN_FTR_SECTION - b skip_tm -END_FTR_SECTION_IFCLR(CPU_FTR_TM) - - /* Turn on TM/FP/VSX/VMX so we can restore them. */ - mfmsr r5 - li r6, MSR_TM >> 32 - sldi r6, r6, 32 - or r5, r5, r6 - ori r5, r5, MSR_FP - oris r5, r5, (MSR_VEC | MSR_VSX)@h - mtmsrd r5 - - /* - * The user may change these outside of a transaction, so they must - * always be context switched. - */ - ld r5, VCPU_TFHAR(r4) - ld r6, VCPU_TFIAR(r4) - ld r7, VCPU_TEXASR(r4) - mtspr SPRN_TFHAR, r5 - mtspr SPRN_TFIAR, r6 - mtspr SPRN_TEXASR, r7 - - ld r5, VCPU_MSR(r4) - rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 - beq skip_tm /* TM not active in guest */ - - /* Make sure the failure summary is set, otherwise we'll program check - * when we trechkpt. It's possible that this might have been not set - * on a kvmppc_set_one_reg() call but we shouldn't let this crash the - * host. - */ - oris r7, r7, (TEXASR_FS)@h - mtspr SPRN_TEXASR, r7 - - /* - * We need to load up the checkpointed state for the guest. - * We need to do this early as it will blow away any GPRs, VSRs and - * some SPRs. - */ - - mr r31, r4 - addi r3, r31, VCPU_FPRS_TM - bl load_fp_state - addi r3, r31, VCPU_VRS_TM - bl load_vr_state - mr r4, r31 - lwz r7, VCPU_VRSAVE_TM(r4) - mtspr SPRN_VRSAVE, r7 - - ld r5, VCPU_LR_TM(r4) - lwz r6, VCPU_CR_TM(r4) - ld r7, VCPU_CTR_TM(r4) - ld r8, VCPU_AMR_TM(r4) - ld r9, VCPU_TAR_TM(r4) - mtlr r5 - mtcr r6 - mtctr r7 - mtspr SPRN_AMR, r8 - mtspr SPRN_TAR, r9 - - /* - * Load up PPR and DSCR values but don't put them in the actual SPRs - * till the last moment to avoid running with userspace PPR and DSCR for - * too long. - */ - ld r29, VCPU_DSCR_TM(r4) - ld r30, VCPU_PPR_TM(r4) - - std r2, PACATMSCRATCH(r13) /* Save TOC */ - - /* Clear the MSR RI since r1, r13 are all going to be foobar. */ - li r5, 0 - mtmsrd r5, 1 - - /* Load GPRs r0-r28 */ - reg = 0 - .rept 29 - ld reg, VCPU_GPRS_TM(reg)(r31) - reg = reg + 1 - .endr - - mtspr SPRN_DSCR, r29 - mtspr SPRN_PPR, r30 - - /* Load final GPRs */ - ld 29, VCPU_GPRS_TM(29)(r31) - ld 30, VCPU_GPRS_TM(30)(r31) - ld 31, VCPU_GPRS_TM(31)(r31) - - /* TM checkpointed state is now setup. All GPRs are now volatile. */ - TRECHKPT - - /* Now let's get back the state we need. */ - HMT_MEDIUM - GET_PACA(r13) - ld r29, HSTATE_DSCR(r13) - mtspr SPRN_DSCR, r29 - ld r4, HSTATE_KVM_VCPU(r13) - ld r1, HSTATE_HOST_R1(r13) - ld r2, PACATMSCRATCH(r13) - - /* Set the MSR RI since we have our registers back. */ - li r5, MSR_RI - mtmsrd r5, 1 -skip_tm: + bl kvmppc_restore_tm +END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif /* Load guest PMU registers */ @@ -841,12 +771,6 @@ BEGIN_FTR_SECTION /* Skip next section on POWER7 */ b 8f END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) - /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */ - mfmsr r8 - li r0, 1 - rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG - mtmsrd r8 - /* Load up POWER8-specific registers */ ld r5, VCPU_IAMR(r4) lwz r6, VCPU_PSPB(r4) @@ -1436,106 +1360,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BEGIN_FTR_SECTION - b 2f -END_FTR_SECTION_IFCLR(CPU_FTR_TM) - /* Turn on TM. */ - mfmsr r8 - li r0, 1 - rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG - mtmsrd r8 - - ld r5, VCPU_MSR(r9) - rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 - beq 1f /* TM not active in guest. */ - - li r3, TM_CAUSE_KVM_RESCHED - - /* Clear the MSR RI since r1, r13 are all going to be foobar. */ - li r5, 0 - mtmsrd r5, 1 - - /* All GPRs are volatile at this point. */ - TRECLAIM(R3) - - /* Temporarily store r13 and r9 so we have some regs to play with */ - SET_SCRATCH0(r13) - GET_PACA(r13) - std r9, PACATMSCRATCH(r13) - ld r9, HSTATE_KVM_VCPU(r13) - - /* Get a few more GPRs free. */ - std r29, VCPU_GPRS_TM(29)(r9) - std r30, VCPU_GPRS_TM(30)(r9) - std r31, VCPU_GPRS_TM(31)(r9) - - /* Save away PPR and DSCR soon so don't run with user values. */ - mfspr r31, SPRN_PPR - HMT_MEDIUM - mfspr r30, SPRN_DSCR - ld r29, HSTATE_DSCR(r13) - mtspr SPRN_DSCR, r29 - - /* Save all but r9, r13 & r29-r31 */ - reg = 0 - .rept 29 - .if (reg != 9) && (reg != 13) - std reg, VCPU_GPRS_TM(reg)(r9) - .endif - reg = reg + 1 - .endr - /* ... now save r13 */ - GET_SCRATCH0(r4) - std r4, VCPU_GPRS_TM(13)(r9) - /* ... and save r9 */ - ld r4, PACATMSCRATCH(r13) - std r4, VCPU_GPRS_TM(9)(r9) - - /* Reload stack pointer and TOC. */ - ld r1, HSTATE_HOST_R1(r13) - ld r2, PACATOC(r13) - - /* Set MSR RI now we have r1 and r13 back. */ - li r5, MSR_RI - mtmsrd r5, 1 - - /* Save away checkpinted SPRs. */ - std r31, VCPU_PPR_TM(r9) - std r30, VCPU_DSCR_TM(r9) - mflr r5 - mfcr r6 - mfctr r7 - mfspr r8, SPRN_AMR - mfspr r10, SPRN_TAR - std r5, VCPU_LR_TM(r9) - stw r6, VCPU_CR_TM(r9) - std r7, VCPU_CTR_TM(r9) - std r8, VCPU_AMR_TM(r9) - std r10, VCPU_TAR_TM(r9) - - /* Restore r12 as trap number. */ - lwz r12, VCPU_TRAP(r9) - - /* Save FP/VSX. */ - addi r3, r9, VCPU_FPRS_TM - bl store_fp_state - addi r3, r9, VCPU_VRS_TM - bl store_vr_state - mfspr r6, SPRN_VRSAVE - stw r6, VCPU_VRSAVE_TM(r9) -1: - /* - * We need to save these SPRs after the treclaim so that the software - * error code is recorded correctly in the TEXASR. Also the user may - * change these outside of a transaction, so they must always be - * context switched. - */ - mfspr r5, SPRN_TFHAR - mfspr r6, SPRN_TFIAR - mfspr r7, SPRN_TEXASR - std r5, VCPU_TFHAR(r9) - std r6, VCPU_TFIAR(r9) - std r7, VCPU_TEXASR(r9) -2: + bl kvmppc_save_tm +END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif /* Increment yield count if they have a VPA */ @@ -1683,6 +1509,23 @@ BEGIN_FTR_SECTION mtspr SPRN_DPDES, r8 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + /* If HMI, call kvmppc_realmode_hmi_handler() */ + cmpwi r12, BOOK3S_INTERRUPT_HMI + bne 27f + bl kvmppc_realmode_hmi_handler + nop + li r12, BOOK3S_INTERRUPT_HMI + /* + * At this point kvmppc_realmode_hmi_handler would have resync-ed + * the TB. Hence it is not required to subtract guest timebase + * offset from timebase. So, skip it. + * + * Also, do not call kvmppc_subcore_exit_guest() because it has + * been invoked as part of kvmppc_realmode_hmi_handler(). + */ + b 30f + +27: /* Subtract timebase offset from timebase */ ld r8,VCORE_TB_OFFSET(r5) cmpdi r8,0 @@ -1698,8 +1541,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) addis r8,r8,0x100 /* if so, increment upper 40 bits */ mtspr SPRN_TBU40,r8 +17: bl kvmppc_subcore_exit_guest + nop +30: ld r5,HSTATE_KVM_VCORE(r13) + ld r4,VCORE_KVM(r5) /* pointer to struct kvm */ + /* Reset PCR */ -17: ld r0, VCORE_PCR(r5) + ld r0, VCORE_PCR(r5) cmpdi r0, 0 beq 18f li r0, 0 @@ -2245,6 +2093,13 @@ _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */ /* save FP state */ bl kvmppc_save_fp +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + ld r9, HSTATE_KVM_VCPU(r13) + bl kvmppc_save_tm +END_FTR_SECTION_IFSET(CPU_FTR_TM) +#endif + /* * Set DEC to the smaller of DEC and HDEC, so that we wake * no later than the end of our timeslice (HDEC interrupts @@ -2321,6 +2176,12 @@ kvm_end_cede: bl kvmhv_accumulate_time #endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + bl kvmppc_restore_tm +END_FTR_SECTION_IFSET(CPU_FTR_TM) +#endif + /* load up FP state */ bl kvmppc_load_fp @@ -2461,6 +2322,8 @@ BEGIN_FTR_SECTION cmpwi r6, 3 /* hypervisor doorbell? */ beq 3f END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + cmpwi r6, 0xa /* Hypervisor maintenance ? */ + beq 4f li r3, 1 /* anything else, return 1 */ 0: blr @@ -2482,6 +2345,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) li r3, -1 blr + /* Woken up due to Hypervisor maintenance interrupt */ +4: li r12, BOOK3S_INTERRUPT_HMI + li r3, 1 + blr + /* * Determine what sort of external interrupt is pending (if any). * Returns: @@ -2631,6 +2499,239 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) mr r4,r31 blr +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Save transactional state and TM-related registers. + * Called with r9 pointing to the vcpu struct. + * This can modify all checkpointed registers, but + * restores r1, r2 and r9 (vcpu pointer) before exit. + */ +kvmppc_save_tm: + mflr r0 + std r0, PPC_LR_STKOFF(r1) + + /* Turn on TM. */ + mfmsr r8 + li r0, 1 + rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG + mtmsrd r8 + + ld r5, VCPU_MSR(r9) + rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 + beq 1f /* TM not active in guest. */ + + std r1, HSTATE_HOST_R1(r13) + li r3, TM_CAUSE_KVM_RESCHED + + /* Clear the MSR RI since r1, r13 are all going to be foobar. */ + li r5, 0 + mtmsrd r5, 1 + + /* All GPRs are volatile at this point. */ + TRECLAIM(R3) + + /* Temporarily store r13 and r9 so we have some regs to play with */ + SET_SCRATCH0(r13) + GET_PACA(r13) + std r9, PACATMSCRATCH(r13) + ld r9, HSTATE_KVM_VCPU(r13) + + /* Get a few more GPRs free. */ + std r29, VCPU_GPRS_TM(29)(r9) + std r30, VCPU_GPRS_TM(30)(r9) + std r31, VCPU_GPRS_TM(31)(r9) + + /* Save away PPR and DSCR soon so don't run with user values. */ + mfspr r31, SPRN_PPR + HMT_MEDIUM + mfspr r30, SPRN_DSCR + ld r29, HSTATE_DSCR(r13) + mtspr SPRN_DSCR, r29 + + /* Save all but r9, r13 & r29-r31 */ + reg = 0 + .rept 29 + .if (reg != 9) && (reg != 13) + std reg, VCPU_GPRS_TM(reg)(r9) + .endif + reg = reg + 1 + .endr + /* ... now save r13 */ + GET_SCRATCH0(r4) + std r4, VCPU_GPRS_TM(13)(r9) + /* ... and save r9 */ + ld r4, PACATMSCRATCH(r13) + std r4, VCPU_GPRS_TM(9)(r9) + + /* Reload stack pointer and TOC. */ + ld r1, HSTATE_HOST_R1(r13) + ld r2, PACATOC(r13) + + /* Set MSR RI now we have r1 and r13 back. */ + li r5, MSR_RI + mtmsrd r5, 1 + + /* Save away checkpinted SPRs. */ + std r31, VCPU_PPR_TM(r9) + std r30, VCPU_DSCR_TM(r9) + mflr r5 + mfcr r6 + mfctr r7 + mfspr r8, SPRN_AMR + mfspr r10, SPRN_TAR + std r5, VCPU_LR_TM(r9) + stw r6, VCPU_CR_TM(r9) + std r7, VCPU_CTR_TM(r9) + std r8, VCPU_AMR_TM(r9) + std r10, VCPU_TAR_TM(r9) + + /* Restore r12 as trap number. */ + lwz r12, VCPU_TRAP(r9) + + /* Save FP/VSX. */ + addi r3, r9, VCPU_FPRS_TM + bl store_fp_state + addi r3, r9, VCPU_VRS_TM + bl store_vr_state + mfspr r6, SPRN_VRSAVE + stw r6, VCPU_VRSAVE_TM(r9) +1: + /* + * We need to save these SPRs after the treclaim so that the software + * error code is recorded correctly in the TEXASR. Also the user may + * change these outside of a transaction, so they must always be + * context switched. + */ + mfspr r5, SPRN_TFHAR + mfspr r6, SPRN_TFIAR + mfspr r7, SPRN_TEXASR + std r5, VCPU_TFHAR(r9) + std r6, VCPU_TFIAR(r9) + std r7, VCPU_TEXASR(r9) + + ld r0, PPC_LR_STKOFF(r1) + mtlr r0 + blr + +/* + * Restore transactional state and TM-related registers. + * Called with r4 pointing to the vcpu struct. + * This potentially modifies all checkpointed registers. + * It restores r1, r2, r4 from the PACA. + */ +kvmppc_restore_tm: + mflr r0 + std r0, PPC_LR_STKOFF(r1) + + /* Turn on TM/FP/VSX/VMX so we can restore them. */ + mfmsr r5 + li r6, MSR_TM >> 32 + sldi r6, r6, 32 + or r5, r5, r6 + ori r5, r5, MSR_FP + oris r5, r5, (MSR_VEC | MSR_VSX)@h + mtmsrd r5 + + /* + * The user may change these outside of a transaction, so they must + * always be context switched. + */ + ld r5, VCPU_TFHAR(r4) + ld r6, VCPU_TFIAR(r4) + ld r7, VCPU_TEXASR(r4) + mtspr SPRN_TFHAR, r5 + mtspr SPRN_TFIAR, r6 + mtspr SPRN_TEXASR, r7 + + ld r5, VCPU_MSR(r4) + rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 + beqlr /* TM not active in guest */ + std r1, HSTATE_HOST_R1(r13) + + /* Make sure the failure summary is set, otherwise we'll program check + * when we trechkpt. It's possible that this might have been not set + * on a kvmppc_set_one_reg() call but we shouldn't let this crash the + * host. + */ + oris r7, r7, (TEXASR_FS)@h + mtspr SPRN_TEXASR, r7 + + /* + * We need to load up the checkpointed state for the guest. + * We need to do this early as it will blow away any GPRs, VSRs and + * some SPRs. + */ + + mr r31, r4 + addi r3, r31, VCPU_FPRS_TM + bl load_fp_state + addi r3, r31, VCPU_VRS_TM + bl load_vr_state + mr r4, r31 + lwz r7, VCPU_VRSAVE_TM(r4) + mtspr SPRN_VRSAVE, r7 + + ld r5, VCPU_LR_TM(r4) + lwz r6, VCPU_CR_TM(r4) + ld r7, VCPU_CTR_TM(r4) + ld r8, VCPU_AMR_TM(r4) + ld r9, VCPU_TAR_TM(r4) + mtlr r5 + mtcr r6 + mtctr r7 + mtspr SPRN_AMR, r8 + mtspr SPRN_TAR, r9 + + /* + * Load up PPR and DSCR values but don't put them in the actual SPRs + * till the last moment to avoid running with userspace PPR and DSCR for + * too long. + */ + ld r29, VCPU_DSCR_TM(r4) + ld r30, VCPU_PPR_TM(r4) + + std r2, PACATMSCRATCH(r13) /* Save TOC */ + + /* Clear the MSR RI since r1, r13 are all going to be foobar. */ + li r5, 0 + mtmsrd r5, 1 + + /* Load GPRs r0-r28 */ + reg = 0 + .rept 29 + ld reg, VCPU_GPRS_TM(reg)(r31) + reg = reg + 1 + .endr + + mtspr SPRN_DSCR, r29 + mtspr SPRN_PPR, r30 + + /* Load final GPRs */ + ld 29, VCPU_GPRS_TM(29)(r31) + ld 30, VCPU_GPRS_TM(30)(r31) + ld 31, VCPU_GPRS_TM(31)(r31) + + /* TM checkpointed state is now setup. All GPRs are now volatile. */ + TRECHKPT + + /* Now let's get back the state we need. */ + HMT_MEDIUM + GET_PACA(r13) + ld r29, HSTATE_DSCR(r13) + mtspr SPRN_DSCR, r29 + ld r4, HSTATE_KVM_VCPU(r13) + ld r1, HSTATE_HOST_R1(r13) + ld r2, PACATMSCRATCH(r13) + + /* Set the MSR RI since we have our registers back. */ + li r5, MSR_RI + mtmsrd r5, 1 + + ld r0, PPC_LR_STKOFF(r1) + mtlr r0 + blr +#endif + /* * We come here if we get any exception or interrupt while we are * executing host real mode code while in guest MMU context. diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index c4f7d6b86b9e..e76f79a45988 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -914,7 +914,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, /* We get here with MSR.EE=1 */ trace_kvm_exit(exit_nr, vcpu); - kvm_guest_exit(); + guest_exit(); switch (exit_nr) { case BOOK3S_INTERRUPT_INST_STORAGE: @@ -1049,7 +1049,17 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, int emul; program_interrupt: - flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; + /* + * shadow_srr1 only contains valid flags if we came here via + * a program exception. The other exceptions (emulation assist, + * FP unavailable, etc.) do not provide flags in SRR1, so use + * an illegal-instruction exception when injecting a program + * interrupt into the guest. + */ + if (exit_nr == BOOK3S_INTERRUPT_PROGRAM) + flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; + else + flags = SRR1_PROGILL; emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); if (emul != EMULATE_DONE) { @@ -1531,7 +1541,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_clear_debug(vcpu); - /* No need for kvm_guest_exit. It's done in handle_exit. + /* No need for guest_exit. It's done in handle_exit. We also get here with interrupts enabled. */ /* Make sure we save the guest FPU/Altivec/VSX state */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 4afae695899a..02b4672f7347 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -776,7 +776,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) ret = __kvmppc_vcpu_run(kvm_run, vcpu); - /* No need for kvm_guest_exit. It's done in handle_exit. + /* No need for guest_exit. It's done in handle_exit. We also get here with interrupts enabled. */ /* Switch back to user space debug context */ @@ -1012,7 +1012,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } trace_kvm_exit(exit_nr, vcpu); - __kvm_guest_exit(); + guest_exit_irqoff(); local_irq_enable(); diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 5cc2e7af3a7b..b379146de55b 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -302,7 +302,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) advance = 0; printk(KERN_ERR "Couldn't emulate instruction 0x%08x " "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); - kvmppc_core_queue_program(vcpu, 0); } } diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 6249cdc834d1..ed38f8114118 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1823,7 +1823,8 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return 0; } -int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 02416fea7653..6ce40dd6fe51 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -119,7 +119,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) continue; } - __kvm_guest_enter(); + guest_enter_irqoff(); return 1; } @@ -588,6 +588,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; #endif + case KVM_CAP_PPC_HTM: + r = cpu_has_feature(CPU_FTR_TM_COMP) && + is_kvmppc_hv_enabled(kvm); + break; default: r = 0; break; diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c index 60b0b3fc8fc1..a58abe4afbd1 100644 --- a/arch/powerpc/lib/alloc.c +++ b/arch/powerpc/lib/alloc.c @@ -6,7 +6,7 @@ #include <asm/setup.h> -void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask) +void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) { void *p; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index defb2998b818..74145f02ad41 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -13,6 +13,7 @@ */ #include <linux/types.h> +#include <linux/jump_label.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/init.h> @@ -152,9 +153,18 @@ static void do_final_fixups(void) #endif } -void apply_feature_fixups(void) +static unsigned long __initdata saved_cpu_features; +static unsigned int __initdata saved_mmu_features; +#ifdef CONFIG_PPC64 +static unsigned long __initdata saved_firmware_features; +#endif + +void __init apply_feature_fixups(void) { - struct cpu_spec *spec = *PTRRELOC(&cur_cpu_spec); + struct cpu_spec *spec = PTRRELOC(*PTRRELOC(&cur_cpu_spec)); + + *PTRRELOC(&saved_cpu_features) = spec->cpu_features; + *PTRRELOC(&saved_mmu_features) = spec->mmu_features; /* * Apply the CPU-specific and firmware specific fixups to kernel text @@ -173,11 +183,36 @@ void apply_feature_fixups(void) PTRRELOC(&__stop___lwsync_fixup)); #ifdef CONFIG_PPC64 + saved_firmware_features = powerpc_firmware_features; do_feature_fixups(powerpc_firmware_features, &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); #endif do_final_fixups(); + + /* + * Initialise jump label. This causes all the cpu/mmu_has_feature() + * checks to take on their correct polarity based on the current set of + * CPU/MMU features. + */ + jump_label_init(); + cpu_feature_keys_init(); + mmu_feature_keys_init(); +} + +static int __init check_features(void) +{ + WARN(saved_cpu_features != cur_cpu_spec->cpu_features, + "CPU features changed after feature patching!\n"); + WARN(saved_mmu_features != cur_cpu_spec->mmu_features, + "MMU features changed after feature patching!\n"); +#ifdef CONFIG_PPC64 + WARN(saved_firmware_features != powerpc_firmware_features, + "Firmware features changed after feature patching!\n"); +#endif + + return 0; } +late_initcall(check_features); #ifdef CONFIG_FTR_FIXUP_SELFTEST diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 88ce7d212320..0e4e9654bd2c 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -72,8 +72,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) /* clear out bits after (52) [0....52.....63] */ va &= ~((1ul << (64 - 52)) - 1); va |= ssize << 8; - sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) | - ((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4); + sllp = get_sllp_encoding(apsize); va |= sllp << 5; asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2) : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) @@ -122,8 +121,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) /* clear out bits after(52) [0....52.....63] */ va &= ~((1ul << (64 - 52)) - 1); va |= ssize << 8; - sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) | - ((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4); + sllp = get_sllp_encoding(apsize); va |= sllp << 5; asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)" : : "r"(va) : "memory"); @@ -749,5 +747,5 @@ void __init hpte_init_native(void) mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate; if (cpu_has_feature(CPU_FTR_ARCH_300)) - ppc_md.register_process_table = native_register_proc_table; + register_process_table = native_register_proc_table; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b78b5d211278..0821556e16f4 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -363,11 +363,6 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node, return 0; } -static void __init htab_init_seg_sizes(void) -{ - of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL); -} - static int __init get_idx_from_shift(unsigned int shift) { int idx = -1; @@ -539,7 +534,7 @@ static bool might_have_hea(void) #endif /* #ifdef CONFIG_PPC_64K_PAGES */ -static void __init htab_init_page_sizes(void) +static void __init htab_scan_page_sizes(void) { int rc; @@ -554,17 +549,23 @@ static void __init htab_init_page_sizes(void) * Try to find the available page sizes in the device-tree */ rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL); - if (rc != 0) /* Found */ - goto found; - - /* - * Not in the device-tree, let's fallback on known size - * list for 16M capable GP & GR - */ - if (mmu_has_feature(MMU_FTR_16M_PAGE)) + if (rc == 0 && early_mmu_has_feature(MMU_FTR_16M_PAGE)) { + /* + * Nothing in the device-tree, but the CPU supports 16M pages, + * so let's fallback on a known size list for 16M capable CPUs. + */ memcpy(mmu_psize_defs, mmu_psize_defaults_gp, sizeof(mmu_psize_defaults_gp)); -found: + } + +#ifdef CONFIG_HUGETLB_PAGE + /* Reserve 16G huge page memory sections for huge pages */ + of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); +#endif /* CONFIG_HUGETLB_PAGE */ +} + +static void __init htab_init_page_sizes(void) +{ if (!debug_pagealloc_enabled()) { /* * Pick a size for the linear mapping. Currently, we only @@ -630,11 +631,6 @@ found: ,mmu_psize_defs[mmu_vmemmap_psize].shift #endif ); - -#ifdef CONFIG_HUGETLB_PAGE - /* Reserve 16G huge page memory sections for huge pages */ - of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); -#endif /* CONFIG_HUGETLB_PAGE */ } static int __init htab_dt_scan_pftsize(unsigned long node, @@ -759,12 +755,6 @@ static void __init htab_initialize(void) DBG(" -> htab_initialize()\n"); - /* Initialize segment sizes */ - htab_init_seg_sizes(); - - /* Initialize page sizes */ - htab_init_page_sizes(); - if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) { mmu_kernel_ssize = MMU_SEGSIZE_1T; mmu_highuser_ssize = MMU_SEGSIZE_1T; @@ -885,8 +875,19 @@ static void __init htab_initialize(void) #undef KB #undef MB +void __init hash__early_init_devtree(void) +{ + /* Initialize segment sizes */ + of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL); + + /* Initialize page sizes */ + htab_scan_page_sizes(); +} + void __init hash__early_init_mmu(void) { + htab_init_page_sizes(); + /* * initialize page table size */ diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c index 1e11559e1aac..35254a678456 100644 --- a/arch/powerpc/mm/hugetlbpage-radix.c +++ b/arch/powerpc/mm/hugetlbpage-radix.c @@ -5,39 +5,34 @@ #include <asm/cacheflush.h> #include <asm/machdep.h> #include <asm/mman.h> +#include <asm/tlb.h> void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { - unsigned long ap, shift; + int psize; struct hstate *hstate = hstate_file(vma->vm_file); - shift = huge_page_shift(hstate); - if (shift == mmu_psize_defs[MMU_PAGE_2M].shift) - ap = mmu_get_ap(MMU_PAGE_2M); - else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift) - ap = mmu_get_ap(MMU_PAGE_1G); - else { - WARN(1, "Wrong huge page shift\n"); - return ; - } - radix___flush_tlb_page(vma->vm_mm, vmaddr, ap, 0); + psize = hstate_get_psize(hstate); + radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, psize); } void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { - unsigned long ap, shift; + int psize; struct hstate *hstate = hstate_file(vma->vm_file); - shift = huge_page_shift(hstate); - if (shift == mmu_psize_defs[MMU_PAGE_2M].shift) - ap = mmu_get_ap(MMU_PAGE_2M); - else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift) - ap = mmu_get_ap(MMU_PAGE_1G); - else { - WARN(1, "Wrong huge page shift\n"); - return ; - } - radix___local_flush_tlb_page(vma->vm_mm, vmaddr, ap, 0); + psize = hstate_get_psize(hstate); + radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, psize); +} + +void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + int psize; + struct hstate *hstate = hstate_file(vma->vm_file); + + psize = hstate_get_psize(hstate); + radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize); } /* diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 33709bdb0419..16ada1eb7e26 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -411,3 +411,25 @@ struct page *realmode_pfn_to_page(unsigned long pfn) EXPORT_SYMBOL_GPL(realmode_pfn_to_page); #endif /* CONFIG_SPARSEMEM_VMEMMAP/CONFIG_FLATMEM */ + +#ifdef CONFIG_PPC_STD_MMU_64 +static bool disable_radix; +static int __init parse_disable_radix(char *p) +{ + disable_radix = true; + return 0; +} +early_param("disable_radix", parse_disable_radix); + +void __init mmu_early_init_devtree(void) +{ + /* Disable radix mode based on kernel command line. */ + if (disable_radix) + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + + if (early_radix_enabled()) + radix__early_init_devtree(); + else + hash__early_init_devtree(); +} +#endif /* CONFIG_PPC_STD_MMU_64 */ diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index 670318766545..34079302cc17 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -14,6 +14,9 @@ #include "mmu_decl.h" #include <trace/events/thp.h> +int (*register_process_table)(unsigned long base, unsigned long page_size, + unsigned long tbl_size); + #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * This is called when relaxing access to a hugepage. It's also called in the page @@ -33,7 +36,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, changed = !pmd_same(*(pmdp), entry); if (changed) { __ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry)); - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } return changed; } @@ -66,7 +69,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0); - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); /* * This ensures that generic code that rely on IRQ disabling * to prevent a parallel THP split work as expected. diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 003ff48a11b6..af897d91d09f 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -171,7 +171,7 @@ redo: * of process table here. But our linear mapping also enable us to use * physical address here. */ - ppc_md.register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); + register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd); } @@ -198,7 +198,7 @@ static void __init radix_init_partition_table(void) void __init radix_init_native(void) { - ppc_md.register_process_table = native_register_process_table; + register_process_table = native_register_process_table; } static int __init get_idx_from_shift(unsigned int shift) @@ -264,7 +264,7 @@ static int __init radix_dt_scan_page_sizes(unsigned long node, return 1; } -static void __init radix_init_page_sizes(void) +void __init radix__early_init_devtree(void) { int rc; @@ -343,7 +343,6 @@ void __init radix__early_init_mmu(void) __pte_frag_nr = H_PTE_FRAG_NR; __pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT; - radix_init_page_sizes(); if (!firmware_has_feature(FW_FEATURE_LPAR)) { radix_init_native(); lpcr = mfspr(SPRN_LPCR); diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 88a307504b5a..0b6fb244d0a1 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -225,7 +225,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, if (!is_vm_hugetlb_page(vma)) assert_pte_locked(vma->vm_mm, address); __ptep_set_access_flags(ptep, entry); - flush_tlb_page_nohash(vma, address); + flush_tlb_page(vma, address); } return changed; } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 7f922f557936..0ae0572bc239 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -79,7 +79,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) #endif } -__init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pte_t *pte; diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index e1f22700fb16..48df05ef5231 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -140,10 +140,11 @@ void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) } EXPORT_SYMBOL(radix__local_flush_tlb_pwc); -void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, - unsigned long ap, int nid) +void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, + int psize) { unsigned long pid; + unsigned long ap = mmu_get_ap(psize); preempt_disable(); pid = mm ? mm->context.id : 0; @@ -159,18 +160,12 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd if (vma && is_vm_hugetlb_page(vma)) return __local_flush_hugetlb_page(vma, vmaddr); #endif - radix___local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, - mmu_get_ap(mmu_virtual_psize), 0); + radix__local_flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr, + mmu_virtual_psize); } EXPORT_SYMBOL(radix__local_flush_tlb_page); #ifdef CONFIG_SMP -static int mm_is_core_local(struct mm_struct *mm) -{ - return cpumask_subset(mm_cpumask(mm), - topology_sibling_cpumask(smp_processor_id())); -} - void radix__flush_tlb_mm(struct mm_struct *mm) { unsigned long pid; @@ -221,10 +216,11 @@ no_context: } EXPORT_SYMBOL(radix__flush_tlb_pwc); -void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, - unsigned long ap, int nid) +void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, + int psize) { unsigned long pid; + unsigned long ap = mmu_get_ap(psize); preempt_disable(); pid = mm ? mm->context.id : 0; @@ -250,8 +246,8 @@ void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) if (vma && is_vm_hugetlb_page(vma)) return flush_hugetlb_page(vma, vmaddr); #endif - radix___flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, - mmu_get_ap(mmu_virtual_psize), 0); + radix__flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr, + mmu_virtual_psize); } EXPORT_SYMBOL(radix__flush_tlb_page); @@ -299,8 +295,65 @@ static int radix_get_mmu_psize(int page_size) void radix__tlb_flush(struct mmu_gather *tlb) { + int psize = 0; struct mm_struct *mm = tlb->mm; - radix__flush_tlb_mm(mm); + int page_size = tlb->page_size; + + psize = radix_get_mmu_psize(page_size); + /* + * if page size is not something we understand, do a full mm flush + */ + if (psize != -1 && !tlb->fullmm && !tlb->need_flush_all) + radix__flush_tlb_range_psize(mm, tlb->start, tlb->end, psize); + else + radix__flush_tlb_mm(mm); +} + +#define TLB_FLUSH_ALL -1UL +/* + * Number of pages above which we will do a bcast tlbie. Just a + * number at this point copied from x86 + */ +static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33; + +void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, + unsigned long end, int psize) +{ + unsigned long pid; + unsigned long addr; + int local = mm_is_core_local(mm); + unsigned long ap = mmu_get_ap(psize); + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + unsigned long page_size = 1UL << mmu_psize_defs[psize].shift; + + + preempt_disable(); + pid = mm ? mm->context.id : 0; + if (unlikely(pid == MMU_NO_CONTEXT)) + goto err_out; + + if (end == TLB_FLUSH_ALL || + (end - start) > tlb_single_page_flush_ceiling * page_size) { + if (local) + _tlbiel_pid(pid, RIC_FLUSH_TLB); + else + _tlbie_pid(pid, RIC_FLUSH_TLB); + goto err_out; + } + for (addr = start; addr < end; addr += page_size) { + + if (local) + _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB); + else { + if (lock_tlbie) + raw_spin_lock(&native_tlbie_lock); + _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB); + if (lock_tlbie) + raw_spin_unlock(&native_tlbie_lock); + } + } +err_out: + preempt_enable(); } void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, @@ -340,3 +393,10 @@ void radix__flush_tlb_lpid(unsigned long lpid) asm volatile("eieio; tlbsync; ptesync": : :"memory"); } EXPORT_SYMBOL(radix__flush_tlb_lpid); + +void radix__flush_pmd_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M); +} +EXPORT_SYMBOL(radix__flush_pmd_tlb_range); diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c index 558e30cce33e..702d7689d714 100644 --- a/arch/powerpc/mm/tlb_hash32.c +++ b/arch/powerpc/mm/tlb_hash32.c @@ -49,17 +49,6 @@ void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) EXPORT_SYMBOL(flush_hash_entry); /* - * Called by ptep_set_access_flags, must flush on CPUs for which the - * DSI handler can't just "fixup" the TLB on a write fault - */ -void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr) -{ - if (Hash != 0) - return; - _tlbie(addr); -} - -/* * Called at the end of a mmu_gather operation to make sure the * TLB flush is completely done. */ diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index f4668488512c..050badc0ebd3 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -215,12 +215,6 @@ EXPORT_SYMBOL(local_flush_tlb_page); static DEFINE_RAW_SPINLOCK(tlbivax_lock); -static int mm_is_core_local(struct mm_struct *mm) -{ - return cpumask_subset(mm_cpumask(mm), - topology_sibling_cpumask(smp_processor_id())); -} - struct tlb_flush_param { unsigned long addr; unsigned int pid; diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h index cda6fcb809ca..6447dc1c3d89 100644 --- a/arch/powerpc/perf/power9-events-list.h +++ b/arch/powerpc/perf/power9-events-list.h @@ -34,15 +34,15 @@ EVENT(PM_L1_ICACHE_MISS, 0x200fd) /* Instruction Demand sectors wriittent into IL1 */ EVENT(PM_L1_DEMAND_WRITE, 0x0408c) /* Instruction prefetch written into IL1 */ -EVENT(PM_IC_PREF_WRITE, 0x0408e) +EVENT(PM_IC_PREF_WRITE, 0x0488c) /* The data cache was reloaded from local core's L3 due to a demand load */ EVENT(PM_DATA_FROM_L3, 0x4c042) /* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ EVENT(PM_DATA_FROM_L3MISS, 0x300fe) /* All successful D-side store dispatches for this thread */ -EVENT(PM_L2_ST, 0x16081) +EVENT(PM_L2_ST, 0x16880) /* All successful D-side store dispatches for this thread that were L2 Miss */ -EVENT(PM_L2_ST_MISS, 0x26081) +EVENT(PM_L2_ST_MISS, 0x26880) /* Total HW L3 prefetches(Load+store) */ EVENT(PM_L3_PREF_ALL, 0x4e052) /* Data PTEG reload */ diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 3663f71fd913..fbdae8377b71 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -321,6 +321,17 @@ config OF_RTC Uses information from the OF or flattened device tree to instantiate platform devices for direct mapped RTC chips like the DS1742 or DS1743. +config GEN_RTC + bool "Use the platform RTC operations from user space" + select RTC_CLASS + select RTC_DRV_GENERIC + help + This option provides backwards compatibility with the old gen_rtc.ko + module that was traditionally used for old PowerPC machines. + Platforms should migrate to enabling the RTC_DRV_GENERIC by hand + replacing their get_rtc_time/set_rtc_time callbacks with + a proper RTC device driver. + config SIMPLE_GPIO bool "Support for simple, memory-mapped GPIO controllers" depends on PPC diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 9027d7c48507..f7d1a4953ea0 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -166,7 +166,7 @@ static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte, static int tce_build_cell(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { int i; unsigned long *io_pte, base_pte; @@ -193,7 +193,7 @@ static int tce_build_cell(struct iommu_table *tbl, long index, long npages, base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M | CBE_IOPTE_SO_RW | (window->ioid & CBE_IOPTE_IOID_Mask); #endif - if (unlikely(dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))) + if (unlikely(attrs & DMA_ATTR_WEAK_ORDERING)) base_pte &= ~CBE_IOPTE_SO_RW; io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset); @@ -526,7 +526,7 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, __set_bit(0, window->table.it_map); tce_build_cell(&window->table, window->table.it_offset, 1, - (unsigned long)iommu->pad_page, DMA_TO_DEVICE, NULL); + (unsigned long)iommu->pad_page, DMA_TO_DEVICE, 0); return window; } @@ -572,7 +572,7 @@ static struct iommu_table *cell_get_iommu_table(struct device *dev) static void *dma_fixed_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { if (iommu_fixed_is_weak) return iommu_alloc_coherent(dev, cell_get_iommu_table(dev), @@ -586,7 +586,7 @@ static void *dma_fixed_alloc_coherent(struct device *dev, size_t size, static void dma_fixed_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { if (iommu_fixed_is_weak) iommu_free_coherent(cell_get_iommu_table(dev), size, vaddr, @@ -598,9 +598,9 @@ static void dma_fixed_free_coherent(struct device *dev, size_t size, static dma_addr_t dma_fixed_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { - if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING)) return dma_direct_ops.map_page(dev, page, offset, size, direction, attrs); else @@ -611,9 +611,9 @@ static dma_addr_t dma_fixed_map_page(struct device *dev, struct page *page, static void dma_fixed_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { - if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING)) dma_direct_ops.unmap_page(dev, dma_addr, size, direction, attrs); else @@ -623,9 +623,9 @@ static void dma_fixed_unmap_page(struct device *dev, dma_addr_t dma_addr, static int dma_fixed_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { - if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING)) return dma_direct_ops.map_sg(dev, sg, nents, direction, attrs); else return ppc_iommu_map_sg(dev, cell_get_iommu_table(dev), sg, @@ -635,9 +635,9 @@ static int dma_fixed_map_sg(struct device *dev, struct scatterlist *sg, static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { - if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING)) dma_direct_ops.unmap_sg(dev, sg, nents, direction, attrs); else ppc_iommu_unmap_sg(cell_get_iommu_table(dev), sg, nents, @@ -1162,7 +1162,7 @@ static int __init setup_iommu_fixed(char *str) pciep = of_find_node_by_type(NULL, "pcie-endpoint"); if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0)) - iommu_fixed_is_weak = 1; + iommu_fixed_is_weak = DMA_ATTR_WEAK_ORDERING; of_node_put(pciep); diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c index d17e98bc0c10..e7d075077cb0 100644 --- a/arch/powerpc/platforms/cell/pervasive.c +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -35,6 +35,7 @@ #include <asm/pgtable.h> #include <asm/reg.h> #include <asm/cell-regs.h> +#include <asm/cpu_has_feature.h> #include "pervasive.h" diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index 43dd3fb514e0..309d9ccccd50 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -88,7 +88,7 @@ static int iommu_table_iobmap_inited; static int iobmap_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { u32 *ip; u32 rpn; diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 3de4a7c85140..6b4e9d181126 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -353,12 +353,12 @@ static int pmac_late_init(void) machine_late_initcall(powermac, pmac_late_init); /* - * This is __init_refok because we check for "initializing" before + * This is __ref because we check for "initializing" before * touching any of the __init sensitive things and "initializing" * will be false after __init time. This can't be __init because it * can be called whenever a disk is first accessed. */ -void __init_refok note_bootable_part(dev_t dev, int part, int goodness) +void __ref note_bootable_part(dev_t dev, int part, int goodness) { char *p; diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 4383a5ff82ba..00e1a0195c78 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -73,7 +73,7 @@ EXPORT_SYMBOL(pnv_pci_get_npu_dev); static void *dma_npu_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { NPU_DMA_OP_UNSUPPORTED(); return NULL; @@ -81,7 +81,7 @@ static void *dma_npu_alloc(struct device *dev, size_t size, static void dma_npu_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { NPU_DMA_OP_UNSUPPORTED(); } @@ -89,7 +89,7 @@ static void dma_npu_free(struct device *dev, size_t size, static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { NPU_DMA_OP_UNSUPPORTED(); return 0; @@ -97,7 +97,7 @@ static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page, static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { NPU_DMA_OP_UNSUPPORTED(); return 0; diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index cf928bba4d9a..3d29d40eb0e9 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -64,7 +64,6 @@ END_FTR_SECTION(0, 1); \ OPAL_BRANCH(opal_tracepoint_entry) \ mfcr r12; \ stw r12,8(r1); \ - std r1,PACAR1(r13); \ li r11,0; \ mfmsr r12; \ ori r11,r11,MSR_EE; \ @@ -127,7 +126,6 @@ opal_tracepoint_entry: mfcr r12 std r11,16(r1) stw r12,8(r1) - std r1,PACAR1(r13) li r11,0 mfmsr r12 ori r11,r11,MSR_EE diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 891fc4a453df..6b9528307f62 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1806,7 +1806,7 @@ static void pnv_pci_p7ioc_tce_invalidate(struct iommu_table *tbl, static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { int ret = pnv_tce_build(tbl, index, npages, uaddr, direction, attrs); @@ -1950,7 +1950,7 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { int ret = pnv_tce_build(tbl, index, npages, uaddr, direction, attrs); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 6701dd5ded20..a21d831c1114 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -704,7 +704,7 @@ static __be64 *pnv_tce(struct iommu_table *tbl, long idx) int pnv_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { u64 proto_tce = iommu_direction_to_tce_perm(direction); u64 rpn = __pa(uaddr) >> tbl->it_page_shift; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index d088d4f06116..e64df7894d6e 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -181,7 +181,7 @@ struct pnv_phb { extern struct pci_ops pnv_pci_ops; extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs); + unsigned long attrs); extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages); extern int pnv_tce_xchg(struct iommu_table *tbl, long index, unsigned long *hpa, enum dma_data_direction *direction); diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index 3f175e8aedb4..57caaf11a83f 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -189,7 +189,7 @@ fail_malloc: return result; } -static int __init_refok ps3_setup_uhc_device( +static int __ref ps3_setup_uhc_device( const struct ps3_repository_device *repo, enum ps3_match_id match_id, enum ps3_interrupt_type interrupt_type, enum ps3_reg_type reg_type) { diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index 5606fe36faf2..8af1c15aef85 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -516,7 +516,7 @@ core_initcall(ps3_system_bus_init); */ static void * ps3_alloc_coherent(struct device *_dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { int result; struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); @@ -553,7 +553,7 @@ clean_none: } static void ps3_free_coherent(struct device *_dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); @@ -569,7 +569,7 @@ static void ps3_free_coherent(struct device *_dev, size_t size, void *vaddr, static dma_addr_t ps3_sb_map_page(struct device *_dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); int result; @@ -592,7 +592,7 @@ static dma_addr_t ps3_sb_map_page(struct device *_dev, struct page *page, static dma_addr_t ps3_ioc0_map_page(struct device *_dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); int result; @@ -626,7 +626,7 @@ static dma_addr_t ps3_ioc0_map_page(struct device *_dev, struct page *page, } static void ps3_unmap_page(struct device *_dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction direction, struct dma_attrs *attrs) + size_t size, enum dma_data_direction direction, unsigned long attrs) { struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); int result; @@ -640,7 +640,7 @@ static void ps3_unmap_page(struct device *_dev, dma_addr_t dma_addr, } static int ps3_sb_map_sg(struct device *_dev, struct scatterlist *sgl, - int nents, enum dma_data_direction direction, struct dma_attrs *attrs) + int nents, enum dma_data_direction direction, unsigned long attrs) { #if defined(CONFIG_PS3_DYNAMIC_DMA) BUG_ON("do"); @@ -670,14 +670,14 @@ static int ps3_sb_map_sg(struct device *_dev, struct scatterlist *sgl, static int ps3_ioc0_map_sg(struct device *_dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { BUG(); return 0; } static void ps3_sb_unmap_sg(struct device *_dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction, struct dma_attrs *attrs) + int nents, enum dma_data_direction direction, unsigned long attrs) { #if defined(CONFIG_PS3_DYNAMIC_DMA) BUG_ON("do"); @@ -686,7 +686,7 @@ static void ps3_sb_unmap_sg(struct device *_dev, struct scatterlist *sg, static void ps3_ioc0_unmap_sg(struct device *_dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { BUG(); } diff --git a/arch/powerpc/platforms/ps3/time.c b/arch/powerpc/platforms/ps3/time.c index 791c6142c4a7..11b45b58c81b 100644 --- a/arch/powerpc/platforms/ps3/time.c +++ b/arch/powerpc/platforms/ps3/time.c @@ -20,9 +20,9 @@ #include <linux/kernel.h> #include <linux/platform_device.h> +#include <linux/rtc.h> #include <asm/firmware.h> -#include <asm/rtc.h> #include <asm/lv1call.h> #include <asm/ps3.h> diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 770a753b52c9..0024e451bb36 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -123,7 +123,7 @@ static void iommu_pseries_free_group(struct iommu_table_group *table_group, static int tce_build_pSeries(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { u64 proto_tce; __be64 *tcep, *tces; @@ -173,7 +173,7 @@ static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { u64 rc = 0; u64 proto_tce, tce; @@ -216,7 +216,7 @@ static DEFINE_PER_CPU(__be64 *, tce_page); static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { u64 rc = 0; u64 proto_tce; diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 26904f4879ec..3573d54b2770 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -185,7 +185,7 @@ static void dart_flush(struct iommu_table *tbl) static int dart_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned int *dp, *orig_dp; unsigned int rpn; diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index f5bf38b94595..68e7c0dd2e45 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -289,7 +289,7 @@ static void fsl_rio_inbound_mem_init(struct rio_priv *priv) } int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, - u64 rstart, u32 size, u32 flags) + u64 rstart, u64 size, u32 flags) { struct rio_priv *priv = mport->priv; u32 base_size; @@ -298,7 +298,7 @@ int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, u32 riwar; int i; - if ((size & (size - 1)) != 0) + if ((size & (size - 1)) != 0 || size > 0x400000000ULL) return -EINVAL; base_size_log = ilog2(size); @@ -491,6 +491,7 @@ int fsl_rio_setup(struct platform_device *dev) rmu_node = of_parse_phandle(dev->dev.of_node, "fsl,srio-rmu-handle", 0); if (!rmu_node) { dev_err(&dev->dev, "No valid fsl,srio-rmu-handle property\n"); + rc = -ENOENT; goto err_rmu; } rc = of_address_to_resource(rmu_node, 0, &rmu_regs); @@ -643,19 +644,11 @@ int fsl_rio_setup(struct platform_device *dev) port->ops = ops; port->priv = priv; port->phys_efptr = 0x100; + port->phys_rmap = 1; priv->regs_win = rio_regs_win; - /* Probe the master port phy type */ ccsr = in_be32(priv->regs_win + RIO_CCSR + i*0x20); - port->phy_type = (ccsr & 1) ? RIO_PHY_SERIAL : RIO_PHY_PARALLEL; - if (port->phy_type == RIO_PHY_PARALLEL) { - dev_err(&dev->dev, "RIO: Parallel PHY type, unsupported port type!\n"); - release_resource(&port->iores); - kfree(priv); - kfree(port); - continue; - } - dev_info(&dev->dev, "RapidIO PHY type: Serial\n"); + /* Checking the port training status */ if (in_be32((priv->regs_win + RIO_ESCSR + i*0x20)) & 1) { dev_err(&dev->dev, "Port %d is not ready. " @@ -705,11 +698,9 @@ int fsl_rio_setup(struct platform_device *dev) ((i == 0) ? RIO_INB_ATMU_REGS_PORT1_OFFSET : RIO_INB_ATMU_REGS_PORT2_OFFSET)); - - /* Set to receive any dist ID for serial RapidIO controller. */ - if (port->phy_type == RIO_PHY_SERIAL) - out_be32((priv->regs_win - + RIO_ISR_AACR + i*0x80), RIO_ISR_AACR_AA); + /* Set to receive packets with any dest ID */ + out_be32((priv->regs_win + RIO_ISR_AACR + i*0x80), + RIO_ISR_AACR_AA); /* Configure maintenance transaction window */ out_be32(&priv->maint_atmu_regs->rowbar, diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index ed5234ed8d3f..5ebd3f018295 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -112,7 +112,7 @@ int msi_bitmap_reserve_dt_hwirqs(struct msi_bitmap *bmp) return 0; } -int __init_refok msi_bitmap_alloc(struct msi_bitmap *bmp, unsigned int irq_count, +int __ref msi_bitmap_alloc(struct msi_bitmap *bmp, unsigned int irq_count, struct device_node *of_node) { int size; diff --git a/arch/powerpc/xmon/ppc-dis.c b/arch/powerpc/xmon/ppc-dis.c index 89098f320ad5..ee9891734149 100644 --- a/arch/powerpc/xmon/ppc-dis.c +++ b/arch/powerpc/xmon/ppc-dis.c @@ -20,6 +20,7 @@ along with this file; see the file COPYING. If not, write to the Free Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ #include <asm/cputable.h> +#include <asm/cpu_has_feature.h> #include "nonstdio.h" #include "ansidecl.h" #include "ppc.h" diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9e607bf2d640..e751fe25d6ab 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -123,6 +123,7 @@ config S390 select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_EARLY_PFN_TO_NID + select HAVE_ARCH_HARDENED_USERCOPY select HAVE_ARCH_JUMP_LABEL select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER @@ -871,4 +872,17 @@ config S390_GUEST Select this option if you want to run the kernel as a guest under the KVM hypervisor. +config S390_GUEST_OLD_TRANSPORT + def_bool y + prompt "Guest support for old s390 virtio transport (DEPRECATED)" + depends on S390_GUEST + help + Enable this option to add support for the old s390-virtio + transport (i.e. virtio devices NOT based on virtio-ccw). This + type of virtio devices is only available on the experimental + kuli userspace or with old (< 2.6) qemu. If you are running + with a modern version of qemu (which supports virtio-ccw since + 1.4 and uses it by default since version 2.4), you probably won't + need this. + endmenu diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 13723c39e58e..33ba697c782d 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -33,10 +33,10 @@ quiet_cmd_sizes = GEN $@ $(obj)/sizes.h: vmlinux $(call if_changed,sizes) -AFLAGS_head.o += -I$(obj) +AFLAGS_head.o += -I$(objtree)/$(obj) $(obj)/head.o: $(obj)/sizes.h -CFLAGS_misc.o += -I$(obj) +CFLAGS_misc.o += -I$(objtree)/$(obj) $(obj)/misc.o: $(obj)/sizes.h OBJCOPYFLAGS_vmlinux.bin := -R .comment -S diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 67d43a0eabb4..28f03ca60100 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -19,29 +19,10 @@ #include <asm/ebcdic.h> #include "hypfs.h" -#define LPAR_NAME_LEN 8 /* lpar name len in diag 204 data */ -#define CPU_NAME_LEN 16 /* type name len of cpus in diag224 name table */ #define TMP_SIZE 64 /* size of temporary buffers */ #define DBFS_D204_HDR_VERSION 0 -/* diag 204 subcodes */ -enum diag204_sc { - SUBC_STIB4 = 4, - SUBC_RSI = 5, - SUBC_STIB6 = 6, - SUBC_STIB7 = 7 -}; - -/* The two available diag 204 data formats */ -enum diag204_format { - INFO_SIMPLE = 0, - INFO_EXT = 0x00010000 -}; - -/* bit is set in flags, when physical cpu info is included in diag 204 data */ -#define LPAR_PHYS_FLG 0x80 - static char *diag224_cpu_names; /* diag 224 name table */ static enum diag204_sc diag204_store_sc; /* used subcode for store */ static enum diag204_format diag204_info_type; /* used diag 204 data format */ @@ -53,7 +34,7 @@ static int diag204_buf_pages; /* number of pages for diag204 data */ static struct dentry *dbfs_d204_file; /* - * DIAG 204 data structures and member access functions. + * DIAG 204 member access functions. * * Since we have two different diag 204 data formats for old and new s390 * machines, we do not access the structs directly, but use getter functions for @@ -62,304 +43,173 @@ static struct dentry *dbfs_d204_file; /* Time information block */ -struct info_blk_hdr { - __u8 npar; - __u8 flags; - __u16 tslice; - __u16 phys_cpus; - __u16 this_part; - __u64 curtod; -} __attribute__ ((packed)); - -struct x_info_blk_hdr { - __u8 npar; - __u8 flags; - __u16 tslice; - __u16 phys_cpus; - __u16 this_part; - __u64 curtod1; - __u64 curtod2; - char reserved[40]; -} __attribute__ ((packed)); - static inline int info_blk_hdr__size(enum diag204_format type) { - if (type == INFO_SIMPLE) - return sizeof(struct info_blk_hdr); - else /* INFO_EXT */ - return sizeof(struct x_info_blk_hdr); + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_info_blk_hdr); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_info_blk_hdr); } static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct info_blk_hdr *)hdr)->npar; - else /* INFO_EXT */ - return ((struct x_info_blk_hdr *)hdr)->npar; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_info_blk_hdr *)hdr)->npar; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_info_blk_hdr *)hdr)->npar; } static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct info_blk_hdr *)hdr)->flags; - else /* INFO_EXT */ - return ((struct x_info_blk_hdr *)hdr)->flags; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_info_blk_hdr *)hdr)->flags; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_info_blk_hdr *)hdr)->flags; } static inline __u16 info_blk_hdr__pcpus(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct info_blk_hdr *)hdr)->phys_cpus; - else /* INFO_EXT */ - return ((struct x_info_blk_hdr *)hdr)->phys_cpus; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_info_blk_hdr *)hdr)->phys_cpus; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_info_blk_hdr *)hdr)->phys_cpus; } /* Partition header */ -struct part_hdr { - __u8 pn; - __u8 cpus; - char reserved[6]; - char part_name[LPAR_NAME_LEN]; -} __attribute__ ((packed)); - -struct x_part_hdr { - __u8 pn; - __u8 cpus; - __u8 rcpus; - __u8 pflag; - __u32 mlu; - char part_name[LPAR_NAME_LEN]; - char lpc_name[8]; - char os_name[8]; - __u64 online_cs; - __u64 online_es; - __u8 upid; - char reserved1[3]; - __u32 group_mlu; - char group_name[8]; - char reserved2[32]; -} __attribute__ ((packed)); - static inline int part_hdr__size(enum diag204_format type) { - if (type == INFO_SIMPLE) - return sizeof(struct part_hdr); - else /* INFO_EXT */ - return sizeof(struct x_part_hdr); + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_part_hdr); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_part_hdr); } static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct part_hdr *)hdr)->cpus; - else /* INFO_EXT */ - return ((struct x_part_hdr *)hdr)->rcpus; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_part_hdr *)hdr)->cpus; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_part_hdr *)hdr)->rcpus; } static inline void part_hdr__part_name(enum diag204_format type, void *hdr, char *name) { - if (type == INFO_SIMPLE) - memcpy(name, ((struct part_hdr *)hdr)->part_name, - LPAR_NAME_LEN); - else /* INFO_EXT */ - memcpy(name, ((struct x_part_hdr *)hdr)->part_name, - LPAR_NAME_LEN); - EBCASC(name, LPAR_NAME_LEN); - name[LPAR_NAME_LEN] = 0; + if (type == DIAG204_INFO_SIMPLE) + memcpy(name, ((struct diag204_part_hdr *)hdr)->part_name, + DIAG204_LPAR_NAME_LEN); + else /* DIAG204_INFO_EXT */ + memcpy(name, ((struct diag204_x_part_hdr *)hdr)->part_name, + DIAG204_LPAR_NAME_LEN); + EBCASC(name, DIAG204_LPAR_NAME_LEN); + name[DIAG204_LPAR_NAME_LEN] = 0; strim(name); } -struct cpu_info { - __u16 cpu_addr; - char reserved1[2]; - __u8 ctidx; - __u8 cflag; - __u16 weight; - __u64 acc_time; - __u64 lp_time; -} __attribute__ ((packed)); - -struct x_cpu_info { - __u16 cpu_addr; - char reserved1[2]; - __u8 ctidx; - __u8 cflag; - __u16 weight; - __u64 acc_time; - __u64 lp_time; - __u16 min_weight; - __u16 cur_weight; - __u16 max_weight; - char reseved2[2]; - __u64 online_time; - __u64 wait_time; - __u32 pma_weight; - __u32 polar_weight; - char reserved3[40]; -} __attribute__ ((packed)); - /* CPU info block */ static inline int cpu_info__size(enum diag204_format type) { - if (type == INFO_SIMPLE) - return sizeof(struct cpu_info); - else /* INFO_EXT */ - return sizeof(struct x_cpu_info); + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_cpu_info); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_cpu_info); } static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct cpu_info *)hdr)->ctidx; - else /* INFO_EXT */ - return ((struct x_cpu_info *)hdr)->ctidx; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->ctidx; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->ctidx; } static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct cpu_info *)hdr)->cpu_addr; - else /* INFO_EXT */ - return ((struct x_cpu_info *)hdr)->cpu_addr; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->cpu_addr; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->cpu_addr; } static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct cpu_info *)hdr)->acc_time; - else /* INFO_EXT */ - return ((struct x_cpu_info *)hdr)->acc_time; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->acc_time; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->acc_time; } static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct cpu_info *)hdr)->lp_time; - else /* INFO_EXT */ - return ((struct x_cpu_info *)hdr)->lp_time; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->lp_time; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->lp_time; } static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) + if (type == DIAG204_INFO_SIMPLE) return 0; /* online_time not available in simple info */ - else /* INFO_EXT */ - return ((struct x_cpu_info *)hdr)->online_time; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->online_time; } /* Physical header */ -struct phys_hdr { - char reserved1[1]; - __u8 cpus; - char reserved2[6]; - char mgm_name[8]; -} __attribute__ ((packed)); - -struct x_phys_hdr { - char reserved1[1]; - __u8 cpus; - char reserved2[6]; - char mgm_name[8]; - char reserved3[80]; -} __attribute__ ((packed)); - static inline int phys_hdr__size(enum diag204_format type) { - if (type == INFO_SIMPLE) - return sizeof(struct phys_hdr); - else /* INFO_EXT */ - return sizeof(struct x_phys_hdr); + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_phys_hdr); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_phys_hdr); } static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct phys_hdr *)hdr)->cpus; - else /* INFO_EXT */ - return ((struct x_phys_hdr *)hdr)->cpus; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_hdr *)hdr)->cpus; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_hdr *)hdr)->cpus; } /* Physical CPU info block */ -struct phys_cpu { - __u16 cpu_addr; - char reserved1[2]; - __u8 ctidx; - char reserved2[3]; - __u64 mgm_time; - char reserved3[8]; -} __attribute__ ((packed)); - -struct x_phys_cpu { - __u16 cpu_addr; - char reserved1[2]; - __u8 ctidx; - char reserved2[3]; - __u64 mgm_time; - char reserved3[80]; -} __attribute__ ((packed)); - static inline int phys_cpu__size(enum diag204_format type) { - if (type == INFO_SIMPLE) - return sizeof(struct phys_cpu); - else /* INFO_EXT */ - return sizeof(struct x_phys_cpu); + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_phys_cpu); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_phys_cpu); } static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct phys_cpu *)hdr)->cpu_addr; - else /* INFO_EXT */ - return ((struct x_phys_cpu *)hdr)->cpu_addr; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_cpu *)hdr)->cpu_addr; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_cpu *)hdr)->cpu_addr; } static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct phys_cpu *)hdr)->mgm_time; - else /* INFO_EXT */ - return ((struct x_phys_cpu *)hdr)->mgm_time; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_cpu *)hdr)->mgm_time; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_cpu *)hdr)->mgm_time; } static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct phys_cpu *)hdr)->ctidx; - else /* INFO_EXT */ - return ((struct x_phys_cpu *)hdr)->ctidx; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_cpu *)hdr)->ctidx; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_cpu *)hdr)->ctidx; } /* Diagnose 204 functions */ - -static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr) -{ - register unsigned long _subcode asm("0") = *subcode; - register unsigned long _size asm("1") = size; - - asm volatile( - " diag %2,%0,0x204\n" - "0: nopr %%r7\n" - EX_TABLE(0b,0b) - : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory"); - *subcode = _subcode; - return _size; -} - -static int diag204(unsigned long subcode, unsigned long size, void *addr) -{ - diag_stat_inc(DIAG_STAT_X204); - size = __diag204(&subcode, size, addr); - if (subcode) - return -1; - return size; -} - /* * For the old diag subcode 4 with simple data format we have to use real * memory. If we use subcode 6 or 7 with extended data format, we can (and @@ -411,12 +261,12 @@ static void *diag204_get_buffer(enum diag204_format fmt, int *pages) *pages = diag204_buf_pages; return diag204_buf; } - if (fmt == INFO_SIMPLE) { + if (fmt == DIAG204_INFO_SIMPLE) { *pages = 1; return diag204_alloc_rbuf(); - } else {/* INFO_EXT */ - *pages = diag204((unsigned long)SUBC_RSI | - (unsigned long)INFO_EXT, 0, NULL); + } else {/* DIAG204_INFO_EXT */ + *pages = diag204((unsigned long)DIAG204_SUBC_RSI | + (unsigned long)DIAG204_INFO_EXT, 0, NULL); if (*pages <= 0) return ERR_PTR(-ENOSYS); else @@ -443,18 +293,18 @@ static int diag204_probe(void) void *buf; int pages, rc; - buf = diag204_get_buffer(INFO_EXT, &pages); + buf = diag204_get_buffer(DIAG204_INFO_EXT, &pages); if (!IS_ERR(buf)) { - if (diag204((unsigned long)SUBC_STIB7 | - (unsigned long)INFO_EXT, pages, buf) >= 0) { - diag204_store_sc = SUBC_STIB7; - diag204_info_type = INFO_EXT; + if (diag204((unsigned long)DIAG204_SUBC_STIB7 | + (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) { + diag204_store_sc = DIAG204_SUBC_STIB7; + diag204_info_type = DIAG204_INFO_EXT; goto out; } - if (diag204((unsigned long)SUBC_STIB6 | - (unsigned long)INFO_EXT, pages, buf) >= 0) { - diag204_store_sc = SUBC_STIB6; - diag204_info_type = INFO_EXT; + if (diag204((unsigned long)DIAG204_SUBC_STIB6 | + (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) { + diag204_store_sc = DIAG204_SUBC_STIB6; + diag204_info_type = DIAG204_INFO_EXT; goto out; } diag204_free_buffer(); @@ -462,15 +312,15 @@ static int diag204_probe(void) /* subcodes 6 and 7 failed, now try subcode 4 */ - buf = diag204_get_buffer(INFO_SIMPLE, &pages); + buf = diag204_get_buffer(DIAG204_INFO_SIMPLE, &pages); if (IS_ERR(buf)) { rc = PTR_ERR(buf); goto fail_alloc; } - if (diag204((unsigned long)SUBC_STIB4 | - (unsigned long)INFO_SIMPLE, pages, buf) >= 0) { - diag204_store_sc = SUBC_STIB4; - diag204_info_type = INFO_SIMPLE; + if (diag204((unsigned long)DIAG204_SUBC_STIB4 | + (unsigned long)DIAG204_INFO_SIMPLE, pages, buf) >= 0) { + diag204_store_sc = DIAG204_SUBC_STIB4; + diag204_info_type = DIAG204_INFO_SIMPLE; goto out; } else { rc = -ENOSYS; @@ -510,20 +360,6 @@ out: /* Diagnose 224 functions */ -static int diag224(void *ptr) -{ - int rc = -EOPNOTSUPP; - - diag_stat_inc(DIAG_STAT_X224); - asm volatile( - " diag %1,%2,0x224\n" - "0: lhi %0,0x0\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) :"d" (0), "d" (ptr) : "memory"); - return rc; -} - static int diag224_get_name_table(void) { /* memory must be below 2GB */ @@ -545,9 +381,9 @@ static void diag224_delete_name_table(void) static int diag224_idx2name(int index, char *name) { - memcpy(name, diag224_cpu_names + ((index + 1) * CPU_NAME_LEN), - CPU_NAME_LEN); - name[CPU_NAME_LEN] = 0; + memcpy(name, diag224_cpu_names + ((index + 1) * DIAG204_CPU_NAME_LEN), + DIAG204_CPU_NAME_LEN); + name[DIAG204_CPU_NAME_LEN] = 0; strim(name); return 0; } @@ -603,7 +439,7 @@ __init int hypfs_diag_init(void) pr_err("The hardware system does not support hypfs\n"); return -ENODATA; } - if (diag204_info_type == INFO_EXT) { + if (diag204_info_type == DIAG204_INFO_EXT) { rc = hypfs_dbfs_create_file(&dbfs_file_d204); if (rc) return rc; @@ -651,7 +487,7 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) cpu_info__lp_time(diag204_info_type, cpu_info)); if (IS_ERR(rc)) return PTR_ERR(rc); - if (diag204_info_type == INFO_EXT) { + if (diag204_info_type == DIAG204_INFO_EXT) { rc = hypfs_create_u64(cpu_dir, "onlinetime", cpu_info__online_time(diag204_info_type, cpu_info)); @@ -667,12 +503,12 @@ static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr) { struct dentry *cpus_dir; struct dentry *lpar_dir; - char lpar_name[LPAR_NAME_LEN + 1]; + char lpar_name[DIAG204_LPAR_NAME_LEN + 1]; void *cpu_info; int i; part_hdr__part_name(diag204_info_type, part_hdr, lpar_name); - lpar_name[LPAR_NAME_LEN] = 0; + lpar_name[DIAG204_LPAR_NAME_LEN] = 0; lpar_dir = hypfs_mkdir(systems_dir, lpar_name); if (IS_ERR(lpar_dir)) return lpar_dir; @@ -755,7 +591,8 @@ int hypfs_diag_create_files(struct dentry *root) goto err_out; } } - if (info_blk_hdr__flags(diag204_info_type, time_hdr) & LPAR_PHYS_FLG) { + if (info_blk_hdr__flags(diag204_info_type, time_hdr) & + DIAG204_LPAR_PHYS_FLG) { ptr = hypfs_create_phys_files(root, part_hdr); if (IS_ERR(ptr)) { rc = PTR_ERR(ptr); diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index 1a82cf26ee11..d28621de8e0b 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -20,6 +20,9 @@ #define CPACF_KMC 0xb92f /* MSA */ #define CPACF_KIMD 0xb93e /* MSA */ #define CPACF_KLMD 0xb93f /* MSA */ +#define CPACF_PCKMO 0xb928 /* MSA3 */ +#define CPACF_KMF 0xb92a /* MSA4 */ +#define CPACF_KMO 0xb92b /* MSA4 */ #define CPACF_PCC 0xb92c /* MSA4 */ #define CPACF_KMCTR 0xb92d /* MSA4 */ #define CPACF_PPNO 0xb93c /* MSA5 */ @@ -136,6 +139,7 @@ static inline void __cpacf_query(unsigned int opcode, unsigned char *status) register unsigned long r1 asm("1") = (unsigned long) status; asm volatile( + " spm 0\n" /* pckmo doesn't change the cc */ /* Parameter registers are ignored, but may not be 0 */ "0: .insn rrf,%[opc] << 16,2,2,2,0\n" " brc 1,0b\n" /* handle partial completion */ @@ -157,6 +161,12 @@ static inline int cpacf_query(unsigned int opcode, unsigned int func) if (!test_facility(17)) /* check for MSA */ return 0; break; + case CPACF_PCKMO: + if (!test_facility(76)) /* check for MSA3 */ + return 0; + break; + case CPACF_KMF: + case CPACF_KMO: case CPACF_PCC: case CPACF_KMCTR: if (!test_facility(77)) /* check for MSA4 */ diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 86cae09e076a..8acf482162ed 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -78,4 +78,153 @@ struct diag210 { extern int diag210(struct diag210 *addr); +/* bit is set in flags, when physical cpu info is included in diag 204 data */ +#define DIAG204_LPAR_PHYS_FLG 0x80 +#define DIAG204_LPAR_NAME_LEN 8 /* lpar name len in diag 204 data */ +#define DIAG204_CPU_NAME_LEN 16 /* type name len of cpus in diag224 name table */ + +/* diag 204 subcodes */ +enum diag204_sc { + DIAG204_SUBC_STIB4 = 4, + DIAG204_SUBC_RSI = 5, + DIAG204_SUBC_STIB6 = 6, + DIAG204_SUBC_STIB7 = 7 +}; + +/* The two available diag 204 data formats */ +enum diag204_format { + DIAG204_INFO_SIMPLE = 0, + DIAG204_INFO_EXT = 0x00010000 +}; + +enum diag204_cpu_flags { + DIAG204_CPU_ONLINE = 0x20, + DIAG204_CPU_CAPPED = 0x40, +}; + +struct diag204_info_blk_hdr { + __u8 npar; + __u8 flags; + __u16 tslice; + __u16 phys_cpus; + __u16 this_part; + __u64 curtod; +} __packed; + +struct diag204_x_info_blk_hdr { + __u8 npar; + __u8 flags; + __u16 tslice; + __u16 phys_cpus; + __u16 this_part; + __u64 curtod1; + __u64 curtod2; + char reserved[40]; +} __packed; + +struct diag204_part_hdr { + __u8 pn; + __u8 cpus; + char reserved[6]; + char part_name[DIAG204_LPAR_NAME_LEN]; +} __packed; + +struct diag204_x_part_hdr { + __u8 pn; + __u8 cpus; + __u8 rcpus; + __u8 pflag; + __u32 mlu; + char part_name[DIAG204_LPAR_NAME_LEN]; + char lpc_name[8]; + char os_name[8]; + __u64 online_cs; + __u64 online_es; + __u8 upid; + __u8 reserved:3; + __u8 mtid:5; + char reserved1[2]; + __u32 group_mlu; + char group_name[8]; + char hardware_group_name[8]; + char reserved2[24]; +} __packed; + +struct diag204_cpu_info { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + __u8 cflag; + __u16 weight; + __u64 acc_time; + __u64 lp_time; +} __packed; + +struct diag204_x_cpu_info { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + __u8 cflag; + __u16 weight; + __u64 acc_time; + __u64 lp_time; + __u16 min_weight; + __u16 cur_weight; + __u16 max_weight; + char reseved2[2]; + __u64 online_time; + __u64 wait_time; + __u32 pma_weight; + __u32 polar_weight; + __u32 cpu_type_cap; + __u32 group_cpu_type_cap; + char reserved3[32]; +} __packed; + +struct diag204_phys_hdr { + char reserved1[1]; + __u8 cpus; + char reserved2[6]; + char mgm_name[8]; +} __packed; + +struct diag204_x_phys_hdr { + char reserved1[1]; + __u8 cpus; + char reserved2[6]; + char mgm_name[8]; + char reserved3[80]; +} __packed; + +struct diag204_phys_cpu { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + char reserved2[3]; + __u64 mgm_time; + char reserved3[8]; +} __packed; + +struct diag204_x_phys_cpu { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + char reserved2[1]; + __u16 weight; + __u64 mgm_time; + char reserved3[80]; +} __packed; + +struct diag204_x_part_block { + struct diag204_x_part_hdr hdr; + struct diag204_x_cpu_info cpus[]; +} __packed; + +struct diag204_x_phys_block { + struct diag204_x_phys_hdr hdr; + struct diag204_x_phys_cpu cpus[]; +} __packed; + +int diag204(unsigned long subcode, unsigned long size, void *addr); +int diag224(void *ptr); #endif /* _ASM_S390_DIAG_H */ diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h index 3249b7464889..ffaba07f50ab 100644 --- a/arch/s390/include/asm/dma-mapping.h +++ b/arch/s390/include/asm/dma-mapping.h @@ -5,7 +5,6 @@ #include <linux/types.h> #include <linux/mm.h> #include <linux/scatterlist.h> -#include <linux/dma-attrs.h> #include <linux/dma-debug.h> #include <linux/io.h> diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index d054c1b07a3c..741ddba0bf11 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -10,14 +10,25 @@ /** * struct gmap_struct - guest address space + * @list: list head for the mm->context gmap list * @crst_list: list of all crst tables used in the guest address space * @mm: pointer to the parent mm_struct * @guest_to_host: radix tree with guest to host address translation * @host_to_guest: radix tree with pointer to segment table entries * @guest_table_lock: spinlock to protect all entries in the guest page table + * @ref_count: reference counter for the gmap structure * @table: pointer to the page directory * @asce: address space control element for gmap page table * @pfault_enabled: defines if pfaults are applicable for the guest + * @host_to_rmap: radix tree with gmap_rmap lists + * @children: list of shadow gmap structures + * @pt_list: list of all page tables used in the shadow guest address space + * @shadow_lock: spinlock to protect the shadow gmap list + * @parent: pointer to the parent gmap for shadow guest address spaces + * @orig_asce: ASCE for which the shadow page table has been created + * @edat_level: edat level to be used for the shadow translation + * @removed: flag to indicate if a shadow guest address space has been removed + * @initialized: flag to indicate if a shadow guest address space can be used */ struct gmap { struct list_head list; @@ -26,26 +37,64 @@ struct gmap { struct radix_tree_root guest_to_host; struct radix_tree_root host_to_guest; spinlock_t guest_table_lock; + atomic_t ref_count; unsigned long *table; unsigned long asce; unsigned long asce_end; void *private; bool pfault_enabled; + /* Additional data for shadow guest address spaces */ + struct radix_tree_root host_to_rmap; + struct list_head children; + struct list_head pt_list; + spinlock_t shadow_lock; + struct gmap *parent; + unsigned long orig_asce; + int edat_level; + bool removed; + bool initialized; }; /** + * struct gmap_rmap - reverse mapping for shadow page table entries + * @next: pointer to next rmap in the list + * @raddr: virtual rmap address in the shadow guest address space + */ +struct gmap_rmap { + struct gmap_rmap *next; + unsigned long raddr; +}; + +#define gmap_for_each_rmap(pos, head) \ + for (pos = (head); pos; pos = pos->next) + +#define gmap_for_each_rmap_safe(pos, n, head) \ + for (pos = (head); n = pos ? pos->next : NULL, pos; pos = n) + +/** * struct gmap_notifier - notify function block for page invalidation * @notifier_call: address of callback function */ struct gmap_notifier { struct list_head list; - void (*notifier_call)(struct gmap *gmap, unsigned long gaddr); + struct rcu_head rcu; + void (*notifier_call)(struct gmap *gmap, unsigned long start, + unsigned long end); }; -struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit); -void gmap_free(struct gmap *gmap); +static inline int gmap_is_shadow(struct gmap *gmap) +{ + return !!gmap->parent; +} + +struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit); +void gmap_remove(struct gmap *gmap); +struct gmap *gmap_get(struct gmap *gmap); +void gmap_put(struct gmap *gmap); + void gmap_enable(struct gmap *gmap); void gmap_disable(struct gmap *gmap); +struct gmap *gmap_get_enabled(void); int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long len); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); @@ -57,8 +106,29 @@ void gmap_discard(struct gmap *, unsigned long from, unsigned long to); void __gmap_zap(struct gmap *, unsigned long gaddr); void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr); -void gmap_register_ipte_notifier(struct gmap_notifier *); -void gmap_unregister_ipte_notifier(struct gmap_notifier *); -int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); +int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val); + +struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, + int edat_level); +int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level); +int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, + int fake); +int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, + int fake); +int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, + int fake); +int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, + int fake); +int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, + unsigned long *pgt, int *dat_protection, int *fake); +int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte); + +void gmap_register_pte_notifier(struct gmap_notifier *); +void gmap_unregister_pte_notifier(struct gmap_notifier *); +void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *, + unsigned long bits); + +int gmap_mprotect_notify(struct gmap *, unsigned long start, + unsigned long len, int prot); #endif /* _ASM_S390_GMAP_H */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index ac82e8eb936d..8e5daf7a76ce 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -43,6 +43,7 @@ /* s390-specific vcpu->requests bit members */ #define KVM_REQ_ENABLE_IBS 8 #define KVM_REQ_DISABLE_IBS 9 +#define KVM_REQ_ICPT_OPEREXC 10 #define SIGP_CTRL_C 0x80 #define SIGP_CTRL_SCN_MASK 0x3f @@ -145,7 +146,7 @@ struct kvm_s390_sie_block { __u64 cputm; /* 0x0028 */ __u64 ckc; /* 0x0030 */ __u64 epoch; /* 0x0038 */ - __u8 reserved40[4]; /* 0x0040 */ + __u32 svcc; /* 0x0040 */ #define LCTL_CR0 0x8000 #define LCTL_CR6 0x0200 #define LCTL_CR9 0x0040 @@ -154,6 +155,7 @@ struct kvm_s390_sie_block { #define LCTL_CR14 0x0002 __u16 lctl; /* 0x0044 */ __s16 icpua; /* 0x0046 */ +#define ICTL_OPEREXC 0x80000000 #define ICTL_PINT 0x20000000 #define ICTL_LPSW 0x00400000 #define ICTL_STCTL 0x00040000 @@ -166,6 +168,9 @@ struct kvm_s390_sie_block { #define ICPT_INST 0x04 #define ICPT_PROGI 0x08 #define ICPT_INSTPROGI 0x0C +#define ICPT_EXTINT 0x14 +#define ICPT_VALIDITY 0x20 +#define ICPT_STOP 0x28 #define ICPT_OPEREXC 0x2C #define ICPT_PARTEXEC 0x38 #define ICPT_IOINST 0x40 @@ -185,7 +190,9 @@ struct kvm_s390_sie_block { __u32 scaol; /* 0x0064 */ __u8 reserved68[4]; /* 0x0068 */ __u32 todpr; /* 0x006c */ - __u8 reserved70[32]; /* 0x0070 */ + __u8 reserved70[16]; /* 0x0070 */ + __u64 mso; /* 0x0080 */ + __u64 msl; /* 0x0088 */ psw_t gpsw; /* 0x0090 */ __u64 gg14; /* 0x00a0 */ __u64 gg15; /* 0x00a8 */ @@ -223,7 +230,7 @@ struct kvm_s390_sie_block { __u8 reserved1e6[2]; /* 0x01e6 */ __u64 itdba; /* 0x01e8 */ __u64 riccbd; /* 0x01f0 */ - __u8 reserved1f8[8]; /* 0x01f8 */ + __u64 gvrd; /* 0x01f8 */ } __attribute__((packed)); struct kvm_s390_itdb { @@ -256,6 +263,7 @@ struct kvm_vcpu_stat { u32 instruction_stctg; u32 exit_program_interruption; u32 exit_instr_and_program; + u32 exit_operation_exception; u32 deliver_external_call; u32 deliver_emergency_signal; u32 deliver_service_signal; @@ -278,7 +286,9 @@ struct kvm_vcpu_stat { u32 instruction_stsi; u32 instruction_stfl; u32 instruction_tprot; + u32 instruction_sie; u32 instruction_essa; + u32 instruction_sthyi; u32 instruction_sigp_sense; u32 instruction_sigp_sense_running; u32 instruction_sigp_external_call; @@ -541,12 +551,16 @@ struct kvm_guestdbg_info_arch { struct kvm_vcpu_arch { struct kvm_s390_sie_block *sie_block; + /* if vsie is active, currently executed shadow sie control block */ + struct kvm_s390_sie_block *vsie_block; unsigned int host_acrs[NUM_ACRS]; struct fpu host_fpregs; struct kvm_s390_local_interrupt local_int; struct hrtimer ckc_timer; struct kvm_s390_pgm_info pgm; struct gmap *gmap; + /* backup location for the currently enabled gmap when scheduled out */ + struct gmap *enabled_gmap; struct kvm_guestdbg_info_arch guestdbg; unsigned long pfault_token; unsigned long pfault_select; @@ -631,6 +645,14 @@ struct sie_page2 { u8 reserved900[0x1000 - 0x900]; /* 0x0900 */ } __packed; +struct kvm_s390_vsie { + struct mutex mutex; + struct radix_tree_root addr_to_page; + int page_count; + int next; + struct page *pages[KVM_MAX_VCPUS]; +}; + struct kvm_arch{ void *sca; int use_esca; @@ -646,15 +668,20 @@ struct kvm_arch{ int user_cpu_state_ctrl; int user_sigp; int user_stsi; + int user_instr0; struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; wait_queue_head_t ipte_wq; int ipte_lock_count; struct mutex ipte_mutex; + struct ratelimit_state sthyi_limit; spinlock_t start_stop_lock; struct sie_page2 *sie_page2; struct kvm_s390_cpu_model model; struct kvm_s390_crypto crypto; + struct kvm_s390_vsie vsie; u64 epoch; + /* subset of available cpu features enabled by user space */ + DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); }; #define KVM_HVA_ERR_BAD (-1UL) diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 18226437a832..6d39329c894b 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -8,8 +8,9 @@ typedef struct { cpumask_t cpu_attach_mask; atomic_t flush_count; unsigned int flush_mm; - spinlock_t list_lock; + spinlock_t pgtable_lock; struct list_head pgtable_list; + spinlock_t gmap_lock; struct list_head gmap_list; unsigned long asce; unsigned long asce_limit; @@ -22,9 +23,11 @@ typedef struct { unsigned int use_skey:1; } mm_context_t; -#define INIT_MM_CONTEXT(name) \ - .context.list_lock = __SPIN_LOCK_UNLOCKED(name.context.list_lock), \ - .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \ +#define INIT_MM_CONTEXT(name) \ + .context.pgtable_lock = \ + __SPIN_LOCK_UNLOCKED(name.context.pgtable_lock), \ + .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \ + .context.gmap_lock = __SPIN_LOCK_UNLOCKED(name.context.gmap_lock), \ .context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list), static inline int tprot(unsigned long addr) diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index f77c638bf397..c6a088c91aee 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -15,8 +15,9 @@ static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { - spin_lock_init(&mm->context.list_lock); + spin_lock_init(&mm->context.pgtable_lock); INIT_LIST_HEAD(&mm->context.pgtable_list); + spin_lock_init(&mm->context.gmap_lock); INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); atomic_set(&mm->context.flush_count, 0); diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index b2146c4119b2..69b8a41fca84 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -111,13 +111,14 @@ static inline unsigned char page_get_storage_key(unsigned long addr) static inline int page_reset_referenced(unsigned long addr) { - unsigned int ipm; + int cc; asm volatile( " rrbe 0,%1\n" " ipm %0\n" - : "=d" (ipm) : "a" (addr) : "cc"); - return !!(ipm & 0x20000000); + " srl %0,28\n" + : "=d" (cc) : "a" (addr) : "cc"); + return cc; } /* Bits int the storage key */ @@ -148,6 +149,8 @@ static inline int devmem_is_allowed(unsigned long pfn) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) +#define page_to_virt(page) pfn_to_virt(page_to_pfn(page)) #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index da34cb6b1f3b..f4eb9843eed4 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -19,8 +19,10 @@ unsigned long *crst_table_alloc(struct mm_struct *); void crst_table_free(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *); +struct page *page_table_alloc_pgste(struct mm_struct *mm); void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); +void page_table_free_pgste(struct page *page); extern int page_table_allocate_pgste; static inline void clear_table(unsigned long *s, unsigned long val, size_t n) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 48d383af078f..72c7f60bfe83 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -277,6 +277,7 @@ static inline int is_module_addr(void *addr) /* Bits in the region table entry */ #define _REGION_ENTRY_ORIGIN ~0xfffUL/* region/segment table origin */ #define _REGION_ENTRY_PROTECT 0x200 /* region protection bit */ +#define _REGION_ENTRY_OFFSET 0xc0 /* region table offset */ #define _REGION_ENTRY_INVALID 0x20 /* invalid region table entry */ #define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */ #define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */ @@ -364,6 +365,7 @@ static inline int is_module_addr(void *addr) #define PGSTE_GC_BIT 0x0002000000000000UL #define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */ #define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */ +#define PGSTE_VSIE_BIT 0x0000200000000000UL /* ref'd in a shadow table */ /* Guest Page State used for virtualization */ #define _PGSTE_GPS_ZERO 0x0000000080000000UL @@ -1002,15 +1004,26 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry); void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep); -void ptep_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +void ptep_notify(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long bits); +int ptep_force_prot(struct mm_struct *mm, unsigned long gaddr, + pte_t *ptep, int prot, unsigned long bit); void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pte_t *ptep , int reset); void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, + pte_t *sptep, pte_t *tptep, pte_t pte); +void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep); bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char key, bool nq); -unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr); +int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned char key, unsigned char *oldkey, + bool nq, bool mr, bool mc); +int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr); +int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned char *key); /* * Certain architectures need to do special things when PTEs diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 09529202ea77..03323175de30 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -112,6 +112,8 @@ struct thread_struct { unsigned long ksp; /* kernel stack pointer */ mm_segment_t mm_segment; unsigned long gmap_addr; /* address of last gmap fault. */ + unsigned int gmap_write_flag; /* gmap fault write indication */ + unsigned int gmap_int_code; /* int code of last gmap fault */ unsigned int gmap_pfault; /* signal of a pending guest pfault */ struct per_regs per_user; /* User specified PER registers */ struct per_event per_event; /* Cause of the last PER trap */ diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index e4f6f73afe2f..2ad9c204b1a2 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -32,12 +32,19 @@ struct sclp_core_entry { u8 reserved0; u8 : 4; u8 sief2 : 1; - u8 : 3; - u8 : 3; + u8 skey : 1; + u8 : 2; + u8 : 2; + u8 gpere : 1; u8 siif : 1; u8 sigpif : 1; u8 : 3; - u8 reserved2[10]; + u8 reserved2[3]; + u8 : 2; + u8 ib : 1; + u8 cei : 1; + u8 : 4; + u8 reserved3[6]; u8 type; u8 reserved1; } __attribute__((packed)); @@ -59,6 +66,15 @@ struct sclp_info { unsigned char has_hvs : 1; unsigned char has_esca : 1; unsigned char has_sief2 : 1; + unsigned char has_64bscao : 1; + unsigned char has_gpere : 1; + unsigned char has_cmma : 1; + unsigned char has_gsls : 1; + unsigned char has_ib : 1; + unsigned char has_cei : 1; + unsigned char has_pfmfi : 1; + unsigned char has_ibs : 1; + unsigned char has_skey : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; @@ -101,5 +117,6 @@ int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count); int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count); void sclp_early_detect(void); void _sclp_print_early(const char *); +void sclp_ocf_cpc_name_copy(char *dst); #endif /* _ASM_S390_SCLP_H */ diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 3b8e99ef9d58..a2ffec4139ad 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -93,6 +93,47 @@ struct kvm_s390_vm_cpu_machine { __u64 fac_list[256]; }; +#define KVM_S390_VM_CPU_PROCESSOR_FEAT 2 +#define KVM_S390_VM_CPU_MACHINE_FEAT 3 + +#define KVM_S390_VM_CPU_FEAT_NR_BITS 1024 +#define KVM_S390_VM_CPU_FEAT_ESOP 0 +#define KVM_S390_VM_CPU_FEAT_SIEF2 1 +#define KVM_S390_VM_CPU_FEAT_64BSCAO 2 +#define KVM_S390_VM_CPU_FEAT_SIIF 3 +#define KVM_S390_VM_CPU_FEAT_GPERE 4 +#define KVM_S390_VM_CPU_FEAT_GSLS 5 +#define KVM_S390_VM_CPU_FEAT_IB 6 +#define KVM_S390_VM_CPU_FEAT_CEI 7 +#define KVM_S390_VM_CPU_FEAT_IBS 8 +#define KVM_S390_VM_CPU_FEAT_SKEY 9 +#define KVM_S390_VM_CPU_FEAT_CMMA 10 +#define KVM_S390_VM_CPU_FEAT_PFMFI 11 +#define KVM_S390_VM_CPU_FEAT_SIGPIF 12 +struct kvm_s390_vm_cpu_feat { + __u64 feat[16]; +}; + +#define KVM_S390_VM_CPU_PROCESSOR_SUBFUNC 4 +#define KVM_S390_VM_CPU_MACHINE_SUBFUNC 5 +/* for "test bit" instructions MSB 0 bit ordering, for "query" raw blocks */ +struct kvm_s390_vm_cpu_subfunc { + __u8 plo[32]; /* always */ + __u8 ptff[16]; /* with TOD-clock steering */ + __u8 kmac[16]; /* with MSA */ + __u8 kmc[16]; /* with MSA */ + __u8 km[16]; /* with MSA */ + __u8 kimd[16]; /* with MSA */ + __u8 klmd[16]; /* with MSA */ + __u8 pckmo[16]; /* with MSA3 */ + __u8 kmctr[16]; /* with MSA4 */ + __u8 kmf[16]; /* with MSA4 */ + __u8 kmo[16]; /* with MSA4 */ + __u8 pcc[16]; /* with MSA4 */ + __u8 ppno[16]; /* with MSA5 */ + __u8 reserved[1824]; +}; + /* kvm attributes for crypto */ #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h index 8fb5d4a6dd25..3ac634368939 100644 --- a/arch/s390/include/uapi/asm/sie.h +++ b/arch/s390/include/uapi/asm/sie.h @@ -140,6 +140,7 @@ exit_code_ipa0(0xB2, 0x4c, "TAR"), \ exit_code_ipa0(0xB2, 0x50, "CSP"), \ exit_code_ipa0(0xB2, 0x54, "MVPG"), \ + exit_code_ipa0(0xB2, 0x56, "STHYI"), \ exit_code_ipa0(0xB2, 0x58, "BSG"), \ exit_code_ipa0(0xB2, 0x5a, "BSA"), \ exit_code_ipa0(0xB2, 0x5f, "CHSC"), \ diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index 48b37b8357e6..a97354c8c667 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -162,6 +162,30 @@ int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) } EXPORT_SYMBOL(diag14); +static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr) +{ + register unsigned long _subcode asm("0") = *subcode; + register unsigned long _size asm("1") = size; + + asm volatile( + " diag %2,%0,0x204\n" + "0: nopr %%r7\n" + EX_TABLE(0b,0b) + : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory"); + *subcode = _subcode; + return _size; +} + +int diag204(unsigned long subcode, unsigned long size, void *addr) +{ + diag_stat_inc(DIAG_STAT_X204); + size = __diag204(&subcode, size, addr); + if (subcode) + return -1; + return size; +} +EXPORT_SYMBOL(diag204); + /* * Diagnose 210: Get information about a virtual device */ @@ -196,3 +220,18 @@ int diag210(struct diag210 *addr) return ccode; } EXPORT_SYMBOL(diag210); + +int diag224(void *ptr) +{ + int rc = -EOPNOTSUPP; + + diag_stat_inc(DIAG_STAT_X224); + asm volatile( + " diag %1,%2,0x224\n" + "0: lhi %0,0x0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (rc) :"d" (0), "d" (ptr) : "memory"); + return rc; +} +EXPORT_SYMBOL(diag224); diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index d42fa38c2429..09a9e6dfc09f 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -12,6 +12,6 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch ccflags-y := -Ivirt/kvm -Iarch/s390/kvm kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o -kvm-objs += diag.o gaccess.o guestdbg.o +kvm-objs += diag.o gaccess.o guestdbg.o sthyi.o vsie.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 1ea4095b67d7..ce865bd4f81d 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -212,6 +212,11 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) return -EOPNOTSUPP; + VCPU_EVENT(vcpu, 4, "diag 0x500 schid 0x%8.8x queue 0x%x cookie 0x%llx", + (u32) vcpu->run->s.regs.gprs[2], + (u32) vcpu->run->s.regs.gprs[3], + vcpu->run->s.regs.gprs[4]); + /* * The layout is as follows: * - gpr 2 contains the subchannel id (passed as addr) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 66938d283b77..54200208bf24 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -8,6 +8,7 @@ #include <linux/vmalloc.h> #include <linux/err.h> #include <asm/pgtable.h> +#include <asm/gmap.h> #include "kvm-s390.h" #include "gaccess.h" #include <asm/switch_to.h> @@ -476,18 +477,73 @@ enum { FSI_FETCH = 2 /* Exception was due to fetch operation */ }; -static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, - ar_t ar, enum gacc_mode mode) +enum prot_type { + PROT_TYPE_LA = 0, + PROT_TYPE_KEYC = 1, + PROT_TYPE_ALC = 2, + PROT_TYPE_DAT = 3, +}; + +static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, + ar_t ar, enum gacc_mode mode, enum prot_type prot) { - int rc; - struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; - struct trans_exc_code_bits *tec_bits; + struct trans_exc_code_bits *tec; memset(pgm, 0, sizeof(*pgm)); - tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - tec_bits->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; - tec_bits->as = psw.as; + pgm->code = code; + tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; + + switch (code) { + case PGM_ASCE_TYPE: + case PGM_PAGE_TRANSLATION: + case PGM_REGION_FIRST_TRANS: + case PGM_REGION_SECOND_TRANS: + case PGM_REGION_THIRD_TRANS: + case PGM_SEGMENT_TRANSLATION: + /* + * op_access_id only applies to MOVE_PAGE -> set bit 61 + * exc_access_id has to be set to 0 for some instructions. Both + * cases have to be handled by the caller. We can always store + * exc_access_id, as it is undefined for non-ar cases. + */ + tec->addr = gva >> PAGE_SHIFT; + tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; + tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as; + /* FALL THROUGH */ + case PGM_ALEN_TRANSLATION: + case PGM_ALE_SEQUENCE: + case PGM_ASTE_VALIDITY: + case PGM_ASTE_SEQUENCE: + case PGM_EXTENDED_AUTHORITY: + pgm->exc_access_id = ar; + break; + case PGM_PROTECTION: + switch (prot) { + case PROT_TYPE_ALC: + tec->b60 = 1; + /* FALL THROUGH */ + case PROT_TYPE_DAT: + tec->b61 = 1; + tec->addr = gva >> PAGE_SHIFT; + tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; + tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as; + /* exc_access_id is undefined for most cases */ + pgm->exc_access_id = ar; + break; + default: /* LA and KEYC set b61 to 0, other params undefined */ + break; + } + break; + } + return code; +} + +static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, + unsigned long ga, ar_t ar, enum gacc_mode mode) +{ + int rc; + struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); if (!psw.t) { asce->val = 0; @@ -510,21 +566,8 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, return 0; case PSW_AS_ACCREG: rc = ar_translation(vcpu, asce, ar, mode); - switch (rc) { - case PGM_ALEN_TRANSLATION: - case PGM_ALE_SEQUENCE: - case PGM_ASTE_VALIDITY: - case PGM_ASTE_SEQUENCE: - case PGM_EXTENDED_AUTHORITY: - vcpu->arch.pgm.exc_access_id = ar; - break; - case PGM_PROTECTION: - tec_bits->b60 = 1; - tec_bits->b61 = 1; - break; - } if (rc > 0) - pgm->code = rc; + return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC); return rc; } return 0; @@ -729,40 +772,31 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, return 1; } -static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, +static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, unsigned long *pages, unsigned long nr_pages, const union asce asce, enum gacc_mode mode) { - struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; - struct trans_exc_code_bits *tec_bits; - int lap_enabled, rc; + int lap_enabled, rc = 0; - tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; lap_enabled = low_address_protection_enabled(vcpu, asce); while (nr_pages) { ga = kvm_s390_logical_to_effective(vcpu, ga); - tec_bits->addr = ga >> PAGE_SHIFT; - if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) { - pgm->code = PGM_PROTECTION; - return pgm->code; - } + if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) + return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode, + PROT_TYPE_LA); ga &= PAGE_MASK; if (psw_bits(*psw).t) { rc = guest_translate(vcpu, ga, pages, asce, mode); if (rc < 0) return rc; - if (rc == PGM_PROTECTION) - tec_bits->b61 = 1; - if (rc) - pgm->code = rc; } else { *pages = kvm_s390_real_to_abs(vcpu, ga); if (kvm_is_error_gpa(vcpu->kvm, *pages)) - pgm->code = PGM_ADDRESSING; + rc = PGM_ADDRESSING; } - if (pgm->code) - return pgm->code; + if (rc) + return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_DAT); ga += PAGE_SIZE; pages++; nr_pages--; @@ -783,7 +817,8 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, if (!len) return 0; - rc = get_vcpu_asce(vcpu, &asce, ar, mode); + ga = kvm_s390_logical_to_effective(vcpu, ga); + rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode); if (rc) return rc; nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; @@ -795,7 +830,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, need_ipte_lock = psw_bits(*psw).t && !asce.r; if (need_ipte_lock) ipte_lock(vcpu); - rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, mode); + rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode); for (idx = 0; idx < nr_pages && !rc; idx++) { gpa = *(pages + idx) + (ga & ~PAGE_MASK); _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); @@ -846,37 +881,28 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, unsigned long *gpa, enum gacc_mode mode) { - struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; - struct trans_exc_code_bits *tec; union asce asce; int rc; gva = kvm_s390_logical_to_effective(vcpu, gva); - tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - rc = get_vcpu_asce(vcpu, &asce, ar, mode); - tec->addr = gva >> PAGE_SHIFT; + rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode); if (rc) return rc; if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { - if (mode == GACC_STORE) { - rc = pgm->code = PGM_PROTECTION; - return rc; - } + if (mode == GACC_STORE) + return trans_exc(vcpu, PGM_PROTECTION, gva, 0, + mode, PROT_TYPE_LA); } if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */ rc = guest_translate(vcpu, gva, gpa, asce, mode); - if (rc > 0) { - if (rc == PGM_PROTECTION) - tec->b61 = 1; - pgm->code = rc; - } + if (rc > 0) + return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT); } else { - rc = 0; *gpa = kvm_s390_real_to_abs(vcpu, gva); if (kvm_is_error_gpa(vcpu->kvm, *gpa)) - rc = pgm->code = PGM_ADDRESSING; + return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0); } return rc; @@ -915,20 +941,247 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, */ int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra) { - struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; - psw_t *psw = &vcpu->arch.sie_block->gpsw; - struct trans_exc_code_bits *tec_bits; union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]}; if (!ctlreg0.lap || !is_low_address(gra)) return 0; + return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA); +} - memset(pgm, 0, sizeof(*pgm)); - tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - tec_bits->fsi = FSI_STORE; - tec_bits->as = psw_bits(*psw).as; - tec_bits->addr = gra >> PAGE_SHIFT; - pgm->code = PGM_PROTECTION; +/** + * kvm_s390_shadow_tables - walk the guest page table and create shadow tables + * @sg: pointer to the shadow guest address space structure + * @saddr: faulting address in the shadow gmap + * @pgt: pointer to the page table address result + * @fake: pgt references contiguous guest memory block, not a pgtable + */ +static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, + unsigned long *pgt, int *dat_protection, + int *fake) +{ + struct gmap *parent; + union asce asce; + union vaddress vaddr; + unsigned long ptr; + int rc; + + *fake = 0; + *dat_protection = 0; + parent = sg->parent; + vaddr.addr = saddr; + asce.val = sg->orig_asce; + ptr = asce.origin * 4096; + if (asce.r) { + *fake = 1; + asce.dt = ASCE_TYPE_REGION1; + } + switch (asce.dt) { + case ASCE_TYPE_REGION1: + if (vaddr.rfx01 > asce.tl && !asce.r) + return PGM_REGION_FIRST_TRANS; + break; + case ASCE_TYPE_REGION2: + if (vaddr.rfx) + return PGM_ASCE_TYPE; + if (vaddr.rsx01 > asce.tl) + return PGM_REGION_SECOND_TRANS; + break; + case ASCE_TYPE_REGION3: + if (vaddr.rfx || vaddr.rsx) + return PGM_ASCE_TYPE; + if (vaddr.rtx01 > asce.tl) + return PGM_REGION_THIRD_TRANS; + break; + case ASCE_TYPE_SEGMENT: + if (vaddr.rfx || vaddr.rsx || vaddr.rtx) + return PGM_ASCE_TYPE; + if (vaddr.sx01 > asce.tl) + return PGM_SEGMENT_TRANSLATION; + break; + } + + switch (asce.dt) { + case ASCE_TYPE_REGION1: { + union region1_table_entry rfte; - return pgm->code; + if (*fake) { + /* offset in 16EB guest memory block */ + ptr = ptr + ((unsigned long) vaddr.rsx << 53UL); + rfte.val = ptr; + goto shadow_r2t; + } + rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val); + if (rc) + return rc; + if (rfte.i) + return PGM_REGION_FIRST_TRANS; + if (rfte.tt != TABLE_TYPE_REGION1) + return PGM_TRANSLATION_SPEC; + if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl) + return PGM_REGION_SECOND_TRANS; + if (sg->edat_level >= 1) + *dat_protection |= rfte.p; + ptr = rfte.rto << 12UL; +shadow_r2t: + rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake); + if (rc) + return rc; + /* fallthrough */ + } + case ASCE_TYPE_REGION2: { + union region2_table_entry rste; + + if (*fake) { + /* offset in 8PB guest memory block */ + ptr = ptr + ((unsigned long) vaddr.rtx << 42UL); + rste.val = ptr; + goto shadow_r3t; + } + rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val); + if (rc) + return rc; + if (rste.i) + return PGM_REGION_SECOND_TRANS; + if (rste.tt != TABLE_TYPE_REGION2) + return PGM_TRANSLATION_SPEC; + if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl) + return PGM_REGION_THIRD_TRANS; + if (sg->edat_level >= 1) + *dat_protection |= rste.p; + ptr = rste.rto << 12UL; +shadow_r3t: + rste.p |= *dat_protection; + rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake); + if (rc) + return rc; + /* fallthrough */ + } + case ASCE_TYPE_REGION3: { + union region3_table_entry rtte; + + if (*fake) { + /* offset in 4TB guest memory block */ + ptr = ptr + ((unsigned long) vaddr.sx << 31UL); + rtte.val = ptr; + goto shadow_sgt; + } + rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val); + if (rc) + return rc; + if (rtte.i) + return PGM_REGION_THIRD_TRANS; + if (rtte.tt != TABLE_TYPE_REGION3) + return PGM_TRANSLATION_SPEC; + if (rtte.cr && asce.p && sg->edat_level >= 2) + return PGM_TRANSLATION_SPEC; + if (rtte.fc && sg->edat_level >= 2) { + *dat_protection |= rtte.fc0.p; + *fake = 1; + ptr = rtte.fc1.rfaa << 31UL; + rtte.val = ptr; + goto shadow_sgt; + } + if (vaddr.sx01 < rtte.fc0.tf || vaddr.sx01 > rtte.fc0.tl) + return PGM_SEGMENT_TRANSLATION; + if (sg->edat_level >= 1) + *dat_protection |= rtte.fc0.p; + ptr = rtte.fc0.sto << 12UL; +shadow_sgt: + rtte.fc0.p |= *dat_protection; + rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake); + if (rc) + return rc; + /* fallthrough */ + } + case ASCE_TYPE_SEGMENT: { + union segment_table_entry ste; + + if (*fake) { + /* offset in 2G guest memory block */ + ptr = ptr + ((unsigned long) vaddr.sx << 20UL); + ste.val = ptr; + goto shadow_pgt; + } + rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val); + if (rc) + return rc; + if (ste.i) + return PGM_SEGMENT_TRANSLATION; + if (ste.tt != TABLE_TYPE_SEGMENT) + return PGM_TRANSLATION_SPEC; + if (ste.cs && asce.p) + return PGM_TRANSLATION_SPEC; + *dat_protection |= ste.fc0.p; + if (ste.fc && sg->edat_level >= 1) { + *fake = 1; + ptr = ste.fc1.sfaa << 20UL; + ste.val = ptr; + goto shadow_pgt; + } + ptr = ste.fc0.pto << 11UL; +shadow_pgt: + ste.fc0.p |= *dat_protection; + rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake); + if (rc) + return rc; + } + } + /* Return the parent address of the page table */ + *pgt = ptr; + return 0; +} + +/** + * kvm_s390_shadow_fault - handle fault on a shadow page table + * @vcpu: virtual cpu + * @sg: pointer to the shadow guest address space structure + * @saddr: faulting address in the shadow gmap + * + * Returns: - 0 if the shadow fault was successfully resolved + * - > 0 (pgm exception code) on exceptions while faulting + * - -EAGAIN if the caller can retry immediately + * - -EFAULT when accessing invalid guest addresses + * - -ENOMEM if out of memory + */ +int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, + unsigned long saddr) +{ + union vaddress vaddr; + union page_table_entry pte; + unsigned long pgt; + int dat_protection, fake; + int rc; + + down_read(&sg->mm->mmap_sem); + /* + * We don't want any guest-2 tables to change - so the parent + * tables/pointers we read stay valid - unshadowing is however + * always possible - only guest_table_lock protects us. + */ + ipte_lock(vcpu); + + rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake); + if (rc) + rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection, + &fake); + + vaddr.addr = saddr; + if (fake) { + /* offset in 1MB guest memory block */ + pte.val = pgt + ((unsigned long) vaddr.px << 12UL); + goto shadow_page; + } + if (!rc) + rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val); + if (!rc && pte.i) + rc = PGM_PAGE_TRANSLATION; + if (!rc && (pte.z || (pte.co && sg->edat_level < 1))) + rc = PGM_TRANSLATION_SPEC; +shadow_page: + pte.p |= dat_protection; + if (!rc) + rc = gmap_shadow_page(sg, saddr, __pte(pte.val)); + ipte_unlock(vcpu); + up_read(&sg->mm->mmap_sem); + return rc; } diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index df0a79dd8159..8756569ad938 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -361,4 +361,7 @@ void ipte_unlock(struct kvm_vcpu *vcpu); int ipte_lock_held(struct kvm_vcpu *vcpu); int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra); +int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow, + unsigned long saddr); + #endif /* __KVM_S390_GACCESS_H */ diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index e8c6843b9600..31a05330d11c 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -439,6 +439,23 @@ exit_required: #define guest_per_enabled(vcpu) \ (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) +int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu) +{ + const u8 ilen = kvm_s390_get_ilen(vcpu); + struct kvm_s390_pgm_info pgm_info = { + .code = PGM_PER, + .per_code = PER_EVENT_IFETCH >> 24, + .per_address = __rewind_psw(vcpu->arch.sie_block->gpsw, ilen), + }; + + /* + * The PSW points to the next instruction, therefore the intercepted + * instruction generated a PER i-fetch event. PER address therefore + * points at the previous PSW address (could be an EXECUTE function). + */ + return kvm_s390_inject_prog_irq(vcpu, &pgm_info); +} + static void filter_guest_per_event(struct kvm_vcpu *vcpu) { u32 perc = vcpu->arch.sie_block->perc << 24; @@ -465,7 +482,7 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu) guest_perc &= ~PER_EVENT_IFETCH; /* All other PER events will be given to the guest */ - /* TODO: Check alterated address/address space */ + /* TODO: Check altered address/address space */ vcpu->arch.sie_block->perc = guest_perc >> 24; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 252157181302..dfd0ca2638fa 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -351,8 +351,26 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } +static int handle_operexc(struct kvm_vcpu *vcpu) +{ + vcpu->stat.exit_operation_exception++; + trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa, + vcpu->arch.sie_block->ipb); + + if (vcpu->arch.sie_block->ipa == 0xb256 && + test_kvm_facility(vcpu->kvm, 74)) + return handle_sthyi(vcpu); + + if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0) + return -EOPNOTSUPP; + + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); +} + int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) { + int rc, per_rc = 0; + if (kvm_is_ucontrol(vcpu->kvm)) return -EOPNOTSUPP; @@ -361,7 +379,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) case 0x18: return handle_noop(vcpu); case 0x04: - return handle_instruction(vcpu); + rc = handle_instruction(vcpu); + break; case 0x08: return handle_prog(vcpu); case 0x14: @@ -372,9 +391,19 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) return handle_validity(vcpu); case 0x28: return handle_stop(vcpu); + case 0x2c: + rc = handle_operexc(vcpu); + break; case 0x38: - return handle_partial_execution(vcpu); + rc = handle_partial_execution(vcpu); + break; default: return -EOPNOTSUPP; } + + /* process PER, also if the instrution is processed in user space */ + if (vcpu->arch.sie_block->icptstatus & 0x02 && + (!rc || rc == -EOPNOTSUPP)) + per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu); + return per_rc ? per_rc : rc; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5a80af740d3e..24524c0f3ef8 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -28,9 +28,6 @@ #include "gaccess.h" #include "trace-s390.h" -#define IOINT_SCHID_MASK 0x0000ffff -#define IOINT_SSID_MASK 0x00030000 -#define IOINT_CSSID_MASK 0x03fc0000 #define PFAULT_INIT 0x0600 #define PFAULT_DONE 0x0680 #define VIRTIO_PARAM 0x0d00 @@ -821,7 +818,14 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info, list); if (inti) { - VCPU_EVENT(vcpu, 4, "deliver: I/O 0x%llx", inti->type); + if (inti->type & KVM_S390_INT_IO_AI_MASK) + VCPU_EVENT(vcpu, 4, "%s", "deliver: I/O (AI)"); + else + VCPU_EVENT(vcpu, 4, "deliver: I/O %x ss %x schid %04x", + inti->io.subchannel_id >> 8, + inti->io.subchannel_id >> 1 & 0x3, + inti->io.subchannel_nr); + vcpu->stat.deliver_io_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, @@ -991,6 +995,11 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) swake_up(&vcpu->wq); vcpu->stat.halt_wakeup++; } + /* + * The VCPU might not be sleeping but is executing the VSIE. Let's + * kick it, so it leaves the SIE to process the request. + */ + kvm_s390_vsie_kick(vcpu); } enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) @@ -1415,6 +1424,13 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) } fi->counters[FIRQ_CNTR_IO] += 1; + if (inti->type & KVM_S390_INT_IO_AI_MASK) + VM_EVENT(kvm, 4, "%s", "inject: I/O (AI)"); + else + VM_EVENT(kvm, 4, "inject: I/O %x ss %x schid %04x", + inti->io.subchannel_id >> 8, + inti->io.subchannel_id >> 1 & 0x3, + inti->io.subchannel_nr); isc = int_word_to_isc(inti->io.io_int_word); list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc]; list_add_tail(&inti->list, list); @@ -1531,13 +1547,6 @@ int kvm_s390_inject_vm(struct kvm *kvm, inti->mchk.mcic = s390int->parm64; break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - if (inti->type & KVM_S390_INT_IO_AI_MASK) - VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); - else - VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", - s390int->type & IOINT_CSSID_MASK, - s390int->type & IOINT_SSID_MASK, - s390int->type & IOINT_SCHID_MASK); inti->io.subchannel_id = s390int->parm >> 16; inti->io.subchannel_nr = s390int->parm & 0x0000ffffu; inti->io.io_int_parm = s390int->parm64 >> 32; @@ -2237,7 +2246,8 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e, return ret; } -int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int ret; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6f5c344cd785..3f3ae4865d57 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -21,11 +21,13 @@ #include <linux/init.h> #include <linux/kvm.h> #include <linux/kvm_host.h> +#include <linux/mman.h> #include <linux/module.h> #include <linux/random.h> #include <linux/slab.h> #include <linux/timer.h> #include <linux/vmalloc.h> +#include <linux/bitmap.h> #include <asm/asm-offsets.h> #include <asm/lowcore.h> #include <asm/stp.h> @@ -35,6 +37,8 @@ #include <asm/switch_to.h> #include <asm/isc.h> #include <asm/sclp.h> +#include <asm/cpacf.h> +#include <asm/timex.h> #include "kvm-s390.h" #include "gaccess.h" @@ -64,6 +68,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "exit_pei", VCPU_STAT(exit_pei) }, { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, + { "exit_operation_exception", VCPU_STAT(exit_operation_exception) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, @@ -94,6 +99,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_stsi", VCPU_STAT(instruction_stsi) }, { "instruction_stfl", VCPU_STAT(instruction_stfl) }, { "instruction_tprot", VCPU_STAT(instruction_tprot) }, + { "instruction_sthyi", VCPU_STAT(instruction_sthyi) }, + { "instruction_sie", VCPU_STAT(instruction_sie) }, { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, @@ -119,6 +126,11 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; +/* allow nested virtualization in KVM (if enabled by user space) */ +static int nested; +module_param(nested, int, S_IRUGO); +MODULE_PARM_DESC(nested, "Nested virtualization support"); + /* upper facilities limit for kvm */ unsigned long kvm_s390_fac_list_mask[16] = { 0xffe6000000000000UL, @@ -131,7 +143,13 @@ unsigned long kvm_s390_fac_list_mask_size(void) return ARRAY_SIZE(kvm_s390_fac_list_mask); } +/* available cpu features supported by kvm */ +static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); +/* available subfunctions indicated via query / "test bit" */ +static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc; + static struct gmap_notifier gmap_notifier; +static struct gmap_notifier vsie_gmap_notifier; debug_info_t *kvm_s390_dbf; /* Section: not file related */ @@ -141,7 +159,8 @@ int kvm_arch_hardware_enable(void) return 0; } -static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); +static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, + unsigned long end); /* * This callback is executed during stop_machine(). All CPUs are therefore @@ -163,6 +182,8 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, vcpu->arch.sie_block->epoch -= *delta; if (vcpu->arch.cputm_enabled) vcpu->arch.cputm_start += *delta; + if (vcpu->arch.vsie_block) + vcpu->arch.vsie_block->epoch -= *delta; } } return NOTIFY_OK; @@ -175,7 +196,9 @@ static struct notifier_block kvm_clock_notifier = { int kvm_arch_hardware_setup(void) { gmap_notifier.notifier_call = kvm_gmap_notifier; - gmap_register_ipte_notifier(&gmap_notifier); + gmap_register_pte_notifier(&gmap_notifier); + vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier; + gmap_register_pte_notifier(&vsie_gmap_notifier); atomic_notifier_chain_register(&s390_epoch_delta_notifier, &kvm_clock_notifier); return 0; @@ -183,11 +206,109 @@ int kvm_arch_hardware_setup(void) void kvm_arch_hardware_unsetup(void) { - gmap_unregister_ipte_notifier(&gmap_notifier); + gmap_unregister_pte_notifier(&gmap_notifier); + gmap_unregister_pte_notifier(&vsie_gmap_notifier); atomic_notifier_chain_unregister(&s390_epoch_delta_notifier, &kvm_clock_notifier); } +static void allow_cpu_feat(unsigned long nr) +{ + set_bit_inv(nr, kvm_s390_available_cpu_feat); +} + +static inline int plo_test_bit(unsigned char nr) +{ + register unsigned long r0 asm("0") = (unsigned long) nr | 0x100; + int cc = 3; /* subfunction not available */ + + asm volatile( + /* Parameter registers are ignored for "test bit" */ + " plo 0,0,0,0(0)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) + : "d" (r0) + : "cc"); + return cc == 0; +} + +static void kvm_s390_cpu_feat_init(void) +{ + int i; + + for (i = 0; i < 256; ++i) { + if (plo_test_bit(i)) + kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7); + } + + if (test_facility(28)) /* TOD-clock steering */ + ptff(kvm_s390_available_subfunc.ptff, + sizeof(kvm_s390_available_subfunc.ptff), + PTFF_QAF); + + if (test_facility(17)) { /* MSA */ + __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac); + __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc); + __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km); + __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd); + __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd); + } + if (test_facility(76)) /* MSA3 */ + __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo); + if (test_facility(77)) { /* MSA4 */ + __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr); + __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf); + __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo); + __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc); + } + if (test_facility(57)) /* MSA5 */ + __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno); + + if (MACHINE_HAS_ESOP) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); + /* + * We need SIE support, ESOP (PROT_READ protection for gmap_shadow), + * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing). + */ + if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao || + !test_facility(3) || !nested) + return; + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2); + if (sclp.has_64bscao) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO); + if (sclp.has_siif) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF); + if (sclp.has_gpere) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE); + if (sclp.has_gsls) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS); + if (sclp.has_ib) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB); + if (sclp.has_cei) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI); + if (sclp.has_ibs) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS); + /* + * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make + * all skey handling functions read/set the skey from the PGSTE + * instead of the real storage key. + * + * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make + * pages being detected as preserved although they are resident. + * + * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will + * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY. + * + * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and + * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be + * correctly shadowed. We can do that for the PGSTE but not for PTE.I. + * + * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We + * cannot easily shadow the SCA because of the ipte lock. + */ +} + int kvm_arch_init(void *opaque) { kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); @@ -199,6 +320,8 @@ int kvm_arch_init(void *opaque) return -ENOMEM; } + kvm_s390_cpu_feat_init(); + /* Register floating interrupt controller interface. */ return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); } @@ -244,6 +367,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_USER_STSI: case KVM_CAP_S390_SKEYS: case KVM_CAP_S390_IRQ_STATE: + case KVM_CAP_S390_USER_INSTR0: r = 1; break; case KVM_CAP_S390_MEM_OP: @@ -251,8 +375,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: - r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS - : KVM_S390_BSCA_CPU_SLOTS; + r = KVM_S390_BSCA_CPU_SLOTS; + if (sclp.has_esca && sclp.has_64bscao) + r = KVM_S390_ESCA_CPU_SLOTS; break; case KVM_CAP_NR_MEMSLOTS: r = KVM_USER_MEM_SLOTS; @@ -335,6 +460,16 @@ out: return r; } +static void icpt_operexc_on_all_vcpus(struct kvm *kvm) +{ + unsigned int i; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu); + } +} + static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) { int r; @@ -355,7 +490,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) break; case KVM_CAP_S390_VECTOR_REGISTERS: mutex_lock(&kvm->lock); - if (atomic_read(&kvm->online_vcpus)) { + if (kvm->created_vcpus) { r = -EBUSY; } else if (MACHINE_HAS_VX) { set_kvm_facility(kvm->arch.model.fac_mask, 129); @@ -370,7 +505,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) case KVM_CAP_S390_RI: r = -EINVAL; mutex_lock(&kvm->lock); - if (atomic_read(&kvm->online_vcpus)) { + if (kvm->created_vcpus) { r = -EBUSY; } else if (test_facility(64)) { set_kvm_facility(kvm->arch.model.fac_mask, 64); @@ -386,6 +521,12 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) kvm->arch.user_stsi = 1; r = 0; break; + case KVM_CAP_S390_USER_INSTR0: + VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0"); + kvm->arch.user_instr0 = 1; + icpt_operexc_on_all_vcpus(kvm); + r = 0; + break; default: r = -EINVAL; break; @@ -418,21 +559,23 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att unsigned int idx; switch (attr->attr) { case KVM_S390_VM_MEM_ENABLE_CMMA: - /* enable CMMA only for z10 and later (EDAT_1) */ - ret = -EINVAL; - if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1) + ret = -ENXIO; + if (!sclp.has_cmma) break; ret = -EBUSY; VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); mutex_lock(&kvm->lock); - if (atomic_read(&kvm->online_vcpus) == 0) { + if (!kvm->created_vcpus) { kvm->arch.use_cmma = 1; ret = 0; } mutex_unlock(&kvm->lock); break; case KVM_S390_VM_MEM_CLR_CMMA: + ret = -ENXIO; + if (!sclp.has_cmma) + break; ret = -EINVAL; if (!kvm->arch.use_cmma) break; @@ -461,20 +604,20 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att if (!new_limit) return -EINVAL; - /* gmap_alloc takes last usable address */ + /* gmap_create takes last usable address */ if (new_limit != KVM_S390_NO_MEM_LIMIT) new_limit -= 1; ret = -EBUSY; mutex_lock(&kvm->lock); - if (atomic_read(&kvm->online_vcpus) == 0) { - /* gmap_alloc will round the limit up */ - struct gmap *new = gmap_alloc(current->mm, new_limit); + if (!kvm->created_vcpus) { + /* gmap_create will round the limit up */ + struct gmap *new = gmap_create(current->mm, new_limit); if (!new) { ret = -ENOMEM; } else { - gmap_free(kvm->arch.gmap); + gmap_remove(kvm->arch.gmap); new->private = kvm; kvm->arch.gmap = new; ret = 0; @@ -644,7 +787,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) int ret = 0; mutex_lock(&kvm->lock); - if (atomic_read(&kvm->online_vcpus)) { + if (kvm->created_vcpus) { ret = -EBUSY; goto out; } @@ -676,6 +819,39 @@ out: return ret; } +static int kvm_s390_set_processor_feat(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_cpu_feat data; + int ret = -EBUSY; + + if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data))) + return -EFAULT; + if (!bitmap_subset((unsigned long *) data.feat, + kvm_s390_available_cpu_feat, + KVM_S390_VM_CPU_FEAT_NR_BITS)) + return -EINVAL; + + mutex_lock(&kvm->lock); + if (!atomic_read(&kvm->online_vcpus)) { + bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat, + KVM_S390_VM_CPU_FEAT_NR_BITS); + ret = 0; + } + mutex_unlock(&kvm->lock); + return ret; +} + +static int kvm_s390_set_processor_subfunc(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + /* + * Once supported by kernel + hw, we have to store the subfunctions + * in kvm->arch and remember that user space configured them. + */ + return -ENXIO; +} + static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) { int ret = -ENXIO; @@ -684,6 +860,12 @@ static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_PROCESSOR: ret = kvm_s390_set_processor(kvm, attr); break; + case KVM_S390_VM_CPU_PROCESSOR_FEAT: + ret = kvm_s390_set_processor_feat(kvm, attr); + break; + case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: + ret = kvm_s390_set_processor_subfunc(kvm, attr); + break; } return ret; } @@ -732,6 +914,50 @@ out: return ret; } +static int kvm_s390_get_processor_feat(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_cpu_feat data; + + bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat, + KVM_S390_VM_CPU_FEAT_NR_BITS); + if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) + return -EFAULT; + return 0; +} + +static int kvm_s390_get_machine_feat(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_cpu_feat data; + + bitmap_copy((unsigned long *) data.feat, + kvm_s390_available_cpu_feat, + KVM_S390_VM_CPU_FEAT_NR_BITS); + if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) + return -EFAULT; + return 0; +} + +static int kvm_s390_get_processor_subfunc(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + /* + * Once we can actually configure subfunctions (kernel + hw support), + * we have to check if they were already set by user space, if so copy + * them from kvm->arch. + */ + return -ENXIO; +} + +static int kvm_s390_get_machine_subfunc(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc, + sizeof(struct kvm_s390_vm_cpu_subfunc))) + return -EFAULT; + return 0; +} static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) { int ret = -ENXIO; @@ -743,6 +969,18 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_MACHINE: ret = kvm_s390_get_machine(kvm, attr); break; + case KVM_S390_VM_CPU_PROCESSOR_FEAT: + ret = kvm_s390_get_processor_feat(kvm, attr); + break; + case KVM_S390_VM_CPU_MACHINE_FEAT: + ret = kvm_s390_get_machine_feat(kvm, attr); + break; + case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: + ret = kvm_s390_get_processor_subfunc(kvm, attr); + break; + case KVM_S390_VM_CPU_MACHINE_SUBFUNC: + ret = kvm_s390_get_machine_subfunc(kvm, attr); + break; } return ret; } @@ -803,6 +1041,8 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) switch (attr->attr) { case KVM_S390_VM_MEM_ENABLE_CMMA: case KVM_S390_VM_MEM_CLR_CMMA: + ret = sclp.has_cmma ? 0 : -ENXIO; + break; case KVM_S390_VM_MEM_LIMIT_SIZE: ret = 0; break; @@ -826,8 +1066,13 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) switch (attr->attr) { case KVM_S390_VM_CPU_PROCESSOR: case KVM_S390_VM_CPU_MACHINE: + case KVM_S390_VM_CPU_PROCESSOR_FEAT: + case KVM_S390_VM_CPU_MACHINE_FEAT: + case KVM_S390_VM_CPU_MACHINE_SUBFUNC: ret = 0; break; + /* configuring subfunctions is not supported yet */ + case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: default: ret = -ENXIO; break; @@ -858,7 +1103,6 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) { uint8_t *keys; uint64_t hva; - unsigned long curkey; int i, r = 0; if (args->flags != 0) @@ -879,26 +1123,27 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (!keys) return -ENOMEM; + down_read(¤t->mm->mmap_sem); for (i = 0; i < args->count; i++) { hva = gfn_to_hva(kvm, args->start_gfn + i); if (kvm_is_error_hva(hva)) { r = -EFAULT; - goto out; + break; } - curkey = get_guest_storage_key(current->mm, hva); - if (IS_ERR_VALUE(curkey)) { - r = curkey; - goto out; - } - keys[i] = curkey; + r = get_guest_storage_key(current->mm, hva, &keys[i]); + if (r) + break; + } + up_read(¤t->mm->mmap_sem); + + if (!r) { + r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, + sizeof(uint8_t) * args->count); + if (r) + r = -EFAULT; } - r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, - sizeof(uint8_t) * args->count); - if (r) - r = -EFAULT; -out: kvfree(keys); return r; } @@ -935,24 +1180,25 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (r) goto out; + down_read(¤t->mm->mmap_sem); for (i = 0; i < args->count; i++) { hva = gfn_to_hva(kvm, args->start_gfn + i); if (kvm_is_error_hva(hva)) { r = -EFAULT; - goto out; + break; } /* Lowest order bit is reserved */ if (keys[i] & 0x01) { r = -EINVAL; - goto out; + break; } - r = set_guest_storage_key(current->mm, hva, - (unsigned long)keys[i], 0); + r = set_guest_storage_key(current->mm, hva, keys[i], 0); if (r) - goto out; + break; } + up_read(¤t->mm->mmap_sem); out: kvfree(keys); return r; @@ -1129,6 +1375,7 @@ static void sca_dispose(struct kvm *kvm) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { + gfp_t alloc_flags = GFP_KERNEL; int i, rc; char debug_name[16]; static unsigned long sca_offset; @@ -1150,9 +1397,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) rc = -ENOMEM; + ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500); + kvm->arch.use_esca = 0; /* start with basic SCA */ + if (!sclp.has_64bscao) + alloc_flags |= GFP_DMA; rwlock_init(&kvm->arch.sca_lock); - kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL); + kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); if (!kvm->arch.sca) goto out_err; spin_lock(&kvm_lock); @@ -1189,6 +1440,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask, S390_ARCH_FAC_LIST_SIZE_BYTE); + set_kvm_facility(kvm->arch.model.fac_mask, 74); + set_kvm_facility(kvm->arch.model.fac_list, 74); + kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); kvm->arch.model.ibc = sclp.ibc & 0x0fff; @@ -1212,7 +1466,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) else kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE, sclp.hamax + 1); - kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1); + kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); if (!kvm->arch.gmap) goto out_err; kvm->arch.gmap->private = kvm; @@ -1224,6 +1478,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.epoch = 0; spin_lock_init(&kvm->arch.start_stop_lock); + kvm_s390_vsie_init(kvm); KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); return 0; @@ -1245,7 +1500,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) sca_del_vcpu(vcpu); if (kvm_is_ucontrol(vcpu->kvm)) - gmap_free(vcpu->arch.gmap); + gmap_remove(vcpu->arch.gmap); if (vcpu->kvm->arch.use_cmma) kvm_s390_vcpu_unsetup_cmma(vcpu); @@ -1278,16 +1533,17 @@ void kvm_arch_destroy_vm(struct kvm *kvm) debug_unregister(kvm->arch.dbf); free_page((unsigned long)kvm->arch.sie_page2); if (!kvm_is_ucontrol(kvm)) - gmap_free(kvm->arch.gmap); + gmap_remove(kvm->arch.gmap); kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); + kvm_s390_vsie_destroy(kvm); KVM_EVENT(3, "vm 0x%pK destroyed", kvm); } /* Section: vcpu related */ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) { - vcpu->arch.gmap = gmap_alloc(current->mm, -1UL); + vcpu->arch.gmap = gmap_create(current->mm, -1UL); if (!vcpu->arch.gmap) return -ENOMEM; vcpu->arch.gmap->private = vcpu->kvm; @@ -1396,7 +1652,7 @@ static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) if (id < KVM_S390_BSCA_CPU_SLOTS) return true; - if (!sclp.has_esca) + if (!sclp.has_esca || !sclp.has_64bscao) return false; mutex_lock(&kvm->lock); @@ -1537,7 +1793,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) save_access_regs(vcpu->arch.host_acrs); restore_access_regs(vcpu->run->s.regs.acrs); - gmap_enable(vcpu->arch.gmap); + gmap_enable(vcpu->arch.enabled_gmap); atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) __start_cpu_timer_accounting(vcpu); @@ -1550,7 +1806,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) __stop_cpu_timer_accounting(vcpu); atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); - gmap_disable(vcpu->arch.gmap); + vcpu->arch.enabled_gmap = gmap_get_enabled(); + gmap_disable(vcpu->arch.enabled_gmap); /* Save guest register state */ save_fpu_regs(); @@ -1599,7 +1856,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) vcpu->arch.gmap = vcpu->kvm->arch.gmap; sca_add_vcpu(vcpu); } - + if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) + vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; + /* make vcpu_load load the right gmap on the first trigger */ + vcpu->arch.enabled_gmap = vcpu->arch.gmap; } static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) @@ -1658,15 +1918,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) kvm_s390_vcpu_setup_model(vcpu); - vcpu->arch.sie_block->ecb = 0x02; + /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ + if (MACHINE_HAS_ESOP) + vcpu->arch.sie_block->ecb |= 0x02; if (test_kvm_facility(vcpu->kvm, 9)) vcpu->arch.sie_block->ecb |= 0x04; - if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73)) + if (test_kvm_facility(vcpu->kvm, 73)) vcpu->arch.sie_block->ecb |= 0x10; - if (test_kvm_facility(vcpu->kvm, 8)) + if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi) vcpu->arch.sie_block->ecb2 |= 0x08; - vcpu->arch.sie_block->eca = 0xC1002000U; + vcpu->arch.sie_block->eca = 0x1002000U; + if (sclp.has_cei) + vcpu->arch.sie_block->eca |= 0x80000000U; + if (sclp.has_ib) + vcpu->arch.sie_block->eca |= 0x40000000U; if (sclp.has_siif) vcpu->arch.sie_block->eca |= 1; if (sclp.has_sigpif) @@ -1716,6 +1982,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block = &sie_page->sie_block; vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; + /* the real guest size will always be smaller than msl */ + vcpu->arch.sie_block->mso = 0; + vcpu->arch.sie_block->msl = sclp.hamax; + vcpu->arch.sie_block->icpua = id; spin_lock_init(&vcpu->arch.local_int.lock); vcpu->arch.local_int.float_int = &kvm->arch.float_int; @@ -1784,16 +2054,25 @@ void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) kvm_s390_vcpu_request(vcpu); } -static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address) +static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, + unsigned long end) { - int i; struct kvm *kvm = gmap->private; struct kvm_vcpu *vcpu; + unsigned long prefix; + int i; + if (gmap_is_shadow(gmap)) + return; + if (start >= 1UL << 31) + /* We are only interested in prefix pages */ + return; kvm_for_each_vcpu(i, vcpu, kvm) { /* match against both prefix pages */ - if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) { - VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address); + prefix = kvm_s390_get_prefix(vcpu); + if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { + VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", + start, end); kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu); } } @@ -2002,6 +2281,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, if (dbg->control & ~VALID_GUESTDBG_FLAGS) return -EINVAL; + if (!sclp.has_gpere) + return -EINVAL; if (dbg->control & KVM_GUESTDBG_ENABLE) { vcpu->guest_debug = dbg->control; @@ -2070,16 +2351,16 @@ retry: return 0; /* * We use MMU_RELOAD just to re-arm the ipte notifier for the - * guest prefix page. gmap_ipte_notify will wait on the ptl lock. + * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. * This ensures that the ipte instruction for this request has * already finished. We might race against a second unmapper that * wants to set the blocking bit. Lets just retry the request loop. */ if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { int rc; - rc = gmap_ipte_notify(vcpu->arch.gmap, - kvm_s390_get_prefix(vcpu), - PAGE_SIZE * 2); + rc = gmap_mprotect_notify(vcpu->arch.gmap, + kvm_s390_get_prefix(vcpu), + PAGE_SIZE * 2, PROT_WRITE); if (rc) return rc; goto retry; @@ -2108,6 +2389,11 @@ retry: goto retry; } + if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { + vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; + goto retry; + } + /* nothing to do, just clear the request */ clear_bit(KVM_REQ_UNHALT, &vcpu->requests); @@ -2362,14 +2648,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) * guest_enter and guest_exit should be no uaccess. */ local_irq_disable(); - __kvm_guest_enter(); + guest_enter_irqoff(); __disable_cpu_timer_accounting(vcpu); local_irq_enable(); exit_reason = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); local_irq_disable(); __enable_cpu_timer_accounting(vcpu); - __kvm_guest_exit(); + guest_exit_irqoff(); local_irq_enable(); vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); @@ -2598,6 +2884,8 @@ static void __disable_ibs_on_all_vcpus(struct kvm *kvm) static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) { + if (!sclp.has_ibs) + return; kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 8621ab00ec8e..b8432862a817 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -56,7 +56,7 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) { - return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_WAIT; + return test_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); } static inline int kvm_is_ucontrol(struct kvm *kvm) @@ -175,6 +175,12 @@ static inline int set_kvm_facility(u64 *fac_list, unsigned long nr) return 0; } +static inline int test_kvm_cpu_feat(struct kvm *kvm, unsigned long nr) +{ + WARN_ON_ONCE(nr >= KVM_S390_VM_CPU_FEAT_NR_BITS); + return test_bit_inv(nr, kvm->arch.cpu_feat); +} + /* are cpu states controlled by user space */ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) { @@ -232,6 +238,8 @@ static inline void kvm_s390_forward_psw(struct kvm_vcpu *vcpu, int ilen) } static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu) { + /* don't inject PER events if we re-execute the instruction */ + vcpu->arch.sie_block->icptstatus &= ~0x02; kvm_s390_rewind_psw(vcpu, kvm_s390_get_ilen(vcpu)); } @@ -246,10 +254,21 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu); int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu); int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); +/* implemented in vsie.c */ +int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu); +void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu); +void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, + unsigned long end); +void kvm_s390_vsie_init(struct kvm *kvm); +void kvm_s390_vsie_destroy(struct kvm *kvm); + /* implemented in sigp.c */ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); +/* implemented in sthyi.c */ +int handle_sthyi(struct kvm_vcpu *vcpu); + /* implemented in kvm-s390.c */ void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); @@ -360,6 +379,7 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg); void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu); void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu); +int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu); void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu); /* support for Basic/Extended SCA handling */ diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 95916fa7c670..46160388e996 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -27,6 +27,7 @@ #include <asm/io.h> #include <asm/ptrace.h> #include <asm/compat.h> +#include <asm/sclp.h> #include "gaccess.h" #include "kvm-s390.h" #include "trace.h" @@ -152,30 +153,166 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) static int __skey_check_enable(struct kvm_vcpu *vcpu) { int rc = 0; + + trace_kvm_s390_skey_related_inst(vcpu); if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE))) return rc; rc = s390_enable_skey(); - VCPU_EVENT(vcpu, 3, "%s", "enabling storage keys for guest"); - trace_kvm_s390_skey_related_inst(vcpu); - vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); + VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc); + if (!rc) + vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); return rc; } - -static int handle_skey(struct kvm_vcpu *vcpu) +static int try_handle_skey(struct kvm_vcpu *vcpu) { - int rc = __skey_check_enable(vcpu); + int rc; + vcpu->stat.instruction_storage_key++; + rc = __skey_check_enable(vcpu); if (rc) return rc; - vcpu->stat.instruction_storage_key++; - + if (sclp.has_skey) { + /* with storage-key facility, SIE interprets it for us */ + kvm_s390_retry_instr(vcpu); + VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); + return -EAGAIN; + } if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + return 0; +} - kvm_s390_retry_instr(vcpu); - VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); +static int handle_iske(struct kvm_vcpu *vcpu) +{ + unsigned long addr; + unsigned char key; + int reg1, reg2; + int rc; + + rc = try_handle_skey(vcpu); + if (rc) + return rc != -EAGAIN ? rc : 0; + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + + addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + addr = kvm_s390_logical_to_effective(vcpu, addr); + addr = kvm_s390_real_to_abs(vcpu, addr); + addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr)); + if (kvm_is_error_hva(addr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + down_read(¤t->mm->mmap_sem); + rc = get_guest_storage_key(current->mm, addr, &key); + up_read(¤t->mm->mmap_sem); + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + vcpu->run->s.regs.gprs[reg1] &= ~0xff; + vcpu->run->s.regs.gprs[reg1] |= key; + return 0; +} + +static int handle_rrbe(struct kvm_vcpu *vcpu) +{ + unsigned long addr; + int reg1, reg2; + int rc; + + rc = try_handle_skey(vcpu); + if (rc) + return rc != -EAGAIN ? rc : 0; + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + + addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + addr = kvm_s390_logical_to_effective(vcpu, addr); + addr = kvm_s390_real_to_abs(vcpu, addr); + addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr)); + if (kvm_is_error_hva(addr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + down_read(¤t->mm->mmap_sem); + rc = reset_guest_reference_bit(current->mm, addr); + up_read(¤t->mm->mmap_sem); + if (rc < 0) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + kvm_s390_set_psw_cc(vcpu, rc); + return 0; +} + +#define SSKE_NQ 0x8 +#define SSKE_MR 0x4 +#define SSKE_MC 0x2 +#define SSKE_MB 0x1 +static int handle_sske(struct kvm_vcpu *vcpu) +{ + unsigned char m3 = vcpu->arch.sie_block->ipb >> 28; + unsigned long start, end; + unsigned char key, oldkey; + int reg1, reg2; + int rc; + + rc = try_handle_skey(vcpu); + if (rc) + return rc != -EAGAIN ? rc : 0; + + if (!test_kvm_facility(vcpu->kvm, 8)) + m3 &= ~SSKE_MB; + if (!test_kvm_facility(vcpu->kvm, 10)) + m3 &= ~(SSKE_MC | SSKE_MR); + if (!test_kvm_facility(vcpu->kvm, 14)) + m3 &= ~SSKE_NQ; + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + + key = vcpu->run->s.regs.gprs[reg1] & 0xfe; + start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + start = kvm_s390_logical_to_effective(vcpu, start); + if (m3 & SSKE_MB) { + /* start already designates an absolute address */ + end = (start + (1UL << 20)) & ~((1UL << 20) - 1); + } else { + start = kvm_s390_real_to_abs(vcpu, start); + end = start + PAGE_SIZE; + } + + while (start != end) { + unsigned long addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); + + if (kvm_is_error_hva(addr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + down_read(¤t->mm->mmap_sem); + rc = cond_set_guest_storage_key(current->mm, addr, key, &oldkey, + m3 & SSKE_NQ, m3 & SSKE_MR, + m3 & SSKE_MC); + up_read(¤t->mm->mmap_sem); + if (rc < 0) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + start += PAGE_SIZE; + }; + + if (m3 & (SSKE_MC | SSKE_MR)) { + if (m3 & SSKE_MB) { + /* skey in reg1 is unpredictable */ + kvm_s390_set_psw_cc(vcpu, 3); + } else { + kvm_s390_set_psw_cc(vcpu, rc); + vcpu->run->s.regs.gprs[reg1] &= ~0xff00UL; + vcpu->run->s.regs.gprs[reg1] |= (u64) oldkey << 8; + } + } + if (m3 & SSKE_MB) { + if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT) + vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK; + else + vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL; + end = kvm_s390_logical_to_effective(vcpu, end); + vcpu->run->s.regs.gprs[reg2] |= end; + } return 0; } @@ -582,10 +719,11 @@ static const intercept_handler_t b2_handlers[256] = { [0x10] = handle_set_prefix, [0x11] = handle_store_prefix, [0x12] = handle_store_cpu_address, + [0x14] = kvm_s390_handle_vsie, [0x21] = handle_ipte_interlock, - [0x29] = handle_skey, - [0x2a] = handle_skey, - [0x2b] = handle_skey, + [0x29] = handle_iske, + [0x2a] = handle_rrbe, + [0x2b] = handle_sske, [0x2c] = handle_test_block, [0x30] = handle_io_inst, [0x31] = handle_io_inst, @@ -654,8 +792,10 @@ static int handle_epsw(struct kvm_vcpu *vcpu) static int handle_pfmf(struct kvm_vcpu *vcpu) { + bool mr = false, mc = false, nq; int reg1, reg2; unsigned long start, end; + unsigned char key; vcpu->stat.instruction_pfmf++; @@ -675,15 +815,27 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) !test_kvm_facility(vcpu->kvm, 14)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - /* No support for conditional-SSKE */ - if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC)) - return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + /* Only provide conditional-SSKE support if enabled for the guest */ + if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK && + test_kvm_facility(vcpu->kvm, 10)) { + mr = vcpu->run->s.regs.gprs[reg1] & PFMF_MR; + mc = vcpu->run->s.regs.gprs[reg1] & PFMF_MC; + } + nq = vcpu->run->s.regs.gprs[reg1] & PFMF_NQ; + key = vcpu->run->s.regs.gprs[reg1] & PFMF_KEY; start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; start = kvm_s390_logical_to_effective(vcpu, start); + if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { + if (kvm_s390_check_low_addr_prot_real(vcpu, start)) + return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); + } + switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { case 0x00000000: + /* only 4k frames specify a real address */ + start = kvm_s390_real_to_abs(vcpu, start); end = (start + (1UL << 12)) & ~((1UL << 12) - 1); break; case 0x00001000: @@ -701,20 +853,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); } - if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { - if (kvm_s390_check_low_addr_prot_real(vcpu, start)) - return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); - } - - while (start < end) { - unsigned long useraddr, abs_addr; + while (start != end) { + unsigned long useraddr; /* Translate guest address to host address */ - if ((vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) == 0) - abs_addr = kvm_s390_real_to_abs(vcpu, start); - else - abs_addr = start; - useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(abs_addr)); + useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); if (kvm_is_error_hva(useraddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); @@ -728,16 +871,25 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) if (rc) return rc; - if (set_guest_storage_key(current->mm, useraddr, - vcpu->run->s.regs.gprs[reg1] & PFMF_KEY, - vcpu->run->s.regs.gprs[reg1] & PFMF_NQ)) + down_read(¤t->mm->mmap_sem); + rc = cond_set_guest_storage_key(current->mm, useraddr, + key, NULL, nq, mr, mc); + up_read(¤t->mm->mmap_sem); + if (rc < 0) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } start += PAGE_SIZE; } - if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) - vcpu->run->s.regs.gprs[reg2] = end; + if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { + if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT) { + vcpu->run->s.regs.gprs[reg2] = end; + } else { + vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL; + end = kvm_s390_logical_to_effective(vcpu, end); + vcpu->run->s.regs.gprs[reg2] |= end; + } + } return 0; } @@ -1033,7 +1185,15 @@ static int handle_sckpf(struct kvm_vcpu *vcpu) return 0; } +static int handle_ptff(struct kvm_vcpu *vcpu) +{ + /* we don't emulate any control instructions yet */ + kvm_s390_set_psw_cc(vcpu, 3); + return 0; +} + static const intercept_handler_t x01_handlers[256] = { + [0x04] = handle_ptff, [0x07] = handle_sckpf, }; diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 28ea0cab1f1b..1a252f537081 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -77,18 +77,18 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT; u16 p_asn, s_asn; psw_t *psw; - u32 flags; + bool idle; - flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags); + idle = is_vcpu_idle(vcpu); psw = &dst_vcpu->arch.sie_block->gpsw; p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff; /* Primary ASN */ s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff; /* Secondary ASN */ /* Inject the emergency signal? */ - if (!(flags & CPUSTAT_STOPPED) + if (!is_vcpu_stopped(vcpu) || (psw->mask & psw_int_mask) != psw_int_mask - || ((flags & CPUSTAT_WAIT) && psw->addr != 0) - || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) { + || (idle && psw->addr != 0) + || (!idle && (asn == p_asn || asn == s_asn))) { return __inject_sigp_emergency(vcpu, dst_vcpu); } else { *reg &= 0xffffffff00000000UL; diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c new file mode 100644 index 000000000000..bd98b7d25200 --- /dev/null +++ b/arch/s390/kvm/sthyi.c @@ -0,0 +1,471 @@ +/* + * store hypervisor information instruction emulation functions. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Copyright IBM Corp. 2016 + * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com> + */ +#include <linux/kvm_host.h> +#include <linux/errno.h> +#include <linux/pagemap.h> +#include <linux/vmalloc.h> +#include <linux/ratelimit.h> + +#include <asm/kvm_host.h> +#include <asm/asm-offsets.h> +#include <asm/sclp.h> +#include <asm/diag.h> +#include <asm/sysinfo.h> +#include <asm/ebcdic.h> + +#include "kvm-s390.h" +#include "gaccess.h" +#include "trace.h" + +#define DED_WEIGHT 0xffff +/* + * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string + * as they are justified with spaces. + */ +#define CP 0xc3d7404040404040UL +#define IFL 0xc9c6d34040404040UL + +enum hdr_flags { + HDR_NOT_LPAR = 0x10, + HDR_STACK_INCM = 0x20, + HDR_STSI_UNAV = 0x40, + HDR_PERF_UNAV = 0x80, +}; + +enum mac_validity { + MAC_NAME_VLD = 0x20, + MAC_ID_VLD = 0x40, + MAC_CNT_VLD = 0x80, +}; + +enum par_flag { + PAR_MT_EN = 0x80, +}; + +enum par_validity { + PAR_GRP_VLD = 0x08, + PAR_ID_VLD = 0x10, + PAR_ABS_VLD = 0x20, + PAR_WGHT_VLD = 0x40, + PAR_PCNT_VLD = 0x80, +}; + +struct hdr_sctn { + u8 infhflg1; + u8 infhflg2; /* reserved */ + u8 infhval1; /* reserved */ + u8 infhval2; /* reserved */ + u8 reserved[3]; + u8 infhygct; + u16 infhtotl; + u16 infhdln; + u16 infmoff; + u16 infmlen; + u16 infpoff; + u16 infplen; + u16 infhoff1; + u16 infhlen1; + u16 infgoff1; + u16 infglen1; + u16 infhoff2; + u16 infhlen2; + u16 infgoff2; + u16 infglen2; + u16 infhoff3; + u16 infhlen3; + u16 infgoff3; + u16 infglen3; + u8 reserved2[4]; +} __packed; + +struct mac_sctn { + u8 infmflg1; /* reserved */ + u8 infmflg2; /* reserved */ + u8 infmval1; + u8 infmval2; /* reserved */ + u16 infmscps; + u16 infmdcps; + u16 infmsifl; + u16 infmdifl; + char infmname[8]; + char infmtype[4]; + char infmmanu[16]; + char infmseq[16]; + char infmpman[4]; + u8 reserved[4]; +} __packed; + +struct par_sctn { + u8 infpflg1; + u8 infpflg2; /* reserved */ + u8 infpval1; + u8 infpval2; /* reserved */ + u16 infppnum; + u16 infpscps; + u16 infpdcps; + u16 infpsifl; + u16 infpdifl; + u16 reserved; + char infppnam[8]; + u32 infpwbcp; + u32 infpabcp; + u32 infpwbif; + u32 infpabif; + char infplgnm[8]; + u32 infplgcp; + u32 infplgif; +} __packed; + +struct sthyi_sctns { + struct hdr_sctn hdr; + struct mac_sctn mac; + struct par_sctn par; +} __packed; + +struct cpu_inf { + u64 lpar_cap; + u64 lpar_grp_cap; + u64 lpar_weight; + u64 all_weight; + int cpu_num_ded; + int cpu_num_shd; +}; + +struct lpar_cpu_inf { + struct cpu_inf cp; + struct cpu_inf ifl; +}; + +static inline u64 cpu_id(u8 ctidx, void *diag224_buf) +{ + return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN)); +} + +/* + * Scales the cpu capping from the lpar range to the one expected in + * sthyi data. + * + * diag204 reports a cap in hundredths of processor units. + * z/VM's range for one core is 0 - 0x10000. + */ +static u32 scale_cap(u32 in) +{ + return (0x10000 * in) / 100; +} + +static void fill_hdr(struct sthyi_sctns *sctns) +{ + sctns->hdr.infhdln = sizeof(sctns->hdr); + sctns->hdr.infmoff = sizeof(sctns->hdr); + sctns->hdr.infmlen = sizeof(sctns->mac); + sctns->hdr.infplen = sizeof(sctns->par); + sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen; + sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen; +} + +static void fill_stsi_mac(struct sthyi_sctns *sctns, + struct sysinfo_1_1_1 *sysinfo) +{ + if (stsi(sysinfo, 1, 1, 1)) + return; + + sclp_ocf_cpc_name_copy(sctns->mac.infmname); + + memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype)); + memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu)); + memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman)); + memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq)); + + sctns->mac.infmval1 |= MAC_ID_VLD | MAC_NAME_VLD; +} + +static void fill_stsi_par(struct sthyi_sctns *sctns, + struct sysinfo_2_2_2 *sysinfo) +{ + if (stsi(sysinfo, 2, 2, 2)) + return; + + sctns->par.infppnum = sysinfo->lpar_number; + memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam)); + + sctns->par.infpval1 |= PAR_ID_VLD; +} + +static void fill_stsi(struct sthyi_sctns *sctns) +{ + void *sysinfo; + + /* Errors are handled through the validity bits in the response. */ + sysinfo = (void *)__get_free_page(GFP_KERNEL); + if (!sysinfo) + return; + + fill_stsi_mac(sctns, sysinfo); + fill_stsi_par(sctns, sysinfo); + + free_pages((unsigned long)sysinfo, 0); +} + +static void fill_diag_mac(struct sthyi_sctns *sctns, + struct diag204_x_phys_block *block, + void *diag224_buf) +{ + int i; + + for (i = 0; i < block->hdr.cpus; i++) { + switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { + case CP: + if (block->cpus[i].weight == DED_WEIGHT) + sctns->mac.infmdcps++; + else + sctns->mac.infmscps++; + break; + case IFL: + if (block->cpus[i].weight == DED_WEIGHT) + sctns->mac.infmdifl++; + else + sctns->mac.infmsifl++; + break; + } + } + sctns->mac.infmval1 |= MAC_CNT_VLD; +} + +/* Returns a pointer to the the next partition block. */ +static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf, + bool this_lpar, + void *diag224_buf, + struct diag204_x_part_block *block) +{ + int i, capped = 0, weight_cp = 0, weight_ifl = 0; + struct cpu_inf *cpu_inf; + + for (i = 0; i < block->hdr.rcpus; i++) { + if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE)) + continue; + + switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { + case CP: + cpu_inf = &part_inf->cp; + if (block->cpus[i].cur_weight < DED_WEIGHT) + weight_cp |= block->cpus[i].cur_weight; + break; + case IFL: + cpu_inf = &part_inf->ifl; + if (block->cpus[i].cur_weight < DED_WEIGHT) + weight_ifl |= block->cpus[i].cur_weight; + break; + default: + continue; + } + + if (!this_lpar) + continue; + + capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED; + cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap; + cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap; + + if (block->cpus[i].weight == DED_WEIGHT) + cpu_inf->cpu_num_ded += 1; + else + cpu_inf->cpu_num_shd += 1; + } + + if (this_lpar && capped) { + part_inf->cp.lpar_weight = weight_cp; + part_inf->ifl.lpar_weight = weight_ifl; + } + part_inf->cp.all_weight += weight_cp; + part_inf->ifl.all_weight += weight_ifl; + return (struct diag204_x_part_block *)&block->cpus[i]; +} + +static void fill_diag(struct sthyi_sctns *sctns) +{ + int i, r, pages; + bool this_lpar; + void *diag204_buf; + void *diag224_buf = NULL; + struct diag204_x_info_blk_hdr *ti_hdr; + struct diag204_x_part_block *part_block; + struct diag204_x_phys_block *phys_block; + struct lpar_cpu_inf lpar_inf = {}; + + /* Errors are handled through the validity bits in the response. */ + pages = diag204((unsigned long)DIAG204_SUBC_RSI | + (unsigned long)DIAG204_INFO_EXT, 0, NULL); + if (pages <= 0) + return; + + diag204_buf = vmalloc(PAGE_SIZE * pages); + if (!diag204_buf) + return; + + r = diag204((unsigned long)DIAG204_SUBC_STIB7 | + (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf); + if (r < 0) + goto out; + + diag224_buf = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA); + if (!diag224_buf || diag224(diag224_buf)) + goto out; + + ti_hdr = diag204_buf; + part_block = diag204_buf + sizeof(*ti_hdr); + + for (i = 0; i < ti_hdr->npar; i++) { + /* + * For the calling lpar we also need to get the cpu + * caps and weights. The time information block header + * specifies the offset to the partition block of the + * caller lpar, so we know when we process its data. + */ + this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part; + part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf, + part_block); + } + + phys_block = (struct diag204_x_phys_block *)part_block; + part_block = diag204_buf + ti_hdr->this_part; + if (part_block->hdr.mtid) + sctns->par.infpflg1 = PAR_MT_EN; + + sctns->par.infpval1 |= PAR_GRP_VLD; + sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap); + sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap); + memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name, + sizeof(sctns->par.infplgnm)); + + sctns->par.infpscps = lpar_inf.cp.cpu_num_shd; + sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded; + sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd; + sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded; + sctns->par.infpval1 |= PAR_PCNT_VLD; + + sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap); + sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap); + sctns->par.infpval1 |= PAR_ABS_VLD; + + /* + * Everything below needs global performance data to be + * meaningful. + */ + if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) { + sctns->hdr.infhflg1 |= HDR_PERF_UNAV; + goto out; + } + + fill_diag_mac(sctns, phys_block, diag224_buf); + + if (lpar_inf.cp.lpar_weight) { + sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 * + lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight; + } + + if (lpar_inf.ifl.lpar_weight) { + sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 * + lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight; + } + sctns->par.infpval1 |= PAR_WGHT_VLD; + +out: + kfree(diag224_buf); + vfree(diag204_buf); +} + +static int sthyi(u64 vaddr) +{ + register u64 code asm("0") = 0; + register u64 addr asm("2") = vaddr; + int cc; + + asm volatile( + ".insn rre,0xB2560000,%[code],%[addr]\n" + "ipm %[cc]\n" + "srl %[cc],28\n" + : [cc] "=d" (cc) + : [code] "d" (code), [addr] "a" (addr) + : "memory", "cc"); + return cc; +} + +int handle_sthyi(struct kvm_vcpu *vcpu) +{ + int reg1, reg2, r = 0; + u64 code, addr, cc = 0; + struct sthyi_sctns *sctns = NULL; + + /* + * STHYI requires extensive locking in the higher hypervisors + * and is very computational/memory expensive. Therefore we + * ratelimit the executions per VM. + */ + if (!__ratelimit(&vcpu->kvm->arch.sthyi_limit)) { + kvm_s390_retry_instr(vcpu); + return 0; + } + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + code = vcpu->run->s.regs.gprs[reg1]; + addr = vcpu->run->s.regs.gprs[reg2]; + + vcpu->stat.instruction_sthyi++; + VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr); + trace_kvm_s390_handle_sthyi(vcpu, code, addr); + + if (reg1 == reg2 || reg1 & 1 || reg2 & 1 || addr & ~PAGE_MASK) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + if (code & 0xffff) { + cc = 3; + goto out; + } + + /* + * If the page has not yet been faulted in, we want to do that + * now and not after all the expensive calculations. + */ + r = write_guest(vcpu, addr, reg2, &cc, 1); + if (r) + return kvm_s390_inject_prog_cond(vcpu, r); + + sctns = (void *)get_zeroed_page(GFP_KERNEL); + if (!sctns) + return -ENOMEM; + + /* + * If we are a guest, we don't want to emulate an emulated + * instruction. We ask the hypervisor to provide the data. + */ + if (test_facility(74)) { + cc = sthyi((u64)sctns); + goto out; + } + + fill_hdr(sctns); + fill_stsi(sctns); + fill_diag(sctns); + +out: + if (!cc) { + r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE); + if (r) { + free_page((unsigned long)sctns); + return kvm_s390_inject_prog_cond(vcpu, r); + } + } + + free_page((unsigned long)sctns); + vcpu->run->s.regs.gprs[reg2 + 1] = cc ? 4 : 0; + kvm_s390_set_psw_cc(vcpu, cc); + return r; +} diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h index 916834d7a73a..4fc9d4e5be89 100644 --- a/arch/s390/kvm/trace.h +++ b/arch/s390/kvm/trace.h @@ -41,7 +41,7 @@ TRACE_EVENT(kvm_s390_skey_related_inst, TP_fast_assign( VCPU_ASSIGN_COMMON ), - VCPU_TP_PRINTK("%s", "first instruction related to skeys on vcpu") + VCPU_TP_PRINTK("%s", "storage key related instruction") ); TRACE_EVENT(kvm_s390_major_guest_pfault, @@ -185,8 +185,10 @@ TRACE_EVENT(kvm_s390_intercept_prog, __entry->code = code; ), - VCPU_TP_PRINTK("intercepted program interruption %04x", - __entry->code) + VCPU_TP_PRINTK("intercepted program interruption %04x (%s)", + __entry->code, + __print_symbolic(__entry->code, + icpt_prog_codes)) ); /* @@ -412,6 +414,47 @@ TRACE_EVENT(kvm_s390_handle_stsi, __entry->addr) ); +TRACE_EVENT(kvm_s390_handle_operexc, + TP_PROTO(VCPU_PROTO_COMMON, __u16 ipa, __u32 ipb), + TP_ARGS(VCPU_ARGS_COMMON, ipa, ipb), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(__u64, instruction) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->instruction = ((__u64)ipa << 48) | + ((__u64)ipb << 16); + ), + + VCPU_TP_PRINTK("operation exception on instruction %016llx (%s)", + __entry->instruction, + __print_symbolic(icpt_insn_decoder(__entry->instruction), + icpt_insn_codes)) + ); + +TRACE_EVENT(kvm_s390_handle_sthyi, + TP_PROTO(VCPU_PROTO_COMMON, u64 code, u64 addr), + TP_ARGS(VCPU_ARGS_COMMON, code, addr), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(u64, code) + __field(u64, addr) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->code = code; + __entry->addr = addr; + ), + + VCPU_TP_PRINTK("STHYI fc: %llu addr: %016llx", + __entry->code, __entry->addr) + ); + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c new file mode 100644 index 000000000000..c106488b4137 --- /dev/null +++ b/arch/s390/kvm/vsie.c @@ -0,0 +1,1091 @@ +/* + * kvm nested virtualization support for s390x + * + * Copyright IBM Corp. 2016 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com> + */ +#include <linux/vmalloc.h> +#include <linux/kvm_host.h> +#include <linux/bug.h> +#include <linux/list.h> +#include <linux/bitmap.h> +#include <asm/gmap.h> +#include <asm/mmu_context.h> +#include <asm/sclp.h> +#include <asm/nmi.h> +#include <asm/dis.h> +#include "kvm-s390.h" +#include "gaccess.h" + +struct vsie_page { + struct kvm_s390_sie_block scb_s; /* 0x0000 */ + /* the pinned originial scb */ + struct kvm_s390_sie_block *scb_o; /* 0x0200 */ + /* the shadow gmap in use by the vsie_page */ + struct gmap *gmap; /* 0x0208 */ + /* address of the last reported fault to guest2 */ + unsigned long fault_addr; /* 0x0210 */ + __u8 reserved[0x0700 - 0x0218]; /* 0x0218 */ + struct kvm_s390_crypto_cb crycb; /* 0x0700 */ + __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */ +} __packed; + +/* trigger a validity icpt for the given scb */ +static int set_validity_icpt(struct kvm_s390_sie_block *scb, + __u16 reason_code) +{ + scb->ipa = 0x1000; + scb->ipb = ((__u32) reason_code) << 16; + scb->icptcode = ICPT_VALIDITY; + return 1; +} + +/* mark the prefix as unmapped, this will block the VSIE */ +static void prefix_unmapped(struct vsie_page *vsie_page) +{ + atomic_or(PROG_REQUEST, &vsie_page->scb_s.prog20); +} + +/* mark the prefix as unmapped and wait until the VSIE has been left */ +static void prefix_unmapped_sync(struct vsie_page *vsie_page) +{ + prefix_unmapped(vsie_page); + if (vsie_page->scb_s.prog0c & PROG_IN_SIE) + atomic_or(CPUSTAT_STOP_INT, &vsie_page->scb_s.cpuflags); + while (vsie_page->scb_s.prog0c & PROG_IN_SIE) + cpu_relax(); +} + +/* mark the prefix as mapped, this will allow the VSIE to run */ +static void prefix_mapped(struct vsie_page *vsie_page) +{ + atomic_andnot(PROG_REQUEST, &vsie_page->scb_s.prog20); +} + +/* test if the prefix is mapped into the gmap shadow */ +static int prefix_is_mapped(struct vsie_page *vsie_page) +{ + return !(atomic_read(&vsie_page->scb_s.prog20) & PROG_REQUEST); +} + +/* copy the updated intervention request bits into the shadow scb */ +static void update_intervention_requests(struct vsie_page *vsie_page) +{ + const int bits = CPUSTAT_STOP_INT | CPUSTAT_IO_INT | CPUSTAT_EXT_INT; + int cpuflags; + + cpuflags = atomic_read(&vsie_page->scb_o->cpuflags); + atomic_andnot(bits, &vsie_page->scb_s.cpuflags); + atomic_or(cpuflags & bits, &vsie_page->scb_s.cpuflags); +} + +/* shadow (filter and validate) the cpuflags */ +static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) +{ + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; + struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; + int newflags, cpuflags = atomic_read(&scb_o->cpuflags); + + /* we don't allow ESA/390 guests */ + if (!(cpuflags & CPUSTAT_ZARCH)) + return set_validity_icpt(scb_s, 0x0001U); + + if (cpuflags & (CPUSTAT_RRF | CPUSTAT_MCDS)) + return set_validity_icpt(scb_s, 0x0001U); + else if (cpuflags & (CPUSTAT_SLSV | CPUSTAT_SLSR)) + return set_validity_icpt(scb_s, 0x0007U); + + /* intervention requests will be set later */ + newflags = CPUSTAT_ZARCH; + if (cpuflags & CPUSTAT_GED && test_kvm_facility(vcpu->kvm, 8)) + newflags |= CPUSTAT_GED; + if (cpuflags & CPUSTAT_GED2 && test_kvm_facility(vcpu->kvm, 78)) { + if (cpuflags & CPUSTAT_GED) + return set_validity_icpt(scb_s, 0x0001U); + newflags |= CPUSTAT_GED2; + } + if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GPERE)) + newflags |= cpuflags & CPUSTAT_P; + if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GSLS)) + newflags |= cpuflags & CPUSTAT_SM; + if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS)) + newflags |= cpuflags & CPUSTAT_IBS; + + atomic_set(&scb_s->cpuflags, newflags); + return 0; +} + +/* + * Create a shadow copy of the crycb block and setup key wrapping, if + * requested for guest 3 and enabled for guest 2. + * + * We only accept format-1 (no AP in g2), but convert it into format-2 + * There is nothing to do for format-0. + * + * Returns: - 0 if shadowed or nothing to do + * - > 0 if control has to be given to guest 2 + */ +static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) +{ + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; + struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; + u32 crycb_addr = scb_o->crycbd & 0x7ffffff8U; + unsigned long *b1, *b2; + u8 ecb3_flags; + + scb_s->crycbd = 0; + if (!(scb_o->crycbd & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1)) + return 0; + /* format-1 is supported with message-security-assist extension 3 */ + if (!test_kvm_facility(vcpu->kvm, 76)) + return 0; + /* we may only allow it if enabled for guest 2 */ + ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 & + (ECB3_AES | ECB3_DEA); + if (!ecb3_flags) + return 0; + + if ((crycb_addr & PAGE_MASK) != ((crycb_addr + 128) & PAGE_MASK)) + return set_validity_icpt(scb_s, 0x003CU); + else if (!crycb_addr) + return set_validity_icpt(scb_s, 0x0039U); + + /* copy only the wrapping keys */ + if (read_guest_real(vcpu, crycb_addr + 72, &vsie_page->crycb, 56)) + return set_validity_icpt(scb_s, 0x0035U); + + scb_s->ecb3 |= ecb3_flags; + scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT1 | + CRYCB_FORMAT2; + + /* xor both blocks in one run */ + b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask; + b2 = (unsigned long *) + vcpu->kvm->arch.crypto.crycb->dea_wrapping_key_mask; + /* as 56%8 == 0, bitmap_xor won't overwrite any data */ + bitmap_xor(b1, b1, b2, BITS_PER_BYTE * 56); + return 0; +} + +/* shadow (round up/down) the ibc to avoid validity icpt */ +static void prepare_ibc(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) +{ + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; + struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; + __u64 min_ibc = (sclp.ibc >> 16) & 0x0fffU; + + scb_s->ibc = 0; + /* ibc installed in g2 and requested for g3 */ + if (vcpu->kvm->arch.model.ibc && (scb_o->ibc & 0x0fffU)) { + scb_s->ibc = scb_o->ibc & 0x0fffU; + /* takte care of the minimum ibc level of the machine */ + if (scb_s->ibc < min_ibc) + scb_s->ibc = min_ibc; + /* take care of the maximum ibc level set for the guest */ + if (scb_s->ibc > vcpu->kvm->arch.model.ibc) + scb_s->ibc = vcpu->kvm->arch.model.ibc; + } +} + +/* unshadow the scb, copying parameters back to the real scb */ +static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) +{ + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; + struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; + + /* interception */ + scb_o->icptcode = scb_s->icptcode; + scb_o->icptstatus = scb_s->icptstatus; + scb_o->ipa = scb_s->ipa; + scb_o->ipb = scb_s->ipb; + scb_o->gbea = scb_s->gbea; + + /* timer */ + scb_o->cputm = scb_s->cputm; + scb_o->ckc = scb_s->ckc; + scb_o->todpr = scb_s->todpr; + + /* guest state */ + scb_o->gpsw = scb_s->gpsw; + scb_o->gg14 = scb_s->gg14; + scb_o->gg15 = scb_s->gg15; + memcpy(scb_o->gcr, scb_s->gcr, 128); + scb_o->pp = scb_s->pp; + + /* interrupt intercept */ + switch (scb_s->icptcode) { + case ICPT_PROGI: + case ICPT_INSTPROGI: + case ICPT_EXTINT: + memcpy((void *)((u64)scb_o + 0xc0), + (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0); + break; + case ICPT_PARTEXEC: + /* MVPG only */ + memcpy((void *)((u64)scb_o + 0xc0), + (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0); + break; + } + + if (scb_s->ihcpu != 0xffffU) + scb_o->ihcpu = scb_s->ihcpu; +} + +/* + * Setup the shadow scb by copying and checking the relevant parts of the g2 + * provided scb. + * + * Returns: - 0 if the scb has been shadowed + * - > 0 if control has to be given to guest 2 + */ +static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) +{ + struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; + bool had_tx = scb_s->ecb & 0x10U; + unsigned long new_mso = 0; + int rc; + + /* make sure we don't have any leftovers when reusing the scb */ + scb_s->icptcode = 0; + scb_s->eca = 0; + scb_s->ecb = 0; + scb_s->ecb2 = 0; + scb_s->ecb3 = 0; + scb_s->ecd = 0; + scb_s->fac = 0; + + rc = prepare_cpuflags(vcpu, vsie_page); + if (rc) + goto out; + + /* timer */ + scb_s->cputm = scb_o->cputm; + scb_s->ckc = scb_o->ckc; + scb_s->todpr = scb_o->todpr; + scb_s->epoch = scb_o->epoch; + + /* guest state */ + scb_s->gpsw = scb_o->gpsw; + scb_s->gg14 = scb_o->gg14; + scb_s->gg15 = scb_o->gg15; + memcpy(scb_s->gcr, scb_o->gcr, 128); + scb_s->pp = scb_o->pp; + + /* interception / execution handling */ + scb_s->gbea = scb_o->gbea; + scb_s->lctl = scb_o->lctl; + scb_s->svcc = scb_o->svcc; + scb_s->ictl = scb_o->ictl; + /* + * SKEY handling functions can't deal with false setting of PTE invalid + * bits. Therefore we cannot provide interpretation and would later + * have to provide own emulation handlers. + */ + scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; + scb_s->icpua = scb_o->icpua; + + if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM)) + new_mso = scb_o->mso & 0xfffffffffff00000UL; + /* if the hva of the prefix changes, we have to remap the prefix */ + if (scb_s->mso != new_mso || scb_s->prefix != scb_o->prefix) + prefix_unmapped(vsie_page); + /* SIE will do mso/msl validity and exception checks for us */ + scb_s->msl = scb_o->msl & 0xfffffffffff00000UL; + scb_s->mso = new_mso; + scb_s->prefix = scb_o->prefix; + + /* We have to definetly flush the tlb if this scb never ran */ + if (scb_s->ihcpu != 0xffffU) + scb_s->ihcpu = scb_o->ihcpu; + + /* MVPG and Protection Exception Interpretation are always available */ + scb_s->eca |= scb_o->eca & 0x01002000U; + /* Host-protection-interruption introduced with ESOP */ + if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP)) + scb_s->ecb |= scb_o->ecb & 0x02U; + /* transactional execution */ + if (test_kvm_facility(vcpu->kvm, 73)) { + /* remap the prefix is tx is toggled on */ + if ((scb_o->ecb & 0x10U) && !had_tx) + prefix_unmapped(vsie_page); + scb_s->ecb |= scb_o->ecb & 0x10U; + } + /* SIMD */ + if (test_kvm_facility(vcpu->kvm, 129)) { + scb_s->eca |= scb_o->eca & 0x00020000U; + scb_s->ecd |= scb_o->ecd & 0x20000000U; + } + /* Run-time-Instrumentation */ + if (test_kvm_facility(vcpu->kvm, 64)) + scb_s->ecb3 |= scb_o->ecb3 & 0x01U; + if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF)) + scb_s->eca |= scb_o->eca & 0x00000001U; + if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB)) + scb_s->eca |= scb_o->eca & 0x40000000U; + if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI)) + scb_s->eca |= scb_o->eca & 0x80000000U; + + prepare_ibc(vcpu, vsie_page); + rc = shadow_crycb(vcpu, vsie_page); +out: + if (rc) + unshadow_scb(vcpu, vsie_page); + return rc; +} + +void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, + unsigned long end) +{ + struct kvm *kvm = gmap->private; + struct vsie_page *cur; + unsigned long prefix; + struct page *page; + int i; + + if (!gmap_is_shadow(gmap)) + return; + if (start >= 1UL << 31) + /* We are only interested in prefix pages */ + return; + + /* + * Only new shadow blocks are added to the list during runtime, + * therefore we can safely reference them all the time. + */ + for (i = 0; i < kvm->arch.vsie.page_count; i++) { + page = READ_ONCE(kvm->arch.vsie.pages[i]); + if (!page) + continue; + cur = page_to_virt(page); + if (READ_ONCE(cur->gmap) != gmap) + continue; + prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT; + /* with mso/msl, the prefix lies at an offset */ + prefix += cur->scb_s.mso; + if (prefix <= end && start <= prefix + 2 * PAGE_SIZE - 1) + prefix_unmapped_sync(cur); + } +} + +/* + * Map the first prefix page and if tx is enabled also the second prefix page. + * + * The prefix will be protected, a gmap notifier will inform about unmaps. + * The shadow scb must not be executed until the prefix is remapped, this is + * guaranteed by properly handling PROG_REQUEST. + * + * Returns: - 0 on if successfully mapped or already mapped + * - > 0 if control has to be given to guest 2 + * - -EAGAIN if the caller can retry immediately + * - -ENOMEM if out of memory + */ +static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) +{ + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; + u64 prefix = scb_s->prefix << GUEST_PREFIX_SHIFT; + int rc; + + if (prefix_is_mapped(vsie_page)) + return 0; + + /* mark it as mapped so we can catch any concurrent unmappers */ + prefix_mapped(vsie_page); + + /* with mso/msl, the prefix lies at offset *mso* */ + prefix += scb_s->mso; + + rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix); + if (!rc && (scb_s->ecb & 0x10U)) + rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, + prefix + PAGE_SIZE); + /* + * We don't have to mprotect, we will be called for all unshadows. + * SIE will detect if protection applies and trigger a validity. + */ + if (rc) + prefix_unmapped(vsie_page); + if (rc > 0 || rc == -EFAULT) + rc = set_validity_icpt(scb_s, 0x0037U); + return rc; +} + +/* + * Pin the guest page given by gpa and set hpa to the pinned host address. + * Will always be pinned writable. + * + * Returns: - 0 on success + * - -EINVAL if the gpa is not valid guest storage + * - -ENOMEM if out of memory + */ +static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa) +{ + struct page *page; + hva_t hva; + int rc; + + hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); + if (kvm_is_error_hva(hva)) + return -EINVAL; + rc = get_user_pages_fast(hva, 1, 1, &page); + if (rc < 0) + return rc; + else if (rc != 1) + return -ENOMEM; + *hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK); + return 0; +} + +/* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */ +static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa) +{ + struct page *page; + + page = virt_to_page(hpa); + set_page_dirty_lock(page); + put_page(page); + /* mark the page always as dirty for migration */ + mark_page_dirty(kvm, gpa_to_gfn(gpa)); +} + +/* unpin all blocks previously pinned by pin_blocks(), marking them dirty */ +static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) +{ + struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; + hpa_t hpa; + gpa_t gpa; + + hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol; + if (hpa) { + gpa = scb_o->scaol & ~0xfUL; + if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO)) + gpa |= (u64) scb_o->scaoh << 32; + unpin_guest_page(vcpu->kvm, gpa, hpa); + scb_s->scaol = 0; + scb_s->scaoh = 0; + } + + hpa = scb_s->itdba; + if (hpa) { + gpa = scb_o->itdba & ~0xffUL; + unpin_guest_page(vcpu->kvm, gpa, hpa); + scb_s->itdba = 0; + } + + hpa = scb_s->gvrd; + if (hpa) { + gpa = scb_o->gvrd & ~0x1ffUL; + unpin_guest_page(vcpu->kvm, gpa, hpa); + scb_s->gvrd = 0; + } + + hpa = scb_s->riccbd; + if (hpa) { + gpa = scb_o->riccbd & ~0x3fUL; + unpin_guest_page(vcpu->kvm, gpa, hpa); + scb_s->riccbd = 0; + } +} + +/* + * Instead of shadowing some blocks, we can simply forward them because the + * addresses in the scb are 64 bit long. + * + * This works as long as the data lies in one page. If blocks ever exceed one + * page, we have to fall back to shadowing. + * + * As we reuse the sca, the vcpu pointers contained in it are invalid. We must + * therefore not enable any facilities that access these pointers (e.g. SIGPIF). + * + * Returns: - 0 if all blocks were pinned. + * - > 0 if control has to be given to guest 2 + * - -ENOMEM if out of memory + */ +static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) +{ + struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; + hpa_t hpa; + gpa_t gpa; + int rc = 0; + + gpa = scb_o->scaol & ~0xfUL; + if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO)) + gpa |= (u64) scb_o->scaoh << 32; + if (gpa) { + if (!(gpa & ~0x1fffUL)) + rc = set_validity_icpt(scb_s, 0x0038U); + else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu)) + rc = set_validity_icpt(scb_s, 0x0011U); + else if ((gpa & PAGE_MASK) != + ((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK)) + rc = set_validity_icpt(scb_s, 0x003bU); + if (!rc) { + rc = pin_guest_page(vcpu->kvm, gpa, &hpa); + if (rc == -EINVAL) + rc = set_validity_icpt(scb_s, 0x0034U); + } + if (rc) + goto unpin; + scb_s->scaoh = (u32)((u64)hpa >> 32); + scb_s->scaol = (u32)(u64)hpa; + } + + gpa = scb_o->itdba & ~0xffUL; + if (gpa && (scb_s->ecb & 0x10U)) { + if (!(gpa & ~0x1fffU)) { + rc = set_validity_icpt(scb_s, 0x0080U); + goto unpin; + } + /* 256 bytes cannot cross page boundaries */ + rc = pin_guest_page(vcpu->kvm, gpa, &hpa); + if (rc == -EINVAL) + rc = set_validity_icpt(scb_s, 0x0080U); + if (rc) + goto unpin; + scb_s->itdba = hpa; + } + + gpa = scb_o->gvrd & ~0x1ffUL; + if (gpa && (scb_s->eca & 0x00020000U) && + !(scb_s->ecd & 0x20000000U)) { + if (!(gpa & ~0x1fffUL)) { + rc = set_validity_icpt(scb_s, 0x1310U); + goto unpin; + } + /* + * 512 bytes vector registers cannot cross page boundaries + * if this block gets bigger, we have to shadow it. + */ + rc = pin_guest_page(vcpu->kvm, gpa, &hpa); + if (rc == -EINVAL) + rc = set_validity_icpt(scb_s, 0x1310U); + if (rc) + goto unpin; + scb_s->gvrd = hpa; + } + + gpa = scb_o->riccbd & ~0x3fUL; + if (gpa && (scb_s->ecb3 & 0x01U)) { + if (!(gpa & ~0x1fffUL)) { + rc = set_validity_icpt(scb_s, 0x0043U); + goto unpin; + } + /* 64 bytes cannot cross page boundaries */ + rc = pin_guest_page(vcpu->kvm, gpa, &hpa); + if (rc == -EINVAL) + rc = set_validity_icpt(scb_s, 0x0043U); + /* Validity 0x0044 will be checked by SIE */ + if (rc) + goto unpin; + scb_s->gvrd = hpa; + } + return 0; +unpin: + unpin_blocks(vcpu, vsie_page); + return rc; +} + +/* unpin the scb provided by guest 2, marking it as dirty */ +static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, + gpa_t gpa) +{ + hpa_t hpa = (hpa_t) vsie_page->scb_o; + + if (hpa) + unpin_guest_page(vcpu->kvm, gpa, hpa); + vsie_page->scb_o = NULL; +} + +/* + * Pin the scb at gpa provided by guest 2 at vsie_page->scb_o. + * + * Returns: - 0 if the scb was pinned. + * - > 0 if control has to be given to guest 2 + * - -ENOMEM if out of memory + */ +static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, + gpa_t gpa) +{ + hpa_t hpa; + int rc; + + rc = pin_guest_page(vcpu->kvm, gpa, &hpa); + if (rc == -EINVAL) { + rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (!rc) + rc = 1; + } + if (!rc) + vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa; + return rc; +} + +/* + * Inject a fault into guest 2. + * + * Returns: - > 0 if control has to be given to guest 2 + * < 0 if an error occurred during injection. + */ +static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr, + bool write_flag) +{ + struct kvm_s390_pgm_info pgm = { + .code = code, + .trans_exc_code = + /* 0-51: virtual address */ + (vaddr & 0xfffffffffffff000UL) | + /* 52-53: store / fetch */ + (((unsigned int) !write_flag) + 1) << 10, + /* 62-63: asce id (alway primary == 0) */ + .exc_access_id = 0, /* always primary */ + .op_access_id = 0, /* not MVPG */ + }; + int rc; + + if (code == PGM_PROTECTION) + pgm.trans_exc_code |= 0x4UL; + + rc = kvm_s390_inject_prog_irq(vcpu, &pgm); + return rc ? rc : 1; +} + +/* + * Handle a fault during vsie execution on a gmap shadow. + * + * Returns: - 0 if the fault was resolved + * - > 0 if control has to be given to guest 2 + * - < 0 if an error occurred + */ +static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) +{ + int rc; + + if (current->thread.gmap_int_code == PGM_PROTECTION) + /* we can directly forward all protection exceptions */ + return inject_fault(vcpu, PGM_PROTECTION, + current->thread.gmap_addr, 1); + + rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, + current->thread.gmap_addr); + if (rc > 0) { + rc = inject_fault(vcpu, rc, + current->thread.gmap_addr, + current->thread.gmap_write_flag); + if (rc >= 0) + vsie_page->fault_addr = current->thread.gmap_addr; + } + return rc; +} + +/* + * Retry the previous fault that required guest 2 intervention. This avoids + * one superfluous SIE re-entry and direct exit. + * + * Will ignore any errors. The next SIE fault will do proper fault handling. + */ +static void handle_last_fault(struct kvm_vcpu *vcpu, + struct vsie_page *vsie_page) +{ + if (vsie_page->fault_addr) + kvm_s390_shadow_fault(vcpu, vsie_page->gmap, + vsie_page->fault_addr); + vsie_page->fault_addr = 0; +} + +static inline void clear_vsie_icpt(struct vsie_page *vsie_page) +{ + vsie_page->scb_s.icptcode = 0; +} + +/* rewind the psw and clear the vsie icpt, so we can retry execution */ +static void retry_vsie_icpt(struct vsie_page *vsie_page) +{ + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; + int ilen = insn_length(scb_s->ipa >> 8); + + /* take care of EXECUTE instructions */ + if (scb_s->icptstatus & 1) { + ilen = (scb_s->icptstatus >> 4) & 0x6; + if (!ilen) + ilen = 4; + } + scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, ilen); + clear_vsie_icpt(vsie_page); +} + +/* + * Try to shadow + enable the guest 2 provided facility list. + * Retry instruction execution if enabled for and provided by guest 2. + * + * Returns: - 0 if handled (retry or guest 2 icpt) + * - > 0 if control has to be given to guest 2 + */ +static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) +{ + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; + __u32 fac = vsie_page->scb_o->fac & 0x7ffffff8U; + + if (fac && test_kvm_facility(vcpu->kvm, 7)) { + retry_vsie_icpt(vsie_page); + if (read_guest_real(vcpu, fac, &vsie_page->fac, + sizeof(vsie_page->fac))) + return set_validity_icpt(scb_s, 0x1090U); + scb_s->fac = (__u32)(__u64) &vsie_page->fac; + } + return 0; +} + +/* + * Run the vsie on a shadow scb and a shadow gmap, without any further + * sanity checks, handling SIE faults. + * + * Returns: - 0 everything went fine + * - > 0 if control has to be given to guest 2 + * - < 0 if an error occurred + */ +static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) +{ + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; + struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; + int rc; + + handle_last_fault(vcpu, vsie_page); + + if (need_resched()) + schedule(); + if (test_cpu_flag(CIF_MCCK_PENDING)) + s390_handle_mcck(); + + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + local_irq_disable(); + guest_enter_irqoff(); + local_irq_enable(); + + rc = sie64a(scb_s, vcpu->run->s.regs.gprs); + + local_irq_disable(); + guest_exit_irqoff(); + local_irq_enable(); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + + if (rc > 0) + rc = 0; /* we could still have an icpt */ + else if (rc == -EFAULT) + return handle_fault(vcpu, vsie_page); + + switch (scb_s->icptcode) { + case ICPT_INST: + if (scb_s->ipa == 0xb2b0) + rc = handle_stfle(vcpu, vsie_page); + break; + case ICPT_STOP: + /* stop not requested by g2 - must have been a kick */ + if (!(atomic_read(&scb_o->cpuflags) & CPUSTAT_STOP_INT)) + clear_vsie_icpt(vsie_page); + break; + case ICPT_VALIDITY: + if ((scb_s->ipa & 0xf000) != 0xf000) + scb_s->ipa += 0x1000; + break; + } + return rc; +} + +static void release_gmap_shadow(struct vsie_page *vsie_page) +{ + if (vsie_page->gmap) + gmap_put(vsie_page->gmap); + WRITE_ONCE(vsie_page->gmap, NULL); + prefix_unmapped(vsie_page); +} + +static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, + struct vsie_page *vsie_page) +{ + unsigned long asce; + union ctlreg0 cr0; + struct gmap *gmap; + int edat; + + asce = vcpu->arch.sie_block->gcr[1]; + cr0.val = vcpu->arch.sie_block->gcr[0]; + edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8); + edat += edat && test_kvm_facility(vcpu->kvm, 78); + + /* + * ASCE or EDAT could have changed since last icpt, or the gmap + * we're holding has been unshadowed. If the gmap is still valid, + * we can safely reuse it. + */ + if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) + return 0; + + /* release the old shadow - if any, and mark the prefix as unmapped */ + release_gmap_shadow(vsie_page); + gmap = gmap_shadow(vcpu->arch.gmap, asce, edat); + if (IS_ERR(gmap)) + return PTR_ERR(gmap); + gmap->private = vcpu->kvm; + WRITE_ONCE(vsie_page->gmap, gmap); + return 0; +} + +/* + * Register the shadow scb at the VCPU, e.g. for kicking out of vsie. + */ +static void register_shadow_scb(struct kvm_vcpu *vcpu, + struct vsie_page *vsie_page) +{ + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; + + WRITE_ONCE(vcpu->arch.vsie_block, &vsie_page->scb_s); + /* + * External calls have to lead to a kick of the vcpu and + * therefore the vsie -> Simulate Wait state. + */ + atomic_or(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); + /* + * We have to adjust the g3 epoch by the g2 epoch. The epoch will + * automatically be adjusted on tod clock changes via kvm_sync_clock. + */ + preempt_disable(); + scb_s->epoch += vcpu->kvm->arch.epoch; + preempt_enable(); +} + +/* + * Unregister a shadow scb from a VCPU. + */ +static void unregister_shadow_scb(struct kvm_vcpu *vcpu) +{ + atomic_andnot(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); + WRITE_ONCE(vcpu->arch.vsie_block, NULL); +} + +/* + * Run the vsie on a shadowed scb, managing the gmap shadow, handling + * prefix pages and faults. + * + * Returns: - 0 if no errors occurred + * - > 0 if control has to be given to guest 2 + * - -ENOMEM if out of memory + */ +static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) +{ + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; + int rc = 0; + + while (1) { + rc = acquire_gmap_shadow(vcpu, vsie_page); + if (!rc) + rc = map_prefix(vcpu, vsie_page); + if (!rc) { + gmap_enable(vsie_page->gmap); + update_intervention_requests(vsie_page); + rc = do_vsie_run(vcpu, vsie_page); + gmap_enable(vcpu->arch.gmap); + } + atomic_andnot(PROG_BLOCK_SIE, &scb_s->prog20); + + if (rc == -EAGAIN) + rc = 0; + if (rc || scb_s->icptcode || signal_pending(current) || + kvm_s390_vcpu_has_irq(vcpu, 0)) + break; + }; + + if (rc == -EFAULT) { + /* + * Addressing exceptions are always presentes as intercepts. + * As addressing exceptions are suppressing and our guest 3 PSW + * points at the responsible instruction, we have to + * forward the PSW and set the ilc. If we can't read guest 3 + * instruction, we can use an arbitrary ilc. Let's always use + * ilen = 4 for now, so we can avoid reading in guest 3 virtual + * memory. (we could also fake the shadow so the hardware + * handles it). + */ + scb_s->icptcode = ICPT_PROGI; + scb_s->iprcc = PGM_ADDRESSING; + scb_s->pgmilc = 4; + scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4); + } + return rc; +} + +/* + * Get or create a vsie page for a scb address. + * + * Returns: - address of a vsie page (cached or new one) + * - NULL if the same scb address is already used by another VCPU + * - ERR_PTR(-ENOMEM) if out of memory + */ +static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) +{ + struct vsie_page *vsie_page; + struct page *page; + int nr_vcpus; + + rcu_read_lock(); + page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9); + rcu_read_unlock(); + if (page) { + if (page_ref_inc_return(page) == 2) + return page_to_virt(page); + page_ref_dec(page); + } + + /* + * We want at least #online_vcpus shadows, so every VCPU can execute + * the VSIE in parallel. + */ + nr_vcpus = atomic_read(&kvm->online_vcpus); + + mutex_lock(&kvm->arch.vsie.mutex); + if (kvm->arch.vsie.page_count < nr_vcpus) { + page = alloc_page(GFP_KERNEL | __GFP_ZERO | GFP_DMA); + if (!page) { + mutex_unlock(&kvm->arch.vsie.mutex); + return ERR_PTR(-ENOMEM); + } + page_ref_inc(page); + kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page; + kvm->arch.vsie.page_count++; + } else { + /* reuse an existing entry that belongs to nobody */ + while (true) { + page = kvm->arch.vsie.pages[kvm->arch.vsie.next]; + if (page_ref_inc_return(page) == 2) + break; + page_ref_dec(page); + kvm->arch.vsie.next++; + kvm->arch.vsie.next %= nr_vcpus; + } + radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9); + } + page->index = addr; + /* double use of the same address */ + if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) { + page_ref_dec(page); + mutex_unlock(&kvm->arch.vsie.mutex); + return NULL; + } + mutex_unlock(&kvm->arch.vsie.mutex); + + vsie_page = page_to_virt(page); + memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block)); + release_gmap_shadow(vsie_page); + vsie_page->fault_addr = 0; + vsie_page->scb_s.ihcpu = 0xffffU; + return vsie_page; +} + +/* put a vsie page acquired via get_vsie_page */ +static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page) +{ + struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT); + + page_ref_dec(page); +} + +int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu) +{ + struct vsie_page *vsie_page; + unsigned long scb_addr; + int rc; + + vcpu->stat.instruction_sie++; + if (!test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIEF2)) + return -EOPNOTSUPP; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + BUILD_BUG_ON(sizeof(struct vsie_page) != 4096); + scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL); + + /* 512 byte alignment */ + if (unlikely(scb_addr & 0x1ffUL)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0)) + return 0; + + vsie_page = get_vsie_page(vcpu->kvm, scb_addr); + if (IS_ERR(vsie_page)) + return PTR_ERR(vsie_page); + else if (!vsie_page) + /* double use of sie control block - simply do nothing */ + return 0; + + rc = pin_scb(vcpu, vsie_page, scb_addr); + if (rc) + goto out_put; + rc = shadow_scb(vcpu, vsie_page); + if (rc) + goto out_unpin_scb; + rc = pin_blocks(vcpu, vsie_page); + if (rc) + goto out_unshadow; + register_shadow_scb(vcpu, vsie_page); + rc = vsie_run(vcpu, vsie_page); + unregister_shadow_scb(vcpu); + unpin_blocks(vcpu, vsie_page); +out_unshadow: + unshadow_scb(vcpu, vsie_page); +out_unpin_scb: + unpin_scb(vcpu, vsie_page, scb_addr); +out_put: + put_vsie_page(vcpu->kvm, vsie_page); + + return rc < 0 ? rc : 0; +} + +/* Init the vsie data structures. To be called when a vm is initialized. */ +void kvm_s390_vsie_init(struct kvm *kvm) +{ + mutex_init(&kvm->arch.vsie.mutex); + INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL); +} + +/* Destroy the vsie data structures. To be called when a vm is destroyed. */ +void kvm_s390_vsie_destroy(struct kvm *kvm) +{ + struct vsie_page *vsie_page; + struct page *page; + int i; + + mutex_lock(&kvm->arch.vsie.mutex); + for (i = 0; i < kvm->arch.vsie.page_count; i++) { + page = kvm->arch.vsie.pages[i]; + kvm->arch.vsie.pages[i] = NULL; + vsie_page = page_to_virt(page); + release_gmap_shadow(vsie_page); + /* free the radix tree entry */ + radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9); + __free_page(page); + } + kvm->arch.vsie.page_count = 0; + mutex_unlock(&kvm->arch.vsie.mutex); +} + +void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_sie_block *scb = READ_ONCE(vcpu->arch.vsie_block); + + /* + * Even if the VCPU lets go of the shadow sie block reference, it is + * still valid in the cache. So we can safely kick it. + */ + if (scb) { + atomic_or(PROG_BLOCK_SIE, &scb->prog20); + if (scb->prog0c & PROG_IN_SIE) + atomic_or(CPUSTAT_STOP_INT, &scb->cpuflags); + } +} diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index d96596128e9f..f481fcde067b 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -104,6 +104,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { + check_object_size(to, n, false); if (static_branch_likely(&have_mvcos)) return copy_from_user_mvcos(to, from, n); return copy_from_user_mvcp(to, from, n); @@ -177,6 +178,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { + check_object_size(from, n, true); if (static_branch_likely(&have_mvcos)) return copy_to_user_mvcos(to, from, n); return copy_to_user_mvcs(to, from, n); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 25783dc3c813..a58bca62a93b 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -418,6 +418,8 @@ static inline int do_exception(struct pt_regs *regs, int access) (struct gmap *) S390_lowcore.gmap : NULL; if (gmap) { current->thread.gmap_addr = address; + current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE); + current->thread.gmap_int_code = regs->int_code & 0xffff; address = __gmap_translate(gmap, address); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 063c721ec0dc..2ce6bb3bab32 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -20,14 +20,16 @@ #include <asm/gmap.h> #include <asm/tlb.h> +#define GMAP_SHADOW_FAKE_TABLE 1ULL + /** - * gmap_alloc - allocate a guest address space + * gmap_alloc - allocate and initialize a guest address space * @mm: pointer to the parent mm_struct * @limit: maximum address of the gmap address space * * Returns a guest address space structure. */ -struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit) +static struct gmap *gmap_alloc(unsigned long limit) { struct gmap *gmap; struct page *page; @@ -55,10 +57,14 @@ struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit) if (!gmap) goto out; INIT_LIST_HEAD(&gmap->crst_list); + INIT_LIST_HEAD(&gmap->children); + INIT_LIST_HEAD(&gmap->pt_list); INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); + INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC); spin_lock_init(&gmap->guest_table_lock); - gmap->mm = mm; + spin_lock_init(&gmap->shadow_lock); + atomic_set(&gmap->ref_count, 1); page = alloc_pages(GFP_KERNEL, 2); if (!page) goto out_free; @@ -70,9 +76,6 @@ struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit) gmap->asce = atype | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | __pa(table); gmap->asce_end = limit; - down_write(&mm->mmap_sem); - list_add(&gmap->list, &mm->context.gmap_list); - up_write(&mm->mmap_sem); return gmap; out_free: @@ -80,7 +83,28 @@ out_free: out: return NULL; } -EXPORT_SYMBOL_GPL(gmap_alloc); + +/** + * gmap_create - create a guest address space + * @mm: pointer to the parent mm_struct + * @limit: maximum size of the gmap address space + * + * Returns a guest address space structure. + */ +struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit) +{ + struct gmap *gmap; + + gmap = gmap_alloc(limit); + if (!gmap) + return NULL; + gmap->mm = mm; + spin_lock(&mm->context.gmap_lock); + list_add_rcu(&gmap->list, &mm->context.gmap_list); + spin_unlock(&mm->context.gmap_lock); + return gmap; +} +EXPORT_SYMBOL_GPL(gmap_create); static void gmap_flush_tlb(struct gmap *gmap) { @@ -114,31 +138,117 @@ static void gmap_radix_tree_free(struct radix_tree_root *root) } while (nr > 0); } +static void gmap_rmap_radix_tree_free(struct radix_tree_root *root) +{ + struct gmap_rmap *rmap, *rnext, *head; + struct radix_tree_iter iter; + unsigned long indices[16]; + unsigned long index; + void **slot; + int i, nr; + + /* A radix tree is freed by deleting all of its entries */ + index = 0; + do { + nr = 0; + radix_tree_for_each_slot(slot, root, &iter, index) { + indices[nr] = iter.index; + if (++nr == 16) + break; + } + for (i = 0; i < nr; i++) { + index = indices[i]; + head = radix_tree_delete(root, index); + gmap_for_each_rmap_safe(rmap, rnext, head) + kfree(rmap); + } + } while (nr > 0); +} + /** * gmap_free - free a guest address space * @gmap: pointer to the guest address space structure + * + * No locks required. There are no references to this gmap anymore. */ -void gmap_free(struct gmap *gmap) +static void gmap_free(struct gmap *gmap) { struct page *page, *next; - /* Flush tlb. */ - if (MACHINE_HAS_IDTE) - __tlb_flush_idte(gmap->asce); - else - __tlb_flush_global(); - + /* Flush tlb of all gmaps (if not already done for shadows) */ + if (!(gmap_is_shadow(gmap) && gmap->removed)) + gmap_flush_tlb(gmap); /* Free all segment & region tables. */ list_for_each_entry_safe(page, next, &gmap->crst_list, lru) __free_pages(page, 2); gmap_radix_tree_free(&gmap->guest_to_host); gmap_radix_tree_free(&gmap->host_to_guest); - down_write(&gmap->mm->mmap_sem); - list_del(&gmap->list); - up_write(&gmap->mm->mmap_sem); + + /* Free additional data for a shadow gmap */ + if (gmap_is_shadow(gmap)) { + /* Free all page tables. */ + list_for_each_entry_safe(page, next, &gmap->pt_list, lru) + page_table_free_pgste(page); + gmap_rmap_radix_tree_free(&gmap->host_to_rmap); + /* Release reference to the parent */ + gmap_put(gmap->parent); + } + kfree(gmap); } -EXPORT_SYMBOL_GPL(gmap_free); + +/** + * gmap_get - increase reference counter for guest address space + * @gmap: pointer to the guest address space structure + * + * Returns the gmap pointer + */ +struct gmap *gmap_get(struct gmap *gmap) +{ + atomic_inc(&gmap->ref_count); + return gmap; +} +EXPORT_SYMBOL_GPL(gmap_get); + +/** + * gmap_put - decrease reference counter for guest address space + * @gmap: pointer to the guest address space structure + * + * If the reference counter reaches zero the guest address space is freed. + */ +void gmap_put(struct gmap *gmap) +{ + if (atomic_dec_return(&gmap->ref_count) == 0) + gmap_free(gmap); +} +EXPORT_SYMBOL_GPL(gmap_put); + +/** + * gmap_remove - remove a guest address space but do not free it yet + * @gmap: pointer to the guest address space structure + */ +void gmap_remove(struct gmap *gmap) +{ + struct gmap *sg, *next; + + /* Remove all shadow gmaps linked to this gmap */ + if (!list_empty(&gmap->children)) { + spin_lock(&gmap->shadow_lock); + list_for_each_entry_safe(sg, next, &gmap->children, list) { + list_del(&sg->list); + gmap_put(sg); + } + spin_unlock(&gmap->shadow_lock); + } + /* Remove gmap from the pre-mm list */ + spin_lock(&gmap->mm->context.gmap_lock); + list_del_rcu(&gmap->list); + spin_unlock(&gmap->mm->context.gmap_lock); + synchronize_rcu(); + /* Put reference */ + gmap_put(gmap); +} +EXPORT_SYMBOL_GPL(gmap_remove); /** * gmap_enable - switch primary space to the guest address space @@ -160,6 +270,17 @@ void gmap_disable(struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_disable); +/** + * gmap_get_enabled - get a pointer to the currently enabled gmap + * + * Returns a pointer to the currently enabled gmap. 0 if none is enabled. + */ +struct gmap *gmap_get_enabled(void) +{ + return (struct gmap *) S390_lowcore.gmap; +} +EXPORT_SYMBOL_GPL(gmap_get_enabled); + /* * gmap_alloc_table is assumed to be called with mmap_sem held */ @@ -175,7 +296,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, return -ENOMEM; new = (unsigned long *) page_to_phys(page); crst_table_init(new, init); - spin_lock(&gmap->mm->page_table_lock); + spin_lock(&gmap->guest_table_lock); if (*table & _REGION_ENTRY_INVALID) { list_add(&page->lru, &gmap->crst_list); *table = (unsigned long) new | _REGION_ENTRY_LENGTH | @@ -183,7 +304,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, page->index = gaddr; page = NULL; } - spin_unlock(&gmap->mm->page_table_lock); + spin_unlock(&gmap->guest_table_lock); if (page) __free_pages(page, 2); return 0; @@ -219,6 +340,7 @@ static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) unsigned long *entry; int flush = 0; + BUG_ON(gmap_is_shadow(gmap)); spin_lock(&gmap->guest_table_lock); entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); if (entry) { @@ -258,6 +380,7 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) unsigned long off; int flush; + BUG_ON(gmap_is_shadow(gmap)); if ((to | len) & (PMD_SIZE - 1)) return -EINVAL; if (len == 0 || to + len < to) @@ -289,6 +412,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long off; int flush; + BUG_ON(gmap_is_shadow(gmap)); if ((from | to | len) & (PMD_SIZE - 1)) return -EINVAL; if (len == 0 || from + len < from || to + len < to || @@ -326,6 +450,8 @@ EXPORT_SYMBOL_GPL(gmap_map_segment); * This function does not establish potentially missing page table entries. * The mmap_sem of the mm that belongs to the address space must be held * when this function gets called. + * + * Note: Can also be called for shadow gmaps. */ unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) { @@ -333,6 +459,7 @@ unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); + /* Note: guest_to_host is empty for a shadow gmap */ return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; } EXPORT_SYMBOL_GPL(__gmap_translate); @@ -369,11 +496,13 @@ void gmap_unlink(struct mm_struct *mm, unsigned long *table, struct gmap *gmap; int flush; - list_for_each_entry(gmap, &mm->context.gmap_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); if (flush) gmap_flush_tlb(gmap); } + rcu_read_unlock(); } /** @@ -397,6 +526,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) pmd_t *pmd; int rc; + BUG_ON(gmap_is_shadow(gmap)); /* Create higher level tables in the gmap page table */ table = gmap->table; if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { @@ -552,116 +682,1412 @@ static LIST_HEAD(gmap_notifier_list); static DEFINE_SPINLOCK(gmap_notifier_lock); /** - * gmap_register_ipte_notifier - register a pte invalidation callback + * gmap_register_pte_notifier - register a pte invalidation callback * @nb: pointer to the gmap notifier block */ -void gmap_register_ipte_notifier(struct gmap_notifier *nb) +void gmap_register_pte_notifier(struct gmap_notifier *nb) { spin_lock(&gmap_notifier_lock); - list_add(&nb->list, &gmap_notifier_list); + list_add_rcu(&nb->list, &gmap_notifier_list); spin_unlock(&gmap_notifier_lock); } -EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); +EXPORT_SYMBOL_GPL(gmap_register_pte_notifier); /** - * gmap_unregister_ipte_notifier - remove a pte invalidation callback + * gmap_unregister_pte_notifier - remove a pte invalidation callback * @nb: pointer to the gmap notifier block */ -void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) +void gmap_unregister_pte_notifier(struct gmap_notifier *nb) { spin_lock(&gmap_notifier_lock); - list_del_init(&nb->list); + list_del_rcu(&nb->list); spin_unlock(&gmap_notifier_lock); + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier); + +/** + * gmap_call_notifier - call all registered invalidation callbacks + * @gmap: pointer to guest mapping meta data structure + * @start: start virtual address in the guest address space + * @end: end virtual address in the guest address space + */ +static void gmap_call_notifier(struct gmap *gmap, unsigned long start, + unsigned long end) +{ + struct gmap_notifier *nb; + + list_for_each_entry(nb, &gmap_notifier_list, list) + nb->notifier_call(gmap, start, end); +} + +/** + * gmap_table_walk - walk the gmap page tables + * @gmap: pointer to guest mapping meta data structure + * @gaddr: virtual address in the guest address space + * @level: page table level to stop at + * + * Returns a table entry pointer for the given guest address and @level + * @level=0 : returns a pointer to a page table table entry (or NULL) + * @level=1 : returns a pointer to a segment table entry (or NULL) + * @level=2 : returns a pointer to a region-3 table entry (or NULL) + * @level=3 : returns a pointer to a region-2 table entry (or NULL) + * @level=4 : returns a pointer to a region-1 table entry (or NULL) + * + * Returns NULL if the gmap page tables could not be walked to the + * requested level. + * + * Note: Can also be called for shadow gmaps. + */ +static inline unsigned long *gmap_table_walk(struct gmap *gmap, + unsigned long gaddr, int level) +{ + unsigned long *table; + + if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4)) + return NULL; + if (gmap_is_shadow(gmap) && gmap->removed) + return NULL; + if (gaddr & (-1UL << (31 + ((gmap->asce & _ASCE_TYPE_MASK) >> 2)*11))) + return NULL; + table = gmap->table; + switch (gmap->asce & _ASCE_TYPE_MASK) { + case _ASCE_TYPE_REGION1: + table += (gaddr >> 53) & 0x7ff; + if (level == 4) + break; + if (*table & _REGION_ENTRY_INVALID) + return NULL; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* Fallthrough */ + case _ASCE_TYPE_REGION2: + table += (gaddr >> 42) & 0x7ff; + if (level == 3) + break; + if (*table & _REGION_ENTRY_INVALID) + return NULL; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* Fallthrough */ + case _ASCE_TYPE_REGION3: + table += (gaddr >> 31) & 0x7ff; + if (level == 2) + break; + if (*table & _REGION_ENTRY_INVALID) + return NULL; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* Fallthrough */ + case _ASCE_TYPE_SEGMENT: + table += (gaddr >> 20) & 0x7ff; + if (level == 1) + break; + if (*table & _REGION_ENTRY_INVALID) + return NULL; + table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); + table += (gaddr >> 12) & 0xff; + } + return table; +} + +/** + * gmap_pte_op_walk - walk the gmap page table, get the page table lock + * and return the pte pointer + * @gmap: pointer to guest mapping meta data structure + * @gaddr: virtual address in the guest address space + * @ptl: pointer to the spinlock pointer + * + * Returns a pointer to the locked pte for a guest address, or NULL + * + * Note: Can also be called for shadow gmaps. + */ +static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr, + spinlock_t **ptl) +{ + unsigned long *table; + + if (gmap_is_shadow(gmap)) + spin_lock(&gmap->guest_table_lock); + /* Walk the gmap page table, lock and get pte pointer */ + table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */ + if (!table || *table & _SEGMENT_ENTRY_INVALID) { + if (gmap_is_shadow(gmap)) + spin_unlock(&gmap->guest_table_lock); + return NULL; + } + if (gmap_is_shadow(gmap)) { + *ptl = &gmap->guest_table_lock; + return pte_offset_map((pmd_t *) table, gaddr); + } + return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl); +} + +/** + * gmap_pte_op_fixup - force a page in and connect the gmap page table + * @gmap: pointer to guest mapping meta data structure + * @gaddr: virtual address in the guest address space + * @vmaddr: address in the host process address space + * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE + * + * Returns 0 if the caller can retry __gmap_translate (might fail again), + * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing + * up or connecting the gmap page table. + */ +static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr, + unsigned long vmaddr, int prot) +{ + struct mm_struct *mm = gmap->mm; + unsigned int fault_flags; + bool unlocked = false; + + BUG_ON(gmap_is_shadow(gmap)); + fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0; + if (fixup_user_fault(current, mm, vmaddr, fault_flags, &unlocked)) + return -EFAULT; + if (unlocked) + /* lost mmap_sem, caller has to retry __gmap_translate */ + return 0; + /* Connect the page tables */ + return __gmap_link(gmap, gaddr, vmaddr); } -EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); /** - * gmap_ipte_notify - mark a range of ptes for invalidation notification + * gmap_pte_op_end - release the page table lock + * @ptl: pointer to the spinlock pointer + */ +static void gmap_pte_op_end(spinlock_t *ptl) +{ + spin_unlock(ptl); +} + +/* + * gmap_protect_range - remove access rights to memory and set pgste bits * @gmap: pointer to guest mapping meta data structure * @gaddr: virtual address in the guest address space * @len: size of area + * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE + * @bits: pgste notification bits to set * - * Returns 0 if for each page in the given range a gmap mapping exists and - * the invalidation notification could be set. If the gmap mapping is missing - * for one or more pages -EFAULT is returned. If no memory could be allocated - * -ENOMEM is returned. This function establishes missing page table entries. + * Returns 0 if successfully protected, -ENOMEM if out of memory and + * -EFAULT if gaddr is invalid (or mapping for shadows is missing). + * + * Called with sg->mm->mmap_sem in read. + * + * Note: Can also be called for shadow gmaps. */ -int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) +static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, + unsigned long len, int prot, unsigned long bits) { - unsigned long addr; + unsigned long vmaddr; spinlock_t *ptl; pte_t *ptep; - bool unlocked; - int rc = 0; + int rc; + + while (len) { + rc = -EAGAIN; + ptep = gmap_pte_op_walk(gmap, gaddr, &ptl); + if (ptep) { + rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, bits); + gmap_pte_op_end(ptl); + } + if (rc) { + vmaddr = __gmap_translate(gmap, gaddr); + if (IS_ERR_VALUE(vmaddr)) + return vmaddr; + rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot); + if (rc) + return rc; + continue; + } + gaddr += PAGE_SIZE; + len -= PAGE_SIZE; + } + return 0; +} + +/** + * gmap_mprotect_notify - change access rights for a range of ptes and + * call the notifier if any pte changes again + * @gmap: pointer to guest mapping meta data structure + * @gaddr: virtual address in the guest address space + * @len: size of area + * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE + * + * Returns 0 if for each page in the given range a gmap mapping exists, + * the new access rights could be set and the notifier could be armed. + * If the gmap mapping is missing for one or more pages -EFAULT is + * returned. If no memory could be allocated -ENOMEM is returned. + * This function establishes missing page table entries. + */ +int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr, + unsigned long len, int prot) +{ + int rc; - if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK)) + if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap)) + return -EINVAL; + if (!MACHINE_HAS_ESOP && prot == PROT_READ) return -EINVAL; down_read(&gmap->mm->mmap_sem); - while (len) { - unlocked = false; - /* Convert gmap address and connect the page tables */ - addr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(addr)) { - rc = addr; + rc = gmap_protect_range(gmap, gaddr, len, prot, PGSTE_IN_BIT); + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_mprotect_notify); + +/** + * gmap_read_table - get an unsigned long value from a guest page table using + * absolute addressing, without marking the page referenced. + * @gmap: pointer to guest mapping meta data structure + * @gaddr: virtual address in the guest address space + * @val: pointer to the unsigned long value to return + * + * Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT + * if reading using the virtual address failed. + * + * Called with gmap->mm->mmap_sem in read. + */ +int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val) +{ + unsigned long address, vmaddr; + spinlock_t *ptl; + pte_t *ptep, pte; + int rc; + + while (1) { + rc = -EAGAIN; + ptep = gmap_pte_op_walk(gmap, gaddr, &ptl); + if (ptep) { + pte = *ptep; + if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) { + address = pte_val(pte) & PAGE_MASK; + address += gaddr & ~PAGE_MASK; + *val = *(unsigned long *) address; + pte_val(*ptep) |= _PAGE_YOUNG; + /* Do *NOT* clear the _PAGE_INVALID bit! */ + rc = 0; + } + gmap_pte_op_end(ptl); + } + if (!rc) + break; + vmaddr = __gmap_translate(gmap, gaddr); + if (IS_ERR_VALUE(vmaddr)) { + rc = vmaddr; break; } - /* Get the page mapped */ - if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE, - &unlocked)) { - rc = -EFAULT; + rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, PROT_READ); + if (rc) break; + } + return rc; +} +EXPORT_SYMBOL_GPL(gmap_read_table); + +/** + * gmap_insert_rmap - add a rmap to the host_to_rmap radix tree + * @sg: pointer to the shadow guest address space structure + * @vmaddr: vm address associated with the rmap + * @rmap: pointer to the rmap structure + * + * Called with the sg->guest_table_lock + */ +static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr, + struct gmap_rmap *rmap) +{ + void **slot; + + BUG_ON(!gmap_is_shadow(sg)); + slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT); + if (slot) { + rmap->next = radix_tree_deref_slot_protected(slot, + &sg->guest_table_lock); + radix_tree_replace_slot(slot, rmap); + } else { + rmap->next = NULL; + radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT, + rmap); + } +} + +/** + * gmap_protect_rmap - modify access rights to memory and create an rmap + * @sg: pointer to the shadow guest address space structure + * @raddr: rmap address in the shadow gmap + * @paddr: address in the parent guest address space + * @len: length of the memory area to protect + * @prot: indicates access rights: none, read-only or read-write + * + * Returns 0 if successfully protected and the rmap was created, -ENOMEM + * if out of memory and -EFAULT if paddr is invalid. + */ +static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr, + unsigned long paddr, unsigned long len, int prot) +{ + struct gmap *parent; + struct gmap_rmap *rmap; + unsigned long vmaddr; + spinlock_t *ptl; + pte_t *ptep; + int rc; + + BUG_ON(!gmap_is_shadow(sg)); + parent = sg->parent; + while (len) { + vmaddr = __gmap_translate(parent, paddr); + if (IS_ERR_VALUE(vmaddr)) + return vmaddr; + rmap = kzalloc(sizeof(*rmap), GFP_KERNEL); + if (!rmap) + return -ENOMEM; + rmap->raddr = raddr; + rc = radix_tree_preload(GFP_KERNEL); + if (rc) { + kfree(rmap); + return rc; + } + rc = -EAGAIN; + ptep = gmap_pte_op_walk(parent, paddr, &ptl); + if (ptep) { + spin_lock(&sg->guest_table_lock); + rc = ptep_force_prot(parent->mm, paddr, ptep, prot, + PGSTE_VSIE_BIT); + if (!rc) + gmap_insert_rmap(sg, vmaddr, rmap); + spin_unlock(&sg->guest_table_lock); + gmap_pte_op_end(ptl); } - /* While trying to map mmap_sem got unlocked. Let us retry */ - if (unlocked) + radix_tree_preload_end(); + if (rc) { + kfree(rmap); + rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot); + if (rc) + return rc; continue; - rc = __gmap_link(gmap, gaddr, addr); + } + paddr += PAGE_SIZE; + len -= PAGE_SIZE; + } + return 0; +} + +#define _SHADOW_RMAP_MASK 0x7 +#define _SHADOW_RMAP_REGION1 0x5 +#define _SHADOW_RMAP_REGION2 0x4 +#define _SHADOW_RMAP_REGION3 0x3 +#define _SHADOW_RMAP_SEGMENT 0x2 +#define _SHADOW_RMAP_PGTABLE 0x1 + +/** + * gmap_idte_one - invalidate a single region or segment table entry + * @asce: region or segment table *origin* + table-type bits + * @vaddr: virtual address to identify the table entry to flush + * + * The invalid bit of a single region or segment table entry is set + * and the associated TLB entries depending on the entry are flushed. + * The table-type of the @asce identifies the portion of the @vaddr + * that is used as the invalidation index. + */ +static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr) +{ + asm volatile( + " .insn rrf,0xb98e0000,%0,%1,0,0" + : : "a" (asce), "a" (vaddr) : "cc", "memory"); +} + +/** + * gmap_unshadow_page - remove a page from a shadow page table + * @sg: pointer to the shadow guest address space structure + * @raddr: rmap address in the shadow guest address space + * + * Called with the sg->guest_table_lock + */ +static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr) +{ + unsigned long *table; + + BUG_ON(!gmap_is_shadow(sg)); + table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */ + if (!table || *table & _PAGE_INVALID) + return; + gmap_call_notifier(sg, raddr, raddr + (1UL << 12) - 1); + ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table); +} + +/** + * __gmap_unshadow_pgt - remove all entries from a shadow page table + * @sg: pointer to the shadow guest address space structure + * @raddr: rmap address in the shadow guest address space + * @pgt: pointer to the start of a shadow page table + * + * Called with the sg->guest_table_lock + */ +static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr, + unsigned long *pgt) +{ + int i; + + BUG_ON(!gmap_is_shadow(sg)); + for (i = 0; i < 256; i++, raddr += 1UL << 12) + pgt[i] = _PAGE_INVALID; +} + +/** + * gmap_unshadow_pgt - remove a shadow page table from a segment entry + * @sg: pointer to the shadow guest address space structure + * @raddr: address in the shadow guest address space + * + * Called with the sg->guest_table_lock + */ +static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) +{ + unsigned long sto, *ste, *pgt; + struct page *page; + + BUG_ON(!gmap_is_shadow(sg)); + ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */ + if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN)) + return; + gmap_call_notifier(sg, raddr, raddr + (1UL << 20) - 1); + sto = (unsigned long) (ste - ((raddr >> 20) & 0x7ff)); + gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr); + pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN); + *ste = _SEGMENT_ENTRY_EMPTY; + __gmap_unshadow_pgt(sg, raddr, pgt); + /* Free page table */ + page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT); + list_del(&page->lru); + page_table_free_pgste(page); +} + +/** + * __gmap_unshadow_sgt - remove all entries from a shadow segment table + * @sg: pointer to the shadow guest address space structure + * @raddr: rmap address in the shadow guest address space + * @sgt: pointer to the start of a shadow segment table + * + * Called with the sg->guest_table_lock + */ +static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, + unsigned long *sgt) +{ + unsigned long asce, *pgt; + struct page *page; + int i; + + BUG_ON(!gmap_is_shadow(sg)); + asce = (unsigned long) sgt | _ASCE_TYPE_SEGMENT; + for (i = 0; i < 2048; i++, raddr += 1UL << 20) { + if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN)) + continue; + pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN); + sgt[i] = _SEGMENT_ENTRY_EMPTY; + __gmap_unshadow_pgt(sg, raddr, pgt); + /* Free page table */ + page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT); + list_del(&page->lru); + page_table_free_pgste(page); + } +} + +/** + * gmap_unshadow_sgt - remove a shadow segment table from a region-3 entry + * @sg: pointer to the shadow guest address space structure + * @raddr: rmap address in the shadow guest address space + * + * Called with the shadow->guest_table_lock + */ +static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) +{ + unsigned long r3o, *r3e, *sgt; + struct page *page; + + BUG_ON(!gmap_is_shadow(sg)); + r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */ + if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN)) + return; + gmap_call_notifier(sg, raddr, raddr + (1UL << 31) - 1); + r3o = (unsigned long) (r3e - ((raddr >> 31) & 0x7ff)); + gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr); + sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN); + *r3e = _REGION3_ENTRY_EMPTY; + __gmap_unshadow_sgt(sg, raddr, sgt); + /* Free segment table */ + page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT); + list_del(&page->lru); + __free_pages(page, 2); +} + +/** + * __gmap_unshadow_r3t - remove all entries from a shadow region-3 table + * @sg: pointer to the shadow guest address space structure + * @raddr: address in the shadow guest address space + * @r3t: pointer to the start of a shadow region-3 table + * + * Called with the sg->guest_table_lock + */ +static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, + unsigned long *r3t) +{ + unsigned long asce, *sgt; + struct page *page; + int i; + + BUG_ON(!gmap_is_shadow(sg)); + asce = (unsigned long) r3t | _ASCE_TYPE_REGION3; + for (i = 0; i < 2048; i++, raddr += 1UL << 31) { + if (!(r3t[i] & _REGION_ENTRY_ORIGIN)) + continue; + sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN); + r3t[i] = _REGION3_ENTRY_EMPTY; + __gmap_unshadow_sgt(sg, raddr, sgt); + /* Free segment table */ + page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT); + list_del(&page->lru); + __free_pages(page, 2); + } +} + +/** + * gmap_unshadow_r3t - remove a shadow region-3 table from a region-2 entry + * @sg: pointer to the shadow guest address space structure + * @raddr: rmap address in the shadow guest address space + * + * Called with the sg->guest_table_lock + */ +static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) +{ + unsigned long r2o, *r2e, *r3t; + struct page *page; + + BUG_ON(!gmap_is_shadow(sg)); + r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */ + if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN)) + return; + gmap_call_notifier(sg, raddr, raddr + (1UL << 42) - 1); + r2o = (unsigned long) (r2e - ((raddr >> 42) & 0x7ff)); + gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr); + r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN); + *r2e = _REGION2_ENTRY_EMPTY; + __gmap_unshadow_r3t(sg, raddr, r3t); + /* Free region 3 table */ + page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT); + list_del(&page->lru); + __free_pages(page, 2); +} + +/** + * __gmap_unshadow_r2t - remove all entries from a shadow region-2 table + * @sg: pointer to the shadow guest address space structure + * @raddr: rmap address in the shadow guest address space + * @r2t: pointer to the start of a shadow region-2 table + * + * Called with the sg->guest_table_lock + */ +static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, + unsigned long *r2t) +{ + unsigned long asce, *r3t; + struct page *page; + int i; + + BUG_ON(!gmap_is_shadow(sg)); + asce = (unsigned long) r2t | _ASCE_TYPE_REGION2; + for (i = 0; i < 2048; i++, raddr += 1UL << 42) { + if (!(r2t[i] & _REGION_ENTRY_ORIGIN)) + continue; + r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN); + r2t[i] = _REGION2_ENTRY_EMPTY; + __gmap_unshadow_r3t(sg, raddr, r3t); + /* Free region 3 table */ + page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT); + list_del(&page->lru); + __free_pages(page, 2); + } +} + +/** + * gmap_unshadow_r2t - remove a shadow region-2 table from a region-1 entry + * @sg: pointer to the shadow guest address space structure + * @raddr: rmap address in the shadow guest address space + * + * Called with the sg->guest_table_lock + */ +static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) +{ + unsigned long r1o, *r1e, *r2t; + struct page *page; + + BUG_ON(!gmap_is_shadow(sg)); + r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */ + if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN)) + return; + gmap_call_notifier(sg, raddr, raddr + (1UL << 53) - 1); + r1o = (unsigned long) (r1e - ((raddr >> 53) & 0x7ff)); + gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr); + r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN); + *r1e = _REGION1_ENTRY_EMPTY; + __gmap_unshadow_r2t(sg, raddr, r2t); + /* Free region 2 table */ + page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT); + list_del(&page->lru); + __free_pages(page, 2); +} + +/** + * __gmap_unshadow_r1t - remove all entries from a shadow region-1 table + * @sg: pointer to the shadow guest address space structure + * @raddr: rmap address in the shadow guest address space + * @r1t: pointer to the start of a shadow region-1 table + * + * Called with the shadow->guest_table_lock + */ +static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, + unsigned long *r1t) +{ + unsigned long asce, *r2t; + struct page *page; + int i; + + BUG_ON(!gmap_is_shadow(sg)); + asce = (unsigned long) r1t | _ASCE_TYPE_REGION1; + for (i = 0; i < 2048; i++, raddr += 1UL << 53) { + if (!(r1t[i] & _REGION_ENTRY_ORIGIN)) + continue; + r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN); + __gmap_unshadow_r2t(sg, raddr, r2t); + /* Clear entry and flush translation r1t -> r2t */ + gmap_idte_one(asce, raddr); + r1t[i] = _REGION1_ENTRY_EMPTY; + /* Free region 2 table */ + page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT); + list_del(&page->lru); + __free_pages(page, 2); + } +} + +/** + * gmap_unshadow - remove a shadow page table completely + * @sg: pointer to the shadow guest address space structure + * + * Called with sg->guest_table_lock + */ +static void gmap_unshadow(struct gmap *sg) +{ + unsigned long *table; + + BUG_ON(!gmap_is_shadow(sg)); + if (sg->removed) + return; + sg->removed = 1; + gmap_call_notifier(sg, 0, -1UL); + gmap_flush_tlb(sg); + table = (unsigned long *)(sg->asce & _ASCE_ORIGIN); + switch (sg->asce & _ASCE_TYPE_MASK) { + case _ASCE_TYPE_REGION1: + __gmap_unshadow_r1t(sg, 0, table); + break; + case _ASCE_TYPE_REGION2: + __gmap_unshadow_r2t(sg, 0, table); + break; + case _ASCE_TYPE_REGION3: + __gmap_unshadow_r3t(sg, 0, table); + break; + case _ASCE_TYPE_SEGMENT: + __gmap_unshadow_sgt(sg, 0, table); + break; + } +} + +/** + * gmap_find_shadow - find a specific asce in the list of shadow tables + * @parent: pointer to the parent gmap + * @asce: ASCE for which the shadow table is created + * @edat_level: edat level to be used for the shadow translation + * + * Returns the pointer to a gmap if a shadow table with the given asce is + * already available, ERR_PTR(-EAGAIN) if another one is just being created, + * otherwise NULL + */ +static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, + int edat_level) +{ + struct gmap *sg; + + list_for_each_entry(sg, &parent->children, list) { + if (sg->orig_asce != asce || sg->edat_level != edat_level || + sg->removed) + continue; + if (!sg->initialized) + return ERR_PTR(-EAGAIN); + atomic_inc(&sg->ref_count); + return sg; + } + return NULL; +} + +/** + * gmap_shadow_valid - check if a shadow guest address space matches the + * given properties and is still valid + * @sg: pointer to the shadow guest address space structure + * @asce: ASCE for which the shadow table is requested + * @edat_level: edat level to be used for the shadow translation + * + * Returns 1 if the gmap shadow is still valid and matches the given + * properties, the caller can continue using it. Returns 0 otherwise, the + * caller has to request a new shadow gmap in this case. + * + */ +int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level) +{ + if (sg->removed) + return 0; + return sg->orig_asce == asce && sg->edat_level == edat_level; +} +EXPORT_SYMBOL_GPL(gmap_shadow_valid); + +/** + * gmap_shadow - create/find a shadow guest address space + * @parent: pointer to the parent gmap + * @asce: ASCE for which the shadow table is created + * @edat_level: edat level to be used for the shadow translation + * + * The pages of the top level page table referred by the asce parameter + * will be set to read-only and marked in the PGSTEs of the kvm process. + * The shadow table will be removed automatically on any change to the + * PTE mapping for the source table. + * + * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory, + * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the + * parent gmap table could not be protected. + */ +struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, + int edat_level) +{ + struct gmap *sg, *new; + unsigned long limit; + int rc; + + BUG_ON(gmap_is_shadow(parent)); + spin_lock(&parent->shadow_lock); + sg = gmap_find_shadow(parent, asce, edat_level); + spin_unlock(&parent->shadow_lock); + if (sg) + return sg; + /* Create a new shadow gmap */ + limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11)); + if (asce & _ASCE_REAL_SPACE) + limit = -1UL; + new = gmap_alloc(limit); + if (!new) + return ERR_PTR(-ENOMEM); + new->mm = parent->mm; + new->parent = gmap_get(parent); + new->orig_asce = asce; + new->edat_level = edat_level; + new->initialized = false; + spin_lock(&parent->shadow_lock); + /* Recheck if another CPU created the same shadow */ + sg = gmap_find_shadow(parent, asce, edat_level); + if (sg) { + spin_unlock(&parent->shadow_lock); + gmap_free(new); + return sg; + } + if (asce & _ASCE_REAL_SPACE) { + /* only allow one real-space gmap shadow */ + list_for_each_entry(sg, &parent->children, list) { + if (sg->orig_asce & _ASCE_REAL_SPACE) { + spin_lock(&sg->guest_table_lock); + gmap_unshadow(sg); + spin_unlock(&sg->guest_table_lock); + list_del(&sg->list); + gmap_put(sg); + break; + } + } + } + atomic_set(&new->ref_count, 2); + list_add(&new->list, &parent->children); + if (asce & _ASCE_REAL_SPACE) { + /* nothing to protect, return right away */ + new->initialized = true; + spin_unlock(&parent->shadow_lock); + return new; + } + spin_unlock(&parent->shadow_lock); + /* protect after insertion, so it will get properly invalidated */ + down_read(&parent->mm->mmap_sem); + rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN, + ((asce & _ASCE_TABLE_LENGTH) + 1) * 4096, + PROT_READ, PGSTE_VSIE_BIT); + up_read(&parent->mm->mmap_sem); + spin_lock(&parent->shadow_lock); + new->initialized = true; + if (rc) { + list_del(&new->list); + gmap_free(new); + new = ERR_PTR(rc); + } + spin_unlock(&parent->shadow_lock); + return new; +} +EXPORT_SYMBOL_GPL(gmap_shadow); + +/** + * gmap_shadow_r2t - create an empty shadow region 2 table + * @sg: pointer to the shadow guest address space structure + * @saddr: faulting address in the shadow gmap + * @r2t: parent gmap address of the region 2 table to get shadowed + * @fake: r2t references contiguous guest memory block, not a r2t + * + * The r2t parameter specifies the address of the source table. The + * four pages of the source table are made read-only in the parent gmap + * address space. A write to the source table area @r2t will automatically + * remove the shadow r2 table and all of its decendents. + * + * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the + * shadow table structure is incomplete, -ENOMEM if out of memory and + * -EFAULT if an address in the parent gmap could not be resolved. + * + * Called with sg->mm->mmap_sem in read. + */ +int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, + int fake) +{ + unsigned long raddr, origin, offset, len; + unsigned long *s_r2t, *table; + struct page *page; + int rc; + + BUG_ON(!gmap_is_shadow(sg)); + /* Allocate a shadow region second table */ + page = alloc_pages(GFP_KERNEL, 2); + if (!page) + return -ENOMEM; + page->index = r2t & _REGION_ENTRY_ORIGIN; + if (fake) + page->index |= GMAP_SHADOW_FAKE_TABLE; + s_r2t = (unsigned long *) page_to_phys(page); + /* Install shadow region second table */ + spin_lock(&sg->guest_table_lock); + table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */ + if (!table) { + rc = -EAGAIN; /* Race with unshadow */ + goto out_free; + } + if (!(*table & _REGION_ENTRY_INVALID)) { + rc = 0; /* Already established */ + goto out_free; + } else if (*table & _REGION_ENTRY_ORIGIN) { + rc = -EAGAIN; /* Race with shadow */ + goto out_free; + } + crst_table_init(s_r2t, _REGION2_ENTRY_EMPTY); + /* mark as invalid as long as the parent table is not protected */ + *table = (unsigned long) s_r2t | _REGION_ENTRY_LENGTH | + _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID; + if (sg->edat_level >= 1) + *table |= (r2t & _REGION_ENTRY_PROTECT); + list_add(&page->lru, &sg->crst_list); + if (fake) { + /* nothing to protect for fake tables */ + *table &= ~_REGION_ENTRY_INVALID; + spin_unlock(&sg->guest_table_lock); + return 0; + } + spin_unlock(&sg->guest_table_lock); + /* Make r2t read-only in parent gmap page table */ + raddr = (saddr & 0xffe0000000000000UL) | _SHADOW_RMAP_REGION1; + origin = r2t & _REGION_ENTRY_ORIGIN; + offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * 4096; + len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset; + rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); + spin_lock(&sg->guest_table_lock); + if (!rc) { + table = gmap_table_walk(sg, saddr, 4); + if (!table || (*table & _REGION_ENTRY_ORIGIN) != + (unsigned long) s_r2t) + rc = -EAGAIN; /* Race with unshadow */ + else + *table &= ~_REGION_ENTRY_INVALID; + } else { + gmap_unshadow_r2t(sg, raddr); + } + spin_unlock(&sg->guest_table_lock); + return rc; +out_free: + spin_unlock(&sg->guest_table_lock); + __free_pages(page, 2); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_shadow_r2t); + +/** + * gmap_shadow_r3t - create a shadow region 3 table + * @sg: pointer to the shadow guest address space structure + * @saddr: faulting address in the shadow gmap + * @r3t: parent gmap address of the region 3 table to get shadowed + * @fake: r3t references contiguous guest memory block, not a r3t + * + * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the + * shadow table structure is incomplete, -ENOMEM if out of memory and + * -EFAULT if an address in the parent gmap could not be resolved. + * + * Called with sg->mm->mmap_sem in read. + */ +int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, + int fake) +{ + unsigned long raddr, origin, offset, len; + unsigned long *s_r3t, *table; + struct page *page; + int rc; + + BUG_ON(!gmap_is_shadow(sg)); + /* Allocate a shadow region second table */ + page = alloc_pages(GFP_KERNEL, 2); + if (!page) + return -ENOMEM; + page->index = r3t & _REGION_ENTRY_ORIGIN; + if (fake) + page->index |= GMAP_SHADOW_FAKE_TABLE; + s_r3t = (unsigned long *) page_to_phys(page); + /* Install shadow region second table */ + spin_lock(&sg->guest_table_lock); + table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */ + if (!table) { + rc = -EAGAIN; /* Race with unshadow */ + goto out_free; + } + if (!(*table & _REGION_ENTRY_INVALID)) { + rc = 0; /* Already established */ + goto out_free; + } else if (*table & _REGION_ENTRY_ORIGIN) { + rc = -EAGAIN; /* Race with shadow */ + } + crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY); + /* mark as invalid as long as the parent table is not protected */ + *table = (unsigned long) s_r3t | _REGION_ENTRY_LENGTH | + _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID; + if (sg->edat_level >= 1) + *table |= (r3t & _REGION_ENTRY_PROTECT); + list_add(&page->lru, &sg->crst_list); + if (fake) { + /* nothing to protect for fake tables */ + *table &= ~_REGION_ENTRY_INVALID; + spin_unlock(&sg->guest_table_lock); + return 0; + } + spin_unlock(&sg->guest_table_lock); + /* Make r3t read-only in parent gmap page table */ + raddr = (saddr & 0xfffffc0000000000UL) | _SHADOW_RMAP_REGION2; + origin = r3t & _REGION_ENTRY_ORIGIN; + offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * 4096; + len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset; + rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); + spin_lock(&sg->guest_table_lock); + if (!rc) { + table = gmap_table_walk(sg, saddr, 3); + if (!table || (*table & _REGION_ENTRY_ORIGIN) != + (unsigned long) s_r3t) + rc = -EAGAIN; /* Race with unshadow */ + else + *table &= ~_REGION_ENTRY_INVALID; + } else { + gmap_unshadow_r3t(sg, raddr); + } + spin_unlock(&sg->guest_table_lock); + return rc; +out_free: + spin_unlock(&sg->guest_table_lock); + __free_pages(page, 2); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_shadow_r3t); + +/** + * gmap_shadow_sgt - create a shadow segment table + * @sg: pointer to the shadow guest address space structure + * @saddr: faulting address in the shadow gmap + * @sgt: parent gmap address of the segment table to get shadowed + * @fake: sgt references contiguous guest memory block, not a sgt + * + * Returns: 0 if successfully shadowed or already shadowed, -EAGAIN if the + * shadow table structure is incomplete, -ENOMEM if out of memory and + * -EFAULT if an address in the parent gmap could not be resolved. + * + * Called with sg->mm->mmap_sem in read. + */ +int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, + int fake) +{ + unsigned long raddr, origin, offset, len; + unsigned long *s_sgt, *table; + struct page *page; + int rc; + + BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE)); + /* Allocate a shadow segment table */ + page = alloc_pages(GFP_KERNEL, 2); + if (!page) + return -ENOMEM; + page->index = sgt & _REGION_ENTRY_ORIGIN; + if (fake) + page->index |= GMAP_SHADOW_FAKE_TABLE; + s_sgt = (unsigned long *) page_to_phys(page); + /* Install shadow region second table */ + spin_lock(&sg->guest_table_lock); + table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */ + if (!table) { + rc = -EAGAIN; /* Race with unshadow */ + goto out_free; + } + if (!(*table & _REGION_ENTRY_INVALID)) { + rc = 0; /* Already established */ + goto out_free; + } else if (*table & _REGION_ENTRY_ORIGIN) { + rc = -EAGAIN; /* Race with shadow */ + goto out_free; + } + crst_table_init(s_sgt, _SEGMENT_ENTRY_EMPTY); + /* mark as invalid as long as the parent table is not protected */ + *table = (unsigned long) s_sgt | _REGION_ENTRY_LENGTH | + _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID; + if (sg->edat_level >= 1) + *table |= sgt & _REGION_ENTRY_PROTECT; + list_add(&page->lru, &sg->crst_list); + if (fake) { + /* nothing to protect for fake tables */ + *table &= ~_REGION_ENTRY_INVALID; + spin_unlock(&sg->guest_table_lock); + return 0; + } + spin_unlock(&sg->guest_table_lock); + /* Make sgt read-only in parent gmap page table */ + raddr = (saddr & 0xffffffff80000000UL) | _SHADOW_RMAP_REGION3; + origin = sgt & _REGION_ENTRY_ORIGIN; + offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * 4096; + len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset; + rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); + spin_lock(&sg->guest_table_lock); + if (!rc) { + table = gmap_table_walk(sg, saddr, 2); + if (!table || (*table & _REGION_ENTRY_ORIGIN) != + (unsigned long) s_sgt) + rc = -EAGAIN; /* Race with unshadow */ + else + *table &= ~_REGION_ENTRY_INVALID; + } else { + gmap_unshadow_sgt(sg, raddr); + } + spin_unlock(&sg->guest_table_lock); + return rc; +out_free: + spin_unlock(&sg->guest_table_lock); + __free_pages(page, 2); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_shadow_sgt); + +/** + * gmap_shadow_lookup_pgtable - find a shadow page table + * @sg: pointer to the shadow guest address space structure + * @saddr: the address in the shadow aguest address space + * @pgt: parent gmap address of the page table to get shadowed + * @dat_protection: if the pgtable is marked as protected by dat + * @fake: pgt references contiguous guest memory block, not a pgtable + * + * Returns 0 if the shadow page table was found and -EAGAIN if the page + * table was not found. + * + * Called with sg->mm->mmap_sem in read. + */ +int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, + unsigned long *pgt, int *dat_protection, + int *fake) +{ + unsigned long *table; + struct page *page; + int rc; + + BUG_ON(!gmap_is_shadow(sg)); + spin_lock(&sg->guest_table_lock); + table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ + if (table && !(*table & _SEGMENT_ENTRY_INVALID)) { + /* Shadow page tables are full pages (pte+pgste) */ + page = pfn_to_page(*table >> PAGE_SHIFT); + *pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE; + *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT); + *fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE); + rc = 0; + } else { + rc = -EAGAIN; + } + spin_unlock(&sg->guest_table_lock); + return rc; + +} +EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup); + +/** + * gmap_shadow_pgt - instantiate a shadow page table + * @sg: pointer to the shadow guest address space structure + * @saddr: faulting address in the shadow gmap + * @pgt: parent gmap address of the page table to get shadowed + * @fake: pgt references contiguous guest memory block, not a pgtable + * + * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the + * shadow table structure is incomplete, -ENOMEM if out of memory, + * -EFAULT if an address in the parent gmap could not be resolved and + * + * Called with gmap->mm->mmap_sem in read + */ +int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, + int fake) +{ + unsigned long raddr, origin; + unsigned long *s_pgt, *table; + struct page *page; + int rc; + + BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE)); + /* Allocate a shadow page table */ + page = page_table_alloc_pgste(sg->mm); + if (!page) + return -ENOMEM; + page->index = pgt & _SEGMENT_ENTRY_ORIGIN; + if (fake) + page->index |= GMAP_SHADOW_FAKE_TABLE; + s_pgt = (unsigned long *) page_to_phys(page); + /* Install shadow page table */ + spin_lock(&sg->guest_table_lock); + table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ + if (!table) { + rc = -EAGAIN; /* Race with unshadow */ + goto out_free; + } + if (!(*table & _SEGMENT_ENTRY_INVALID)) { + rc = 0; /* Already established */ + goto out_free; + } else if (*table & _SEGMENT_ENTRY_ORIGIN) { + rc = -EAGAIN; /* Race with shadow */ + goto out_free; + } + /* mark as invalid as long as the parent table is not protected */ + *table = (unsigned long) s_pgt | _SEGMENT_ENTRY | + (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID; + list_add(&page->lru, &sg->pt_list); + if (fake) { + /* nothing to protect for fake tables */ + *table &= ~_SEGMENT_ENTRY_INVALID; + spin_unlock(&sg->guest_table_lock); + return 0; + } + spin_unlock(&sg->guest_table_lock); + /* Make pgt read-only in parent gmap page table (not the pgste) */ + raddr = (saddr & 0xfffffffffff00000UL) | _SHADOW_RMAP_SEGMENT; + origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK; + rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE, PROT_READ); + spin_lock(&sg->guest_table_lock); + if (!rc) { + table = gmap_table_walk(sg, saddr, 1); + if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != + (unsigned long) s_pgt) + rc = -EAGAIN; /* Race with unshadow */ + else + *table &= ~_SEGMENT_ENTRY_INVALID; + } else { + gmap_unshadow_pgt(sg, raddr); + } + spin_unlock(&sg->guest_table_lock); + return rc; +out_free: + spin_unlock(&sg->guest_table_lock); + page_table_free_pgste(page); + return rc; + +} +EXPORT_SYMBOL_GPL(gmap_shadow_pgt); + +/** + * gmap_shadow_page - create a shadow page mapping + * @sg: pointer to the shadow guest address space structure + * @saddr: faulting address in the shadow gmap + * @pte: pte in parent gmap address space to get shadowed + * + * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the + * shadow table structure is incomplete, -ENOMEM if out of memory and + * -EFAULT if an address in the parent gmap could not be resolved. + * + * Called with sg->mm->mmap_sem in read. + */ +int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte) +{ + struct gmap *parent; + struct gmap_rmap *rmap; + unsigned long vmaddr, paddr; + spinlock_t *ptl; + pte_t *sptep, *tptep; + int prot; + int rc; + + BUG_ON(!gmap_is_shadow(sg)); + parent = sg->parent; + prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE; + + rmap = kzalloc(sizeof(*rmap), GFP_KERNEL); + if (!rmap) + return -ENOMEM; + rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE; + + while (1) { + paddr = pte_val(pte) & PAGE_MASK; + vmaddr = __gmap_translate(parent, paddr); + if (IS_ERR_VALUE(vmaddr)) { + rc = vmaddr; + break; + } + rc = radix_tree_preload(GFP_KERNEL); if (rc) break; - /* Walk the process page table, lock and get pte pointer */ - ptep = get_locked_pte(gmap->mm, addr, &ptl); - VM_BUG_ON(!ptep); - /* Set notification bit in the pgste of the pte */ - if ((pte_val(*ptep) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) { - ptep_set_notify(gmap->mm, addr, ptep); - gaddr += PAGE_SIZE; - len -= PAGE_SIZE; + rc = -EAGAIN; + sptep = gmap_pte_op_walk(parent, paddr, &ptl); + if (sptep) { + spin_lock(&sg->guest_table_lock); + /* Get page table pointer */ + tptep = (pte_t *) gmap_table_walk(sg, saddr, 0); + if (!tptep) { + spin_unlock(&sg->guest_table_lock); + gmap_pte_op_end(ptl); + radix_tree_preload_end(); + break; + } + rc = ptep_shadow_pte(sg->mm, saddr, sptep, tptep, pte); + if (rc > 0) { + /* Success and a new mapping */ + gmap_insert_rmap(sg, vmaddr, rmap); + rmap = NULL; + rc = 0; + } + gmap_pte_op_end(ptl); + spin_unlock(&sg->guest_table_lock); } - pte_unmap_unlock(ptep, ptl); + radix_tree_preload_end(); + if (!rc) + break; + rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot); + if (rc) + break; } - up_read(&gmap->mm->mmap_sem); + kfree(rmap); return rc; } -EXPORT_SYMBOL_GPL(gmap_ipte_notify); +EXPORT_SYMBOL_GPL(gmap_shadow_page); + +/** + * gmap_shadow_notify - handle notifications for shadow gmap + * + * Called with sg->parent->shadow_lock. + */ +static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr, + unsigned long offset, pte_t *pte) +{ + struct gmap_rmap *rmap, *rnext, *head; + unsigned long gaddr, start, end, bits, raddr; + unsigned long *table; + + BUG_ON(!gmap_is_shadow(sg)); + spin_lock(&sg->parent->guest_table_lock); + table = radix_tree_lookup(&sg->parent->host_to_guest, + vmaddr >> PMD_SHIFT); + gaddr = table ? __gmap_segment_gaddr(table) + offset : 0; + spin_unlock(&sg->parent->guest_table_lock); + if (!table) + return; + + spin_lock(&sg->guest_table_lock); + if (sg->removed) { + spin_unlock(&sg->guest_table_lock); + return; + } + /* Check for top level table */ + start = sg->orig_asce & _ASCE_ORIGIN; + end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * 4096; + if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start && + gaddr < end) { + /* The complete shadow table has to go */ + gmap_unshadow(sg); + spin_unlock(&sg->guest_table_lock); + list_del(&sg->list); + gmap_put(sg); + return; + } + /* Remove the page table tree from on specific entry */ + head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> 12); + gmap_for_each_rmap_safe(rmap, rnext, head) { + bits = rmap->raddr & _SHADOW_RMAP_MASK; + raddr = rmap->raddr ^ bits; + switch (bits) { + case _SHADOW_RMAP_REGION1: + gmap_unshadow_r2t(sg, raddr); + break; + case _SHADOW_RMAP_REGION2: + gmap_unshadow_r3t(sg, raddr); + break; + case _SHADOW_RMAP_REGION3: + gmap_unshadow_sgt(sg, raddr); + break; + case _SHADOW_RMAP_SEGMENT: + gmap_unshadow_pgt(sg, raddr); + break; + case _SHADOW_RMAP_PGTABLE: + gmap_unshadow_page(sg, raddr); + break; + } + kfree(rmap); + } + spin_unlock(&sg->guest_table_lock); +} /** * ptep_notify - call all invalidation callbacks for a specific pte. * @mm: pointer to the process mm_struct * @addr: virtual address in the process address space * @pte: pointer to the page table entry + * @bits: bits from the pgste that caused the notify call * * This function is assumed to be called with the page table lock held * for the pte to notify. */ -void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) +void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, + pte_t *pte, unsigned long bits) { unsigned long offset, gaddr; unsigned long *table; - struct gmap_notifier *nb; - struct gmap *gmap; + struct gmap *gmap, *sg, *next; offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); offset = offset * (4096 / sizeof(pte_t)); - spin_lock(&gmap_notifier_lock); - list_for_each_entry(gmap, &mm->context.gmap_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { + if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) { + spin_lock(&gmap->shadow_lock); + list_for_each_entry_safe(sg, next, + &gmap->children, list) + gmap_shadow_notify(sg, vmaddr, offset, pte); + spin_unlock(&gmap->shadow_lock); + } + if (!(bits & PGSTE_IN_BIT)) + continue; + spin_lock(&gmap->guest_table_lock); table = radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); - if (!table) - continue; - gaddr = __gmap_segment_gaddr(table) + offset; - list_for_each_entry(nb, &gmap_notifier_list, list) - nb->notifier_call(gmap, gaddr); + if (table) + gaddr = __gmap_segment_gaddr(table) + offset; + spin_unlock(&gmap->guest_table_lock); + if (table) + gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1); } - spin_unlock(&gmap_notifier_lock); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ptep_notify); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index e2565d2d0c32..995f78532cc2 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -137,6 +137,29 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) return new; } +#ifdef CONFIG_PGSTE + +struct page *page_table_alloc_pgste(struct mm_struct *mm) +{ + struct page *page; + unsigned long *table; + + page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + if (page) { + table = (unsigned long *) page_to_phys(page); + clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); + clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); + } + return page; +} + +void page_table_free_pgste(struct page *page) +{ + __free_page(page); +} + +#endif /* CONFIG_PGSTE */ + /* * page table entry allocation/free routines. */ @@ -149,7 +172,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) /* Try to get a fragment of a 4K page as a 2K page table */ if (!mm_alloc_pgste(mm)) { table = NULL; - spin_lock_bh(&mm->context.list_lock); + spin_lock_bh(&mm->context.pgtable_lock); if (!list_empty(&mm->context.pgtable_list)) { page = list_first_entry(&mm->context.pgtable_list, struct page, lru); @@ -164,7 +187,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) list_del(&page->lru); } } - spin_unlock_bh(&mm->context.list_lock); + spin_unlock_bh(&mm->context.pgtable_lock); if (table) return table; } @@ -187,9 +210,9 @@ unsigned long *page_table_alloc(struct mm_struct *mm) /* Return the first 2K fragment of the page */ atomic_set(&page->_mapcount, 1); clear_table(table, _PAGE_INVALID, PAGE_SIZE); - spin_lock_bh(&mm->context.list_lock); + spin_lock_bh(&mm->context.pgtable_lock); list_add(&page->lru, &mm->context.pgtable_list); - spin_unlock_bh(&mm->context.list_lock); + spin_unlock_bh(&mm->context.pgtable_lock); } return table; } @@ -203,13 +226,13 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) if (!mm_alloc_pgste(mm)) { /* Free 2K page table fragment of a 4K page */ bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.list_lock); + spin_lock_bh(&mm->context.pgtable_lock); mask = atomic_xor_bits(&page->_mapcount, 1U << bit); if (mask & 3) list_add(&page->lru, &mm->context.pgtable_list); else list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); + spin_unlock_bh(&mm->context.pgtable_lock); if (mask != 0) return; } @@ -235,13 +258,13 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, return; } bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.list_lock); + spin_lock_bh(&mm->context.pgtable_lock); mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit); if (mask & 3) list_add_tail(&page->lru, &mm->context.pgtable_list); else list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); + spin_unlock_bh(&mm->context.pgtable_lock); table = (unsigned long *) (__pa(table) | (1U << bit)); tlb_remove_table(tlb, table); } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index b98d1a152d46..5f092015aaa7 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -174,14 +174,17 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) return pgste; } -static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, - unsigned long addr, - pte_t *ptep, pgste_t pgste) +static inline pgste_t pgste_pte_notify(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - if (pgste_val(pgste) & PGSTE_IN_BIT) { - pgste_val(pgste) &= ~PGSTE_IN_BIT; - ptep_notify(mm, addr, ptep); + unsigned long bits; + + bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT); + if (bits) { + pgste_val(pgste) ^= bits; + ptep_notify(mm, addr, ptep, bits); } #endif return pgste; @@ -194,7 +197,7 @@ static inline pgste_t ptep_xchg_start(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, addr, ptep, pgste); + pgste = pgste_pte_notify(mm, addr, ptep, pgste); } return pgste; } @@ -459,6 +462,90 @@ void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) preempt_enable(); } +/** + * ptep_force_prot - change access rights of a locked pte + * @mm: pointer to the process mm_struct + * @addr: virtual address in the guest address space + * @ptep: pointer to the page table entry + * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE + * @bit: pgste bit to set (e.g. for notification) + * + * Returns 0 if the access rights were changed and -EAGAIN if the current + * and requested access rights are incompatible. + */ +int ptep_force_prot(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, int prot, unsigned long bit) +{ + pte_t entry; + pgste_t pgste; + int pte_i, pte_p; + + pgste = pgste_get_lock(ptep); + entry = *ptep; + /* Check pte entry after all locks have been acquired */ + pte_i = pte_val(entry) & _PAGE_INVALID; + pte_p = pte_val(entry) & _PAGE_PROTECT; + if ((pte_i && (prot != PROT_NONE)) || + (pte_p && (prot & PROT_WRITE))) { + pgste_set_unlock(ptep, pgste); + return -EAGAIN; + } + /* Change access rights and set pgste bit */ + if (prot == PROT_NONE && !pte_i) { + ptep_flush_direct(mm, addr, ptep); + pgste = pgste_update_all(entry, pgste, mm); + pte_val(entry) |= _PAGE_INVALID; + } + if (prot == PROT_READ && !pte_p) { + ptep_flush_direct(mm, addr, ptep); + pte_val(entry) &= ~_PAGE_INVALID; + pte_val(entry) |= _PAGE_PROTECT; + } + pgste_val(pgste) |= bit; + pgste = pgste_set_pte(ptep, pgste, entry); + pgste_set_unlock(ptep, pgste); + return 0; +} + +int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, + pte_t *sptep, pte_t *tptep, pte_t pte) +{ + pgste_t spgste, tpgste; + pte_t spte, tpte; + int rc = -EAGAIN; + + if (!(pte_val(*tptep) & _PAGE_INVALID)) + return 0; /* already shadowed */ + spgste = pgste_get_lock(sptep); + spte = *sptep; + if (!(pte_val(spte) & _PAGE_INVALID) && + !((pte_val(spte) & _PAGE_PROTECT) && + !(pte_val(pte) & _PAGE_PROTECT))) { + pgste_val(spgste) |= PGSTE_VSIE_BIT; + tpgste = pgste_get_lock(tptep); + pte_val(tpte) = (pte_val(spte) & PAGE_MASK) | + (pte_val(pte) & _PAGE_PROTECT); + /* don't touch the storage key - it belongs to parent pgste */ + tpgste = pgste_set_pte(tptep, tpgste, tpte); + pgste_set_unlock(tptep, tpgste); + rc = 1; + } + pgste_set_unlock(sptep, spgste); + return rc; +} + +void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep) +{ + pgste_t pgste; + + pgste = pgste_get_lock(ptep); + /* notifier is called by the caller */ + ptep_flush_direct(mm, saddr, ptep); + /* don't touch the storage key - it belongs to parent pgste */ + pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID)); + pgste_set_unlock(ptep, pgste); +} + static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) { if (!non_swap_entry(entry)) @@ -532,7 +619,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) pgste_val(pgste) &= ~PGSTE_UC_BIT; pte = *ptep; if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { - pgste = pgste_ipte_notify(mm, addr, ptep, pgste); + pgste = pgste_pte_notify(mm, addr, ptep, pgste); __ptep_ipte(addr, ptep); if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) pte_val(pte) |= _PAGE_PROTECT; @@ -555,12 +642,9 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pgste_t old, new; pte_t *ptep; - down_read(&mm->mmap_sem); ptep = get_locked_pte(mm, addr, &ptl); - if (unlikely(!ptep)) { - up_read(&mm->mmap_sem); + if (unlikely(!ptep)) return -EFAULT; - } new = old = pgste_get_lock(ptep); pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | @@ -587,45 +671,100 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); - up_read(&mm->mmap_sem); return 0; } EXPORT_SYMBOL(set_guest_storage_key); -unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr) +/** + * Conditionally set a guest storage key (handling csske). + * oldkey will be updated when either mr or mc is set and a pointer is given. + * + * Returns 0 if a guests storage key update wasn't necessary, 1 if the guest + * storage key was updated and -EFAULT on access errors. + */ +int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned char key, unsigned char *oldkey, + bool nq, bool mr, bool mc) +{ + unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT; + int rc; + + /* we can drop the pgste lock between getting and setting the key */ + if (mr | mc) { + rc = get_guest_storage_key(current->mm, addr, &tmp); + if (rc) + return rc; + if (oldkey) + *oldkey = tmp; + if (!mr) + mask |= _PAGE_REFERENCED; + if (!mc) + mask |= _PAGE_CHANGED; + if (!((tmp ^ key) & mask)) + return 0; + } + rc = set_guest_storage_key(current->mm, addr, key, nq); + return rc < 0 ? rc : 1; +} +EXPORT_SYMBOL(cond_set_guest_storage_key); + +/** + * Reset a guest reference bit (rrbe), returning the reference and changed bit. + * + * Returns < 0 in case of error, otherwise the cc to be reported to the guest. + */ +int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) { - unsigned char key; spinlock_t *ptl; - pgste_t pgste; + pgste_t old, new; pte_t *ptep; + int cc = 0; - down_read(&mm->mmap_sem); ptep = get_locked_pte(mm, addr, &ptl); - if (unlikely(!ptep)) { - up_read(&mm->mmap_sem); + if (unlikely(!ptep)) return -EFAULT; - } - pgste = pgste_get_lock(ptep); - if (pte_val(*ptep) & _PAGE_INVALID) { - key = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56; - key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56; - key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48; - key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48; - } else { - key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); + new = old = pgste_get_lock(ptep); + /* Reset guest reference bit only */ + pgste_val(new) &= ~PGSTE_GR_BIT; - /* Reflect guest's logical view, not physical */ - if (pgste_val(pgste) & PGSTE_GR_BIT) - key |= _PAGE_REFERENCED; - if (pgste_val(pgste) & PGSTE_GC_BIT) - key |= _PAGE_CHANGED; + if (!(pte_val(*ptep) & _PAGE_INVALID)) { + cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); + /* Merge real referenced bit into host-set */ + pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT; } + /* Reflect guest's logical view, not physical */ + cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49; + /* Changing the guest storage key is considered a change of the page */ + if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT) + pgste_val(new) |= PGSTE_UC_BIT; + + pgste_set_unlock(ptep, new); + pte_unmap_unlock(ptep, ptl); + return 0; +} +EXPORT_SYMBOL(reset_guest_reference_bit); + +int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned char *key) +{ + spinlock_t *ptl; + pgste_t pgste; + pte_t *ptep; + ptep = get_locked_pte(mm, addr, &ptl); + if (unlikely(!ptep)) + return -EFAULT; + + pgste = pgste_get_lock(ptep); + *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; + if (!(pte_val(*ptep) & _PAGE_INVALID)) + *key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); + /* Reflect guest's logical view, not physical */ + *key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; pgste_set_unlock(ptep, pgste); pte_unmap_unlock(ptep, ptl); - up_read(&mm->mmap_sem); - return key; + return 0; } EXPORT_SYMBOL(get_guest_storage_key); #endif diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 070f1ae5cfad..7297fce9bf80 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -286,7 +286,7 @@ static inline void zpci_err_dma(unsigned long rc, unsigned long addr) static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); unsigned long nr_pages, iommu_page_index; @@ -332,7 +332,7 @@ out_err: static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); unsigned long iommu_page_index; @@ -355,7 +355,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, static void *s390_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); struct page *page; @@ -370,7 +370,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size, pa = page_to_phys(page); memset((void *) pa, 0, size); - map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, NULL); + map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0); if (dma_mapping_error(dev, map)) { free_pages(pa, get_order(size)); return NULL; @@ -384,19 +384,19 @@ static void *s390_dma_alloc(struct device *dev, size_t size, static void s390_dma_free(struct device *dev, size_t size, void *pa, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); size = PAGE_ALIGN(size); atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages); - s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); + s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0); free_pages((unsigned long) pa, get_order(size)); } static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, int nr_elements, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { int mapped_elements = 0; struct scatterlist *s; @@ -405,7 +405,7 @@ static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, for_each_sg(sg, s, nr_elements, i) { struct page *page = sg_page(s); s->dma_address = s390_dma_map_pages(dev, page, s->offset, - s->length, dir, NULL); + s->length, dir, 0); if (!dma_mapping_error(dev, s->dma_address)) { s->dma_length = s->length; mapped_elements++; @@ -419,7 +419,7 @@ unmap: for_each_sg(sg, s, mapped_elements, i) { if (s->dma_address) s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, - dir, NULL); + dir, 0); s->dma_address = 0; s->dma_length = 0; } @@ -429,13 +429,14 @@ unmap: static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nr_elements, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *s; int i; for_each_sg(sg, s, nr_elements, i) { - s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, dir, NULL); + s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, dir, + 0); s->dma_address = 0; s->dma_length = 0; } diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c index 9fbce49ad3bd..444c26c0f750 100644 --- a/arch/score/mm/init.c +++ b/arch/score/mm/init.c @@ -91,7 +91,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) } #endif -void __init_refok free_initmem(void) +void __ref free_initmem(void) { free_initmem_default(POISON_FREE_INITMEM); } diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 0d5f3a9bb315..ee086958b2b2 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -38,6 +38,7 @@ config SUPERH select GENERIC_IDLE_POLL_SETUP select GENERIC_CLOCKEVENTS select GENERIC_CMOS_UPDATE if SH_SH03 || SH_DREAMCAST + select GENERIC_SCHED_CLOCK select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select HAVE_MOD_ARCH_SPECIFIC if DWARF_UNWINDER @@ -45,6 +46,7 @@ config SUPERH select OLD_SIGSUSPEND select OLD_SIGACTION select HAVE_ARCH_AUDITSYSCALL + select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_NMI help The SuperH is a RISC processor targeted for use in embedded systems @@ -184,6 +186,12 @@ config CPU_SH2A select CPU_SH2 select UNCACHED_MAPPING +config CPU_J2 + bool + select CPU_SH2 + select OF + select OF_EARLY_FLATTREE + config CPU_SH3 bool select CPU_HAS_INTEVT @@ -250,6 +258,12 @@ config CPU_SUBTYPE_SH7619 select CPU_SH2 select SYS_SUPPORTS_SH_CMT +config CPU_SUBTYPE_J2 + bool "Support J2 processor" + select CPU_J2 + select SYS_SUPPORTS_SMP + select GENERIC_CLOCKEVENTS_BROADCAST if SMP + # SH-2A Processor Support config CPU_SUBTYPE_SH7201 @@ -739,6 +753,26 @@ endmenu menu "Boot options" +config USE_BUILTIN_DTB + bool "Use builtin DTB" + default n + depends on SH_DEVICE_TREE + help + Link a device tree blob for particular hardware into the kernel, + suppressing use of the DTB pointer provided by the bootloader. + This option should only be used with legacy bootloaders that are + not capable of providing a DTB to the kernel, or for experimental + hardware without stable device tree bindings. + +config BUILTIN_DTB_SOURCE + string "Source file for builtin DTB" + default "" + depends on USE_BUILTIN_DTB + help + Base name (without suffix, relative to arch/sh/boot/dts) for the + a DTS file that will be used to produce the DTB linked into the + kernel. + config ZERO_PAGE_OFFSET hex default "0x00010000" if PAGE_SIZE_64KB || SH_RTS7751R2D || \ diff --git a/arch/sh/Makefile b/arch/sh/Makefile index bf5b3f5f4962..00476662ac2c 100644 --- a/arch/sh/Makefile +++ b/arch/sh/Makefile @@ -31,6 +31,7 @@ isa-y := $(isa-y)-up endif cflags-$(CONFIG_CPU_SH2) := $(call cc-option,-m2,) +cflags-$(CONFIG_CPU_J2) := $(call cc-option,-mj2,) cflags-$(CONFIG_CPU_SH2A) += $(call cc-option,-m2a,) \ $(call cc-option,-m2a-nofpu,) \ $(call cc-option,-m4-nofpu,) @@ -130,6 +131,8 @@ head-y := arch/sh/kernel/head_$(BITS).o core-y += arch/sh/kernel/ arch/sh/mm/ arch/sh/boards/ core-$(CONFIG_SH_FPU_EMU) += arch/sh/math-emu/ +core-$(CONFIG_USE_BUILTIN_DTB) += arch/sh/boot/dts/ + # Mach groups machdir-$(CONFIG_SOLUTION_ENGINE) += mach-se machdir-$(CONFIG_SH_HP6XX) += mach-hp6xx diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig index e0db04664e2e..e9c2c42031fe 100644 --- a/arch/sh/boards/Kconfig +++ b/arch/sh/boards/Kconfig @@ -11,6 +11,7 @@ config SH_DEVICE_TREE select OF select OF_EARLY_FLATTREE select CLKSRC_OF + select COMMON_CLK select GENERIC_CALIBRATE_DELAY help Select Board Described by Device Tree to build a kernel that diff --git a/arch/sh/boards/board-secureedge5410.c b/arch/sh/boards/board-secureedge5410.c index 98b36205aa7b..97ec67ffec2b 100644 --- a/arch/sh/boards/board-secureedge5410.c +++ b/arch/sh/boards/board-secureedge5410.c @@ -14,7 +14,6 @@ #include <linux/interrupt.h> #include <linux/timer.h> #include <linux/delay.h> -#include <linux/module.h> #include <linux/sched.h> #include <asm/machvec.h> #include <mach/secureedge5410.h> @@ -49,7 +48,7 @@ static int __init eraseconfig_init(void) irq); return 0; } -module_init(eraseconfig_init); +device_initcall(eraseconfig_init); /* * Initialize IRQ setting diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c index 911ffb9f115b..1fb6d5714bae 100644 --- a/arch/sh/boards/of-generic.c +++ b/arch/sh/boards/of-generic.c @@ -124,13 +124,22 @@ static void __init sh_of_time_init(void) static void __init sh_of_setup(char **cmdline_p) { + struct device_node *root; + +#ifdef CONFIG_USE_BUILTIN_DTB + unflatten_and_copy_device_tree(); +#else unflatten_device_tree(); +#endif board_time_init = sh_of_time_init; - sh_mv.mv_name = of_flat_dt_get_machine_name(); - if (!sh_mv.mv_name) - sh_mv.mv_name = "Unknown SH model"; + sh_mv.mv_name = "Unknown SH model"; + root = of_find_node_by_path("/"); + if (root) { + of_property_read_string(root, "model", &sh_mv.mv_name); + of_node_put(root); + } sh_of_smp_probe(); } diff --git a/arch/sh/boot/dts/Makefile b/arch/sh/boot/dts/Makefile new file mode 100644 index 000000000000..e5ce3a0de7f4 --- /dev/null +++ b/arch/sh/boot/dts/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_USE_BUILTIN_DTB) += $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o + +clean-files := *.dtb.S diff --git a/arch/sh/boot/dts/j2_mimas_v2.dts b/arch/sh/boot/dts/j2_mimas_v2.dts new file mode 100755 index 000000000000..880de75360b3 --- /dev/null +++ b/arch/sh/boot/dts/j2_mimas_v2.dts @@ -0,0 +1,96 @@ +/dts-v1/; + +/ { + compatible = "jcore,j2-soc"; + model = "J2 FPGA SoC on Mimas v2 board"; + + #address-cells = <1>; + #size-cells = <1>; + + interrupt-parent = <&aic>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "jcore,j2"; + reg = <0>; + clock-frequency = <50000000>; + d-cache-size = <8192>; + i-cache-size = <8192>; + d-cache-block-size = <16>; + i-cache-block-size = <16>; + }; + }; + + memory@10000000 { + device_type = "memory"; + reg = <0x10000000 0x4000000>; + }; + + aliases { + serial0 = &uart0; + spi0 = &spi0; + }; + + chosen { + stdout-path = "serial0"; + }; + + soc@abcd0000 { + compatible = "simple-bus"; + ranges = <0 0xabcd0000 0x100000>; + + #address-cells = <1>; + #size-cells = <1>; + + aic: interrupt-controller@200 { + compatible = "jcore,aic1"; + reg = <0x200 0x10>; + interrupt-controller; + #interrupt-cells = <1>; + }; + + cache-controller@c0 { + compatible = "jcore,cache"; + reg = <0xc0 4>; + }; + + timer@200 { + compatible = "jcore,pit"; + reg = <0x200 0x30>; + interrupts = <0x48>; + }; + + spi0: spi@40 { + compatible = "jcore,spi2"; + + #address-cells = <1>; + #size-cells = <0>; + + spi-max-frequency = <25000000>; + + reg = <0x40 0x8>; + + sdcard@0 { + compatible = "mmc-spi-slot"; + reg = <0>; + spi-max-frequency = <25000000>; + voltage-ranges = <3200 3400>; + mode = <0>; + }; + }; + + uart0: serial@100 { + clock-frequency = <125000000>; + compatible = "xlnx,xps-uartlite-1.00.a"; + current-speed = <19200>; + device_type = "serial"; + interrupts = <0x12>; + port-number = <0>; + reg = <0x100 0x10>; + }; + }; +}; diff --git a/arch/sh/configs/j2_defconfig b/arch/sh/configs/j2_defconfig new file mode 100644 index 000000000000..94d1eca52f72 --- /dev/null +++ b/arch/sh/configs/j2_defconfig @@ -0,0 +1,40 @@ +CONFIG_SMP=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_CPU_SUBTYPE_J2=y +CONFIG_MEMORY_START=0x10000000 +CONFIG_MEMORY_SIZE=0x04000000 +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_SH_DEVICE_TREE=y +CONFIG_HZ_100=y +CONFIG_CMDLINE_OVERWRITE=y +CONFIG_CMDLINE="console=ttyUL0 earlycon" +CONFIG_BINFMT_ELF_FDPIC=y +CONFIG_BINFMT_FLAT=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_NETDEVICES=y +CONFIG_SERIAL_UARTLITE=y +CONFIG_SERIAL_UARTLITE_CONSOLE=y +CONFIG_I2C=y +CONFIG_SPI=y +CONFIG_SPI_JCORE=y +CONFIG_WATCHDOG=y +CONFIG_MMC=y +CONFIG_MMC_SPI=y +CONFIG_CLKSRC_JCORE_PIT=y +CONFIG_JCORE_AIC=y +CONFIG_EXT4_FS=y +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_IOCHARSET="ascii" +CONFIG_FAT_DEFAULT_UTF8=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_UTF8=y diff --git a/arch/sh/drivers/heartbeat.c b/arch/sh/drivers/heartbeat.c index 7efc9c354fc7..49bace446a1a 100644 --- a/arch/sh/drivers/heartbeat.c +++ b/arch/sh/drivers/heartbeat.c @@ -19,7 +19,6 @@ * for more details. */ #include <linux/init.h> -#include <linux/module.h> #include <linux/platform_device.h> #include <linux/sched.h> #include <linux/timer.h> @@ -139,26 +138,11 @@ static int heartbeat_drv_probe(struct platform_device *pdev) return mod_timer(&hd->timer, jiffies + 1); } -static int heartbeat_drv_remove(struct platform_device *pdev) -{ - struct heartbeat_data *hd = platform_get_drvdata(pdev); - - del_timer_sync(&hd->timer); - iounmap(hd->base); - - platform_set_drvdata(pdev, NULL); - - if (!pdev->dev.platform_data) - kfree(hd); - - return 0; -} - static struct platform_driver heartbeat_driver = { .probe = heartbeat_drv_probe, - .remove = heartbeat_drv_remove, .driver = { - .name = DRV_NAME, + .name = DRV_NAME, + .suppress_bind_attrs = true, }, }; @@ -167,14 +151,4 @@ static int __init heartbeat_init(void) printk(KERN_NOTICE DRV_NAME ": version %s loaded\n", DRV_VERSION); return platform_driver_register(&heartbeat_driver); } - -static void __exit heartbeat_exit(void) -{ - platform_driver_unregister(&heartbeat_driver); -} -module_init(heartbeat_init); -module_exit(heartbeat_exit); - -MODULE_VERSION(DRV_VERSION); -MODULE_AUTHOR("Paul Mundt"); -MODULE_LICENSE("GPL v2"); +device_initcall(heartbeat_init); diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index d5462b7bc514..84563e39a5b8 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -221,7 +221,7 @@ pcibios_bus_report_status_early(struct pci_channel *hose, * We can't use pci_find_device() here since we are * called from interrupt context. */ -static void __init_refok +static void __ref pcibios_bus_report_status(struct pci_bus *bus, unsigned int status_mask, int warn) { @@ -256,7 +256,7 @@ pcibios_bus_report_status(struct pci_bus *bus, unsigned int status_mask, pcibios_bus_report_status(dev->subordinate, status_mask, warn); } -void __init_refok pcibios_report_status(unsigned int status_mask, int warn) +void __ref pcibios_report_status(unsigned int status_mask, int warn) { struct pci_channel *hose; diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index c399e1c55685..8a7bd80c8b33 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h @@ -1,6 +1,12 @@ #ifndef __ASM_SH_ATOMIC_H #define __ASM_SH_ATOMIC_H +#if defined(CONFIG_CPU_J2) + +#include <asm-generic/atomic.h> + +#else + /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. @@ -63,4 +69,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) return c; } +#endif /* CONFIG_CPU_J2 */ + #endif /* __ASM_SH_ATOMIC_H */ diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h index 8a84e05adb2e..3c30b6e166b6 100644 --- a/arch/sh/include/asm/barrier.h +++ b/arch/sh/include/asm/barrier.h @@ -29,6 +29,11 @@ #define wmb() mb() #define ctrl_barrier() __icbi(PAGE_OFFSET) #else +#if defined(CONFIG_CPU_J2) && defined(CONFIG_SMP) +#define __smp_mb() do { int tmp = 0; __asm__ __volatile__ ("cas.l %0,%0,@%1" : "+r"(tmp) : "z"(&tmp) : "memory", "t"); } while(0) +#define __smp_rmb() __smp_mb() +#define __smp_wmb() __smp_mb() +#endif #define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop") #endif diff --git a/arch/sh/include/asm/bitops-cas.h b/arch/sh/include/asm/bitops-cas.h new file mode 100644 index 000000000000..88f793c04d3c --- /dev/null +++ b/arch/sh/include/asm/bitops-cas.h @@ -0,0 +1,93 @@ +#ifndef __ASM_SH_BITOPS_CAS_H +#define __ASM_SH_BITOPS_CAS_H + +static inline unsigned __bo_cas(volatile unsigned *p, unsigned old, unsigned new) +{ + __asm__ __volatile__("cas.l %1,%0,@r0" + : "+r"(new) + : "r"(old), "z"(p) + : "t", "memory" ); + return new; +} + +static inline void set_bit(int nr, volatile void *addr) +{ + unsigned mask, old; + volatile unsigned *a = addr; + + a += nr >> 5; + mask = 1U << (nr & 0x1f); + + do old = *a; + while (__bo_cas(a, old, old|mask) != old); +} + +static inline void clear_bit(int nr, volatile void *addr) +{ + unsigned mask, old; + volatile unsigned *a = addr; + + a += nr >> 5; + mask = 1U << (nr & 0x1f); + + do old = *a; + while (__bo_cas(a, old, old&~mask) != old); +} + +static inline void change_bit(int nr, volatile void *addr) +{ + unsigned mask, old; + volatile unsigned *a = addr; + + a += nr >> 5; + mask = 1U << (nr & 0x1f); + + do old = *a; + while (__bo_cas(a, old, old^mask) != old); +} + +static inline int test_and_set_bit(int nr, volatile void *addr) +{ + unsigned mask, old; + volatile unsigned *a = addr; + + a += nr >> 5; + mask = 1U << (nr & 0x1f); + + do old = *a; + while (__bo_cas(a, old, old|mask) != old); + + return !!(old & mask); +} + +static inline int test_and_clear_bit(int nr, volatile void *addr) +{ + unsigned mask, old; + volatile unsigned *a = addr; + + a += nr >> 5; + mask = 1U << (nr & 0x1f); + + do old = *a; + while (__bo_cas(a, old, old&~mask) != old); + + return !!(old & mask); +} + +static inline int test_and_change_bit(int nr, volatile void *addr) +{ + unsigned mask, old; + volatile unsigned *a = addr; + + a += nr >> 5; + mask = 1U << (nr & 0x1f); + + do old = *a; + while (__bo_cas(a, old, old^mask) != old); + + return !!(old & mask); +} + +#include <asm-generic/bitops/non-atomic.h> + +#endif /* __ASM_SH_BITOPS_CAS_H */ diff --git a/arch/sh/include/asm/bitops.h b/arch/sh/include/asm/bitops.h index fc8e652cf173..a8699d60a8c4 100644 --- a/arch/sh/include/asm/bitops.h +++ b/arch/sh/include/asm/bitops.h @@ -18,6 +18,8 @@ #include <asm/bitops-op32.h> #elif defined(CONFIG_CPU_SH4A) #include <asm/bitops-llsc.h> +#elif defined(CONFIG_CPU_J2) && defined(CONFIG_SMP) +#include <asm/bitops-cas.h> #else #include <asm-generic/bitops/atomic.h> #include <asm-generic/bitops/non-atomic.h> diff --git a/arch/sh/include/asm/cmpxchg-cas.h b/arch/sh/include/asm/cmpxchg-cas.h new file mode 100644 index 000000000000..d0d86649e8c1 --- /dev/null +++ b/arch/sh/include/asm/cmpxchg-cas.h @@ -0,0 +1,24 @@ +#ifndef __ASM_SH_CMPXCHG_CAS_H +#define __ASM_SH_CMPXCHG_CAS_H + +static inline unsigned long +__cmpxchg_u32(volatile u32 *m, unsigned long old, unsigned long new) +{ + __asm__ __volatile__("cas.l %1,%0,@r0" + : "+r"(new) + : "r"(old), "z"(m) + : "t", "memory" ); + return new; +} + +static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val) +{ + unsigned long old; + do old = *m; + while (__cmpxchg_u32(m, old, val) != old); + return old; +} + +#include <asm/cmpxchg-xchg.h> + +#endif /* __ASM_SH_CMPXCHG_CAS_H */ diff --git a/arch/sh/include/asm/cmpxchg-xchg.h b/arch/sh/include/asm/cmpxchg-xchg.h index 7219719c23a3..1e881f5db659 100644 --- a/arch/sh/include/asm/cmpxchg-xchg.h +++ b/arch/sh/include/asm/cmpxchg-xchg.h @@ -21,7 +21,7 @@ static inline u32 __xchg_cmpxchg(volatile void *ptr, u32 x, int size) int off = (unsigned long)ptr % sizeof(u32); volatile u32 *p = ptr - off; #ifdef __BIG_ENDIAN - int bitoff = (sizeof(u32) - 1 - off) * BITS_PER_BYTE; + int bitoff = (sizeof(u32) - size - off) * BITS_PER_BYTE; #else int bitoff = off * BITS_PER_BYTE; #endif diff --git a/arch/sh/include/asm/cmpxchg.h b/arch/sh/include/asm/cmpxchg.h index 5225916c1057..3dfe0467a773 100644 --- a/arch/sh/include/asm/cmpxchg.h +++ b/arch/sh/include/asm/cmpxchg.h @@ -13,6 +13,8 @@ #include <asm/cmpxchg-grb.h> #elif defined(CONFIG_CPU_SH4A) #include <asm/cmpxchg-llsc.h> +#elif defined(CONFIG_CPU_J2) && defined(CONFIG_SMP) +#include <asm/cmpxchg-cas.h> #else #include <asm/cmpxchg-irq.h> #endif diff --git a/arch/sh/include/asm/dma-mapping.h b/arch/sh/include/asm/dma-mapping.h index e11cf0c8206b..0052ad40e86d 100644 --- a/arch/sh/include/asm/dma-mapping.h +++ b/arch/sh/include/asm/dma-mapping.h @@ -17,9 +17,9 @@ void dma_cache_sync(struct device *dev, void *vaddr, size_t size, /* arch/sh/mm/consistent.c */ extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag, - struct dma_attrs *attrs); + unsigned long attrs); extern void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs); + unsigned long attrs); #endif /* __ASM_SH_DMA_MAPPING_H */ diff --git a/arch/sh/include/asm/futex-cas.h b/arch/sh/include/asm/futex-cas.h new file mode 100644 index 000000000000..267cb7a5f101 --- /dev/null +++ b/arch/sh/include/asm/futex-cas.h @@ -0,0 +1,34 @@ +#ifndef __ASM_SH_FUTEX_CAS_H +#define __ASM_SH_FUTEX_CAS_H + +static inline int atomic_futex_op_cmpxchg_inatomic(u32 *uval, + u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int err = 0; + __asm__ __volatile__( + "1:\n\t" + "cas.l %2, %1, @r0\n" + "2:\n\t" +#ifdef CONFIG_MMU + ".section .fixup,\"ax\"\n" + "3:\n\t" + "mov.l 4f, %0\n\t" + "jmp @%0\n\t" + " mov %3, %0\n\t" + ".balign 4\n" + "4: .long 2b\n\t" + ".previous\n" + ".section __ex_table,\"a\"\n\t" + ".long 1b, 3b\n\t" + ".previous" +#endif + :"+r" (err), "+r" (newval) + :"r" (oldval), "i" (-EFAULT), "z" (uaddr) + :"t", "memory"); + if (err) return err; + *uval = newval; + return 0; +} + +#endif /* __ASM_SH_FUTEX_CAS_H */ diff --git a/arch/sh/include/asm/futex-irq.h b/arch/sh/include/asm/futex-irq.h index 63d33129ea23..ab01dbee0a82 100644 --- a/arch/sh/include/asm/futex-irq.h +++ b/arch/sh/include/asm/futex-irq.h @@ -1,92 +1,6 @@ #ifndef __ASM_SH_FUTEX_IRQ_H #define __ASM_SH_FUTEX_IRQ_H - -static inline int atomic_futex_op_xchg_set(int oparg, u32 __user *uaddr, - int *oldval) -{ - unsigned long flags; - int ret; - - local_irq_save(flags); - - ret = get_user(*oldval, uaddr); - if (!ret) - ret = put_user(oparg, uaddr); - - local_irq_restore(flags); - - return ret; -} - -static inline int atomic_futex_op_xchg_add(int oparg, u32 __user *uaddr, - int *oldval) -{ - unsigned long flags; - int ret; - - local_irq_save(flags); - - ret = get_user(*oldval, uaddr); - if (!ret) - ret = put_user(*oldval + oparg, uaddr); - - local_irq_restore(flags); - - return ret; -} - -static inline int atomic_futex_op_xchg_or(int oparg, u32 __user *uaddr, - int *oldval) -{ - unsigned long flags; - int ret; - - local_irq_save(flags); - - ret = get_user(*oldval, uaddr); - if (!ret) - ret = put_user(*oldval | oparg, uaddr); - - local_irq_restore(flags); - - return ret; -} - -static inline int atomic_futex_op_xchg_and(int oparg, u32 __user *uaddr, - int *oldval) -{ - unsigned long flags; - int ret; - - local_irq_save(flags); - - ret = get_user(*oldval, uaddr); - if (!ret) - ret = put_user(*oldval & oparg, uaddr); - - local_irq_restore(flags); - - return ret; -} - -static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr, - int *oldval) -{ - unsigned long flags; - int ret; - - local_irq_save(flags); - - ret = get_user(*oldval, uaddr); - if (!ret) - ret = put_user(*oldval ^ oparg, uaddr); - - local_irq_restore(flags); - - return ret; -} - static inline int atomic_futex_op_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) diff --git a/arch/sh/include/asm/futex-llsc.h b/arch/sh/include/asm/futex-llsc.h new file mode 100644 index 000000000000..23591703bec0 --- /dev/null +++ b/arch/sh/include/asm/futex-llsc.h @@ -0,0 +1,41 @@ +#ifndef __ASM_SH_FUTEX_LLSC_H +#define __ASM_SH_FUTEX_LLSC_H + +static inline int atomic_futex_op_cmpxchg_inatomic(u32 *uval, + u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int err = 0; + __asm__ __volatile__( + "synco\n" + "1:\n\t" + "movli.l @%2, r0\n\t" + "mov r0, %1\n\t" + "cmp/eq %1, %4\n\t" + "bf 2f\n\t" + "mov %5, r0\n\t" + "movco.l r0, @%2\n\t" + "bf 1b\n" + "2:\n\t" + "synco\n\t" +#ifdef CONFIG_MMU + ".section .fixup,\"ax\"\n" + "3:\n\t" + "mov.l 4f, %0\n\t" + "jmp @%0\n\t" + " mov %3, %0\n\t" + ".balign 4\n" + "4: .long 2b\n\t" + ".previous\n" + ".section __ex_table,\"a\"\n\t" + ".long 1b, 3b\n\t" + ".previous" +#endif + :"+r" (err), "=&r" (*uval) + :"r" (uaddr), "i" (-EFAULT), "r" (oldval), "r" (newval) + :"t", "memory", "r0"); + if (err) return err; + return 0; +} + +#endif /* __ASM_SH_FUTEX_LLSC_H */ diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h index 7be39a646fbd..d0078747d308 100644 --- a/arch/sh/include/asm/futex.h +++ b/arch/sh/include/asm/futex.h @@ -7,16 +7,34 @@ #include <linux/uaccess.h> #include <asm/errno.h> -/* XXX: UP variants, fix for SH-4A and SMP.. */ +#if !defined(CONFIG_SMP) #include <asm/futex-irq.h> +#elif defined(CONFIG_CPU_J2) +#include <asm/futex-cas.h> +#elif defined(CONFIG_CPU_SH4A) +#include <asm/futex-llsc.h> +#else +#error SMP not supported on this configuration. +#endif + +static inline int +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval); +} static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; + u32 oparg = (encoded_op << 8) >> 20; + u32 cmparg = (encoded_op << 20) >> 20; + u32 oldval, newval, prev; + int ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; @@ -26,26 +44,39 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) pagefault_disable(); - switch (op) { - case FUTEX_OP_SET: - ret = atomic_futex_op_xchg_set(oparg, uaddr, &oldval); - break; - case FUTEX_OP_ADD: - ret = atomic_futex_op_xchg_add(oparg, uaddr, &oldval); - break; - case FUTEX_OP_OR: - ret = atomic_futex_op_xchg_or(oparg, uaddr, &oldval); - break; - case FUTEX_OP_ANDN: - ret = atomic_futex_op_xchg_and(~oparg, uaddr, &oldval); - break; - case FUTEX_OP_XOR: - ret = atomic_futex_op_xchg_xor(oparg, uaddr, &oldval); - break; - default: - ret = -ENOSYS; - break; - } + do { + if (op == FUTEX_OP_SET) + ret = oldval = 0; + else + ret = get_user(oldval, uaddr); + + if (ret) break; + + switch (op) { + case FUTEX_OP_SET: + newval = oparg; + break; + case FUTEX_OP_ADD: + newval = oldval + oparg; + break; + case FUTEX_OP_OR: + newval = oldval | oparg; + break; + case FUTEX_OP_ANDN: + newval = oldval & ~oparg; + break; + case FUTEX_OP_XOR: + newval = oldval ^ oparg; + break; + default: + ret = -ENOSYS; + break; + } + + if (ret) break; + + ret = futex_atomic_cmpxchg_inatomic(&prev, uaddr, oldval, newval); + } while (!ret && prev != oldval); pagefault_enable(); @@ -53,10 +84,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) switch (cmp) { case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + case FUTEX_OP_CMP_LT: ret = ((int)oldval < (int)cmparg); break; + case FUTEX_OP_CMP_GE: ret = ((int)oldval >= (int)cmparg); break; + case FUTEX_OP_CMP_LE: ret = ((int)oldval <= (int)cmparg); break; + case FUTEX_OP_CMP_GT: ret = ((int)oldval > (int)cmparg); break; default: ret = -ENOSYS; } } @@ -64,15 +95,5 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) return ret; } -static inline int -futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, - u32 oldval, u32 newval) -{ - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) - return -EFAULT; - - return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval); -} - #endif /* __KERNEL__ */ #endif /* __ASM_SH_FUTEX_H */ diff --git a/arch/sh/include/asm/mc146818rtc.h b/arch/sh/include/asm/mc146818rtc.h deleted file mode 100644 index 0aee96a97330..000000000000 --- a/arch/sh/include/asm/mc146818rtc.h +++ /dev/null @@ -1,7 +0,0 @@ -/* - * Machine dependent access functions for RTC registers. - */ -#ifndef _ASM_MC146818RTC_H -#define _ASM_MC146818RTC_H - -#endif /* _ASM_MC146818RTC_H */ diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h index 1506897648aa..f9a09942a32d 100644 --- a/arch/sh/include/asm/processor.h +++ b/arch/sh/include/asm/processor.h @@ -15,7 +15,7 @@ */ enum cpu_type { /* SH-2 types */ - CPU_SH7619, + CPU_SH7619, CPU_J2, /* SH-2A types */ CPU_SH7201, CPU_SH7203, CPU_SH7206, CPU_SH7263, CPU_SH7264, CPU_SH7269, diff --git a/arch/sh/include/asm/rtc.h b/arch/sh/include/asm/rtc.h index 52b0c2dba979..f7b010d48af7 100644 --- a/arch/sh/include/asm/rtc.h +++ b/arch/sh/include/asm/rtc.h @@ -6,17 +6,6 @@ extern void (*board_time_init)(void); extern void (*rtc_sh_get_time)(struct timespec *); extern int (*rtc_sh_set_time)(const time_t); -/* some dummy definitions */ -#define RTC_BATT_BAD 0x100 /* battery bad */ -#define RTC_SQWE 0x08 /* enable square-wave output */ -#define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */ -#define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */ -#define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */ - -struct rtc_time; -unsigned int get_rtc_time(struct rtc_time *); -int set_rtc_time(struct rtc_time *); - #define RTC_CAP_4_DIGIT_YEAR (1 << 0) struct sh_rtc_platform_info { diff --git a/arch/sh/include/asm/spinlock-cas.h b/arch/sh/include/asm/spinlock-cas.h new file mode 100644 index 000000000000..c46e8cc7b515 --- /dev/null +++ b/arch/sh/include/asm/spinlock-cas.h @@ -0,0 +1,117 @@ +/* + * include/asm-sh/spinlock-cas.h + * + * Copyright (C) 2015 SEI + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#ifndef __ASM_SH_SPINLOCK_CAS_H +#define __ASM_SH_SPINLOCK_CAS_H + +#include <asm/barrier.h> +#include <asm/processor.h> + +static inline unsigned __sl_cas(volatile unsigned *p, unsigned old, unsigned new) +{ + __asm__ __volatile__("cas.l %1,%0,@r0" + : "+r"(new) + : "r"(old), "z"(p) + : "t", "memory" ); + return new; +} + +/* + * Your basic SMP spinlocks, allowing only a single CPU anywhere + */ + +#define arch_spin_is_locked(x) ((x)->lock <= 0) +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, VAL > 0); +} + +static inline void arch_spin_lock(arch_spinlock_t *lock) +{ + while (!__sl_cas(&lock->lock, 1, 0)); +} + +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + __sl_cas(&lock->lock, 0, 1); +} + +static inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + return __sl_cas(&lock->lock, 1, 0); +} + +/* + * Read-write spinlocks, allowing multiple readers but only one writer. + * + * NOTE! it is quite common to have readers in interrupts but no interrupt + * writers. For those circumstances we can "mix" irq-safe locks - any writer + * needs to get a irq-safe write-lock, but readers can get non-irqsafe + * read-locks. + */ + +/** + * read_can_lock - would read_trylock() succeed? + * @lock: the rwlock in question. + */ +#define arch_read_can_lock(x) ((x)->lock > 0) + +/** + * write_can_lock - would write_trylock() succeed? + * @lock: the rwlock in question. + */ +#define arch_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS) + +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + unsigned old; + do old = rw->lock; + while (!old || __sl_cas(&rw->lock, old, old-1) != old); +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + unsigned old; + do old = rw->lock; + while (__sl_cas(&rw->lock, old, old+1) != old); +} + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + while (__sl_cas(&rw->lock, RW_LOCK_BIAS, 0) != RW_LOCK_BIAS); +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + __sl_cas(&rw->lock, 0, RW_LOCK_BIAS); +} + +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + unsigned old; + do old = rw->lock; + while (old && __sl_cas(&rw->lock, old, old-1) != old); + return !!old; +} + +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + return __sl_cas(&rw->lock, RW_LOCK_BIAS, 0) == RW_LOCK_BIAS; +} + +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) + +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() + +#endif /* __ASM_SH_SPINLOCK_CAS_H */ diff --git a/arch/sh/include/asm/spinlock-llsc.h b/arch/sh/include/asm/spinlock-llsc.h new file mode 100644 index 000000000000..cec78143fa83 --- /dev/null +++ b/arch/sh/include/asm/spinlock-llsc.h @@ -0,0 +1,224 @@ +/* + * include/asm-sh/spinlock-llsc.h + * + * Copyright (C) 2002, 2003 Paul Mundt + * Copyright (C) 2006, 2007 Akio Idehara + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#ifndef __ASM_SH_SPINLOCK_LLSC_H +#define __ASM_SH_SPINLOCK_LLSC_H + +#include <asm/barrier.h> +#include <asm/processor.h> + +/* + * Your basic SMP spinlocks, allowing only a single CPU anywhere + */ + +#define arch_spin_is_locked(x) ((x)->lock <= 0) +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, VAL > 0); +} + +/* + * Simple spin lock operations. There are two variants, one clears IRQ's + * on the local processor, one does not. + * + * We make no fairness assumptions. They have a cost. + */ +static inline void arch_spin_lock(arch_spinlock_t *lock) +{ + unsigned long tmp; + unsigned long oldval; + + __asm__ __volatile__ ( + "1: \n\t" + "movli.l @%2, %0 ! arch_spin_lock \n\t" + "mov %0, %1 \n\t" + "mov #0, %0 \n\t" + "movco.l %0, @%2 \n\t" + "bf 1b \n\t" + "cmp/pl %1 \n\t" + "bf 1b \n\t" + : "=&z" (tmp), "=&r" (oldval) + : "r" (&lock->lock) + : "t", "memory" + ); +} + +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + unsigned long tmp; + + __asm__ __volatile__ ( + "mov #1, %0 ! arch_spin_unlock \n\t" + "mov.l %0, @%1 \n\t" + : "=&z" (tmp) + : "r" (&lock->lock) + : "t", "memory" + ); +} + +static inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + unsigned long tmp, oldval; + + __asm__ __volatile__ ( + "1: \n\t" + "movli.l @%2, %0 ! arch_spin_trylock \n\t" + "mov %0, %1 \n\t" + "mov #0, %0 \n\t" + "movco.l %0, @%2 \n\t" + "bf 1b \n\t" + "synco \n\t" + : "=&z" (tmp), "=&r" (oldval) + : "r" (&lock->lock) + : "t", "memory" + ); + + return oldval; +} + +/* + * Read-write spinlocks, allowing multiple readers but only one writer. + * + * NOTE! it is quite common to have readers in interrupts but no interrupt + * writers. For those circumstances we can "mix" irq-safe locks - any writer + * needs to get a irq-safe write-lock, but readers can get non-irqsafe + * read-locks. + */ + +/** + * read_can_lock - would read_trylock() succeed? + * @lock: the rwlock in question. + */ +#define arch_read_can_lock(x) ((x)->lock > 0) + +/** + * write_can_lock - would write_trylock() succeed? + * @lock: the rwlock in question. + */ +#define arch_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS) + +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + unsigned long tmp; + + __asm__ __volatile__ ( + "1: \n\t" + "movli.l @%1, %0 ! arch_read_lock \n\t" + "cmp/pl %0 \n\t" + "bf 1b \n\t" + "add #-1, %0 \n\t" + "movco.l %0, @%1 \n\t" + "bf 1b \n\t" + : "=&z" (tmp) + : "r" (&rw->lock) + : "t", "memory" + ); +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + unsigned long tmp; + + __asm__ __volatile__ ( + "1: \n\t" + "movli.l @%1, %0 ! arch_read_unlock \n\t" + "add #1, %0 \n\t" + "movco.l %0, @%1 \n\t" + "bf 1b \n\t" + : "=&z" (tmp) + : "r" (&rw->lock) + : "t", "memory" + ); +} + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + unsigned long tmp; + + __asm__ __volatile__ ( + "1: \n\t" + "movli.l @%1, %0 ! arch_write_lock \n\t" + "cmp/hs %2, %0 \n\t" + "bf 1b \n\t" + "sub %2, %0 \n\t" + "movco.l %0, @%1 \n\t" + "bf 1b \n\t" + : "=&z" (tmp) + : "r" (&rw->lock), "r" (RW_LOCK_BIAS) + : "t", "memory" + ); +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + __asm__ __volatile__ ( + "mov.l %1, @%0 ! arch_write_unlock \n\t" + : + : "r" (&rw->lock), "r" (RW_LOCK_BIAS) + : "t", "memory" + ); +} + +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + unsigned long tmp, oldval; + + __asm__ __volatile__ ( + "1: \n\t" + "movli.l @%2, %0 ! arch_read_trylock \n\t" + "mov %0, %1 \n\t" + "cmp/pl %0 \n\t" + "bf 2f \n\t" + "add #-1, %0 \n\t" + "movco.l %0, @%2 \n\t" + "bf 1b \n\t" + "2: \n\t" + "synco \n\t" + : "=&z" (tmp), "=&r" (oldval) + : "r" (&rw->lock) + : "t", "memory" + ); + + return (oldval > 0); +} + +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + unsigned long tmp, oldval; + + __asm__ __volatile__ ( + "1: \n\t" + "movli.l @%2, %0 ! arch_write_trylock \n\t" + "mov %0, %1 \n\t" + "cmp/hs %3, %0 \n\t" + "bf 2f \n\t" + "sub %3, %0 \n\t" + "2: \n\t" + "movco.l %0, @%2 \n\t" + "bf 1b \n\t" + "synco \n\t" + : "=&z" (tmp), "=&r" (oldval) + : "r" (&rw->lock), "r" (RW_LOCK_BIAS) + : "t", "memory" + ); + + return (oldval > (RW_LOCK_BIAS - 1)); +} + +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) + +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() + +#endif /* __ASM_SH_SPINLOCK_LLSC_H */ diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h index 416834b60ad0..c2c61ea6a8e2 100644 --- a/arch/sh/include/asm/spinlock.h +++ b/arch/sh/include/asm/spinlock.h @@ -11,222 +11,12 @@ #ifndef __ASM_SH_SPINLOCK_H #define __ASM_SH_SPINLOCK_H -/* - * The only locking implemented here uses SH-4A opcodes. For others, - * split this out as per atomic-*.h. - */ -#ifndef CONFIG_CPU_SH4A -#error "Need movli.l/movco.l for spinlocks" +#if defined(CONFIG_CPU_SH4A) +#include <asm/spinlock-llsc.h> +#elif defined(CONFIG_CPU_J2) +#include <asm/spinlock-cas.h> +#else +#error "The configured cpu type does not support spinlocks" #endif -#include <asm/barrier.h> -#include <asm/processor.h> - -/* - * Your basic SMP spinlocks, allowing only a single CPU anywhere - */ - -#define arch_spin_is_locked(x) ((x)->lock <= 0) -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) - -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_cond_load_acquire(&lock->lock, VAL > 0); -} - -/* - * Simple spin lock operations. There are two variants, one clears IRQ's - * on the local processor, one does not. - * - * We make no fairness assumptions. They have a cost. - */ -static inline void arch_spin_lock(arch_spinlock_t *lock) -{ - unsigned long tmp; - unsigned long oldval; - - __asm__ __volatile__ ( - "1: \n\t" - "movli.l @%2, %0 ! arch_spin_lock \n\t" - "mov %0, %1 \n\t" - "mov #0, %0 \n\t" - "movco.l %0, @%2 \n\t" - "bf 1b \n\t" - "cmp/pl %1 \n\t" - "bf 1b \n\t" - : "=&z" (tmp), "=&r" (oldval) - : "r" (&lock->lock) - : "t", "memory" - ); -} - -static inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - unsigned long tmp; - - __asm__ __volatile__ ( - "mov #1, %0 ! arch_spin_unlock \n\t" - "mov.l %0, @%1 \n\t" - : "=&z" (tmp) - : "r" (&lock->lock) - : "t", "memory" - ); -} - -static inline int arch_spin_trylock(arch_spinlock_t *lock) -{ - unsigned long tmp, oldval; - - __asm__ __volatile__ ( - "1: \n\t" - "movli.l @%2, %0 ! arch_spin_trylock \n\t" - "mov %0, %1 \n\t" - "mov #0, %0 \n\t" - "movco.l %0, @%2 \n\t" - "bf 1b \n\t" - "synco \n\t" - : "=&z" (tmp), "=&r" (oldval) - : "r" (&lock->lock) - : "t", "memory" - ); - - return oldval; -} - -/* - * Read-write spinlocks, allowing multiple readers but only one writer. - * - * NOTE! it is quite common to have readers in interrupts but no interrupt - * writers. For those circumstances we can "mix" irq-safe locks - any writer - * needs to get a irq-safe write-lock, but readers can get non-irqsafe - * read-locks. - */ - -/** - * read_can_lock - would read_trylock() succeed? - * @lock: the rwlock in question. - */ -#define arch_read_can_lock(x) ((x)->lock > 0) - -/** - * write_can_lock - would write_trylock() succeed? - * @lock: the rwlock in question. - */ -#define arch_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS) - -static inline void arch_read_lock(arch_rwlock_t *rw) -{ - unsigned long tmp; - - __asm__ __volatile__ ( - "1: \n\t" - "movli.l @%1, %0 ! arch_read_lock \n\t" - "cmp/pl %0 \n\t" - "bf 1b \n\t" - "add #-1, %0 \n\t" - "movco.l %0, @%1 \n\t" - "bf 1b \n\t" - : "=&z" (tmp) - : "r" (&rw->lock) - : "t", "memory" - ); -} - -static inline void arch_read_unlock(arch_rwlock_t *rw) -{ - unsigned long tmp; - - __asm__ __volatile__ ( - "1: \n\t" - "movli.l @%1, %0 ! arch_read_unlock \n\t" - "add #1, %0 \n\t" - "movco.l %0, @%1 \n\t" - "bf 1b \n\t" - : "=&z" (tmp) - : "r" (&rw->lock) - : "t", "memory" - ); -} - -static inline void arch_write_lock(arch_rwlock_t *rw) -{ - unsigned long tmp; - - __asm__ __volatile__ ( - "1: \n\t" - "movli.l @%1, %0 ! arch_write_lock \n\t" - "cmp/hs %2, %0 \n\t" - "bf 1b \n\t" - "sub %2, %0 \n\t" - "movco.l %0, @%1 \n\t" - "bf 1b \n\t" - : "=&z" (tmp) - : "r" (&rw->lock), "r" (RW_LOCK_BIAS) - : "t", "memory" - ); -} - -static inline void arch_write_unlock(arch_rwlock_t *rw) -{ - __asm__ __volatile__ ( - "mov.l %1, @%0 ! arch_write_unlock \n\t" - : - : "r" (&rw->lock), "r" (RW_LOCK_BIAS) - : "t", "memory" - ); -} - -static inline int arch_read_trylock(arch_rwlock_t *rw) -{ - unsigned long tmp, oldval; - - __asm__ __volatile__ ( - "1: \n\t" - "movli.l @%2, %0 ! arch_read_trylock \n\t" - "mov %0, %1 \n\t" - "cmp/pl %0 \n\t" - "bf 2f \n\t" - "add #-1, %0 \n\t" - "movco.l %0, @%2 \n\t" - "bf 1b \n\t" - "2: \n\t" - "synco \n\t" - : "=&z" (tmp), "=&r" (oldval) - : "r" (&rw->lock) - : "t", "memory" - ); - - return (oldval > 0); -} - -static inline int arch_write_trylock(arch_rwlock_t *rw) -{ - unsigned long tmp, oldval; - - __asm__ __volatile__ ( - "1: \n\t" - "movli.l @%2, %0 ! arch_write_trylock \n\t" - "mov %0, %1 \n\t" - "cmp/hs %3, %0 \n\t" - "bf 2f \n\t" - "sub %3, %0 \n\t" - "2: \n\t" - "movco.l %0, @%2 \n\t" - "bf 1b \n\t" - "synco \n\t" - : "=&z" (tmp), "=&r" (oldval) - : "r" (&rw->lock), "r" (RW_LOCK_BIAS) - : "t", "memory" - ); - - return (oldval > (RW_LOCK_BIAS - 1)); -} - -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* __ASM_SH_SPINLOCK_H */ diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index 2afa321157be..6c65dcd470ab 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -151,19 +151,10 @@ extern void init_thread_xstate(void); * ever touches our thread-synchronous status, so we don't * have to worry about atomic accesses. */ -#define TS_RESTORE_SIGMASK 0x0001 /* restore signal mask in do_signal() */ #define TS_USEDFPU 0x0002 /* FPU used by this task this quantum */ #ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags)); -} - #define TI_FLAG_FAULT_CODE_SHIFT 24 /* @@ -182,23 +173,6 @@ static inline unsigned int get_thread_fault_code(void) return ti->flags >> TI_FLAG_FAULT_CODE_SHIFT; } -static inline void clear_restore_sigmask(void) -{ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; -} -static inline bool test_restore_sigmask(void) -{ - return current_thread_info()->status & TS_RESTORE_SIGMASK; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - if (!(ti->status & TS_RESTORE_SIGMASK)) - return false; - ti->status &= ~TS_RESTORE_SIGMASK; - return true; -} - #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/sh/include/uapi/asm/cpu-features.h b/arch/sh/include/uapi/asm/cpu-features.h index 694abe490edb..2f1bc851042a 100644 --- a/arch/sh/include/uapi/asm/cpu-features.h +++ b/arch/sh/include/uapi/asm/cpu-features.h @@ -22,5 +22,6 @@ #define CPU_HAS_L2_CACHE 0x0080 /* Secondary cache / URAM */ #define CPU_HAS_OP32 0x0100 /* 32-bit instruction support */ #define CPU_HAS_PTEAEX 0x0200 /* PTE ASID Extension support */ +#define CPU_HAS_CAS_L 0x0400 /* cas.l atomic compare-and-swap */ #endif /* __ASM_SH_CPU_FEATURES_H */ diff --git a/arch/sh/include/uapi/asm/sigcontext.h b/arch/sh/include/uapi/asm/sigcontext.h index 8ce1435bc0bf..faa5d0833412 100644 --- a/arch/sh/include/uapi/asm/sigcontext.h +++ b/arch/sh/include/uapi/asm/sigcontext.h @@ -25,8 +25,6 @@ struct sigcontext { unsigned long sc_mach; unsigned long sc_macl; -#if defined(__SH4__) || defined(CONFIG_CPU_SH4) || \ - defined(__SH2A__) || defined(CONFIG_CPU_SH2A) /* FPU registers */ unsigned long sc_fpregs[16]; unsigned long sc_xfpregs[16]; @@ -34,7 +32,6 @@ struct sigcontext { unsigned int sc_fpul; unsigned int sc_ownedfp; #endif -#endif }; #endif /* __ASM_SH_SIGCONTEXT_H */ diff --git a/arch/sh/include/uapi/asm/unistd_32.h b/arch/sh/include/uapi/asm/unistd_32.h index d13a1d623736..c801bde9e6f5 100644 --- a/arch/sh/include/uapi/asm/unistd_32.h +++ b/arch/sh/include/uapi/asm/unistd_32.h @@ -380,7 +380,21 @@ #define __NR_process_vm_writev 366 #define __NR_kcmp 367 #define __NR_finit_module 368 +#define __NR_sched_getattr 369 +#define __NR_sched_setattr 370 +#define __NR_renameat2 371 +#define __NR_seccomp 372 +#define __NR_getrandom 373 +#define __NR_memfd_create 374 +#define __NR_bpf 375 +#define __NR_execveat 376 +#define __NR_userfaultfd 377 +#define __NR_membarrier 378 +#define __NR_mlock2 379 +#define __NR_copy_file_range 380 +#define __NR_preadv2 381 +#define __NR_pwritev2 382 -#define NR_syscalls 369 +#define NR_syscalls 383 #endif /* __ASM_SH_UNISTD_32_H */ diff --git a/arch/sh/include/uapi/asm/unistd_64.h b/arch/sh/include/uapi/asm/unistd_64.h index 47ebd5b5ed55..ce0cb3598b62 100644 --- a/arch/sh/include/uapi/asm/unistd_64.h +++ b/arch/sh/include/uapi/asm/unistd_64.h @@ -400,7 +400,21 @@ #define __NR_process_vm_writev 377 #define __NR_kcmp 378 #define __NR_finit_module 379 +#define __NR_sched_getattr 380 +#define __NR_sched_setattr 381 +#define __NR_renameat2 382 +#define __NR_seccomp 383 +#define __NR_getrandom 384 +#define __NR_memfd_create 385 +#define __NR_bpf 386 +#define __NR_execveat 387 +#define __NR_userfaultfd 388 +#define __NR_membarrier 389 +#define __NR_mlock2 390 +#define __NR_copy_file_range 391 +#define __NR_preadv2 392 +#define __NR_pwritev2 393 -#define NR_syscalls 380 +#define NR_syscalls 394 #endif /* __ASM_SH_UNISTD_64_H */ diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c index 4187cf4fe185..fca9b1e78a63 100644 --- a/arch/sh/kernel/cpu/clock.c +++ b/arch/sh/kernel/cpu/clock.c @@ -24,11 +24,13 @@ int __init clk_init(void) { int ret; +#ifndef CONFIG_COMMON_CLK ret = arch_clk_init(); if (unlikely(ret)) { pr_err("%s: CPU clock registration failed.\n", __func__); return ret; } +#endif if (sh_mv.mv_clk_init) { ret = sh_mv.mv_clk_init(); @@ -39,11 +41,13 @@ int __init clk_init(void) } } +#ifndef CONFIG_COMMON_CLK /* Kick the child clocks.. */ recalculate_root_clocks(); /* Enable the necessary init clocks */ clk_enable_init_clocks(); +#endif return ret; } diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c index 0d7360d549c1..c8b3be1b54e6 100644 --- a/arch/sh/kernel/cpu/init.c +++ b/arch/sh/kernel/cpu/init.c @@ -106,7 +106,7 @@ void __attribute__ ((weak)) l2_cache_init(void) /* * Generic first-level cache init */ -#ifdef CONFIG_SUPERH32 +#if defined(CONFIG_SUPERH32) && !defined(CONFIG_CPU_J2) static void cache_init(void) { unsigned long ccr, flags; @@ -323,9 +323,13 @@ asmlinkage void cpu_init(void) cache_init(); if (raw_smp_processor_id() == 0) { +#ifdef CONFIG_MMU shm_align_mask = max_t(unsigned long, current_cpu_data.dcache.way_size - 1, PAGE_SIZE - 1); +#else + shm_align_mask = PAGE_SIZE - 1; +#endif /* Boot CPU sets the cache shape */ detect_cache_shape(); diff --git a/arch/sh/kernel/cpu/proc.c b/arch/sh/kernel/cpu/proc.c index 9e6624c9108b..4df4b284f591 100644 --- a/arch/sh/kernel/cpu/proc.c +++ b/arch/sh/kernel/cpu/proc.c @@ -27,6 +27,7 @@ static const char *cpu_name[] = { [CPU_MXG] = "MX-G", [CPU_SH7723] = "SH7723", [CPU_SH7366] = "SH7366", [CPU_SH7724] = "SH7724", [CPU_SH7372] = "SH7372", [CPU_SH7734] = "SH7734", + [CPU_J2] = "J2", [CPU_SH_NONE] = "Unknown" }; diff --git a/arch/sh/kernel/cpu/sh2/Makefile b/arch/sh/kernel/cpu/sh2/Makefile index f0f059acfcfb..904c4283d923 100644 --- a/arch/sh/kernel/cpu/sh2/Makefile +++ b/arch/sh/kernel/cpu/sh2/Makefile @@ -5,3 +5,7 @@ obj-y := ex.o probe.o entry.o obj-$(CONFIG_CPU_SUBTYPE_SH7619) += setup-sh7619.o clock-sh7619.o + +# SMP setup +smp-$(CONFIG_CPU_J2) := smp-j2.o +obj-$(CONFIG_SMP) += $(smp-y) diff --git a/arch/sh/kernel/cpu/sh2/entry.S b/arch/sh/kernel/cpu/sh2/entry.S index a1505956ef28..1ee0a6e774c6 100644 --- a/arch/sh/kernel/cpu/sh2/entry.S +++ b/arch/sh/kernel/cpu/sh2/entry.S @@ -47,6 +47,13 @@ ENTRY(exception_handler) mov.l r3,@-sp cli mov.l $cpu_mode,r2 +#ifdef CONFIG_SMP + mov.l $cpuid,r3 + mov.l @r3,r3 + mov.l @r3,r3 + shll2 r3 + add r3,r2 +#endif mov.l @r2,r0 mov.l @(5*4,r15),r3 ! previous SR or r0,r3 ! set MD @@ -57,6 +64,13 @@ ENTRY(exception_handler) mov.l __md_bit,r0 mov.l r0,@r2 ! enter kernel mode mov.l $current_thread_info,r2 +#ifdef CONFIG_SMP + mov.l $cpuid,r0 + mov.l @r0,r0 + mov.l @r0,r0 + shll2 r0 + add r0,r2 +#endif mov.l @r2,r2 mov #(THREAD_SIZE >> 8),r0 shll8 r0 @@ -147,6 +161,11 @@ ENTRY(exception_handler) mov #31,r8 cmp/hs r8,r9 bt trap_entry ! 64 > vec >= 31 is trap +#ifdef CONFIG_CPU_J2 + mov #16,r8 + cmp/hs r8,r9 + bt interrupt_entry ! 31 > vec >= 16 is interrupt +#endif mov.l 4f,r8 mov r9,r4 @@ -260,6 +279,13 @@ restore_all: lds.l @r0+,macl mov r15,r0 mov.l $cpu_mode,r2 +#ifdef CONFIG_SMP + mov.l $cpuid,r3 + mov.l @r3,r3 + mov.l @r3,r3 + shll2 r3 + add r3,r2 +#endif mov #OFF_SR,r3 mov.l @(r0,r3),r1 mov.l __md_bit,r3 @@ -276,6 +302,13 @@ restore_all: mov.l r1,@r2 ! set pc get_current_thread_info r0, r1 mov.l $current_thread_info,r1 +#ifdef CONFIG_SMP + mov.l $cpuid,r3 + mov.l @r3,r3 + mov.l @r3,r3 + shll2 r3 + add r3,r1 +#endif mov.l r0,@r1 mov.l @r15+,r0 mov.l @r15+,r1 @@ -303,19 +336,41 @@ $current_thread_info: .long __current_thread_info $cpu_mode: .long __cpu_mode +#ifdef CONFIG_SMP +$cpuid: + .long sh2_cpuid_addr +#endif ! common exception handler #include "../../entry-common.S" + +#ifdef CONFIG_NR_CPUS +#define NR_CPUS CONFIG_NR_CPUS +#else +#define NR_CPUS 1 +#endif .data ! cpu operation mode ! bit30 = MD (compatible SH3/4) __cpu_mode: + .rept NR_CPUS .long 0x40000000 + .endr + +#ifdef CONFIG_SMP +.global sh2_cpuid_addr +sh2_cpuid_addr: + .long dummy_cpuid +dummy_cpuid: + .long 0 +#endif .section .bss __current_thread_info: + .rept NR_CPUS .long 0 + .endr ENTRY(exception_handling_table) .space 4*32 diff --git a/arch/sh/kernel/cpu/sh2/probe.c b/arch/sh/kernel/cpu/sh2/probe.c index 6c687ae812ef..4205f6d42b69 100644 --- a/arch/sh/kernel/cpu/sh2/probe.c +++ b/arch/sh/kernel/cpu/sh2/probe.c @@ -10,10 +10,27 @@ * for more details. */ #include <linux/init.h> +#include <linux/of_fdt.h> +#include <linux/smp.h> +#include <linux/io.h> #include <asm/processor.h> #include <asm/cache.h> -void cpu_probe(void) +#if defined(CONFIG_CPU_J2) +extern u32 __iomem *j2_ccr_base; +static int __init scan_cache(unsigned long node, const char *uname, + int depth, void *data) +{ + if (!of_flat_dt_is_compatible(node, "jcore,cache")) + return 0; + + j2_ccr_base = (u32 __iomem *)of_flat_dt_translate_address(node); + + return 1; +} +#endif + +void __ref cpu_probe(void) { #if defined(CONFIG_CPU_SUBTYPE_SH7619) boot_cpu_data.type = CPU_SH7619; @@ -24,10 +41,30 @@ void cpu_probe(void) boot_cpu_data.dcache.linesz = L1_CACHE_BYTES; boot_cpu_data.dcache.flags = 0; #endif + +#if defined(CONFIG_CPU_J2) + unsigned cpu = hard_smp_processor_id(); + if (cpu == 0) of_scan_flat_dt(scan_cache, NULL); + if (j2_ccr_base) __raw_writel(0x80000303, j2_ccr_base + 4*cpu); + if (cpu != 0) return; + boot_cpu_data.type = CPU_J2; + + /* These defaults are appropriate for the original/current + * J2 cache. Once there is a proper framework for getting cache + * info from device tree, we should switch to that. */ + boot_cpu_data.dcache.ways = 1; + boot_cpu_data.dcache.sets = 256; + boot_cpu_data.dcache.entry_shift = 5; + boot_cpu_data.dcache.linesz = 32; + boot_cpu_data.dcache.flags = 0; + + boot_cpu_data.flags |= CPU_HAS_CAS_L; +#else /* * SH-2 doesn't have separate caches */ boot_cpu_data.dcache.flags |= SH_CACHE_COMBINED; +#endif boot_cpu_data.icache = boot_cpu_data.dcache; boot_cpu_data.family = CPU_FAMILY_SH2; } diff --git a/arch/sh/kernel/cpu/sh2/smp-j2.c b/arch/sh/kernel/cpu/sh2/smp-j2.c new file mode 100644 index 000000000000..6ccd7e4dc008 --- /dev/null +++ b/arch/sh/kernel/cpu/sh2/smp-j2.c @@ -0,0 +1,139 @@ +/* + * SMP support for J2 processor + * + * Copyright (C) 2015-2016 Smart Energy Instruments, Inc. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <asm/cmpxchg.h> + +DEFINE_PER_CPU(unsigned, j2_ipi_messages); + +extern u32 *sh2_cpuid_addr; +static u32 *j2_ipi_trigger; +static int j2_ipi_irq; + +static irqreturn_t j2_ipi_interrupt_handler(int irq, void *arg) +{ + unsigned cpu = hard_smp_processor_id(); + volatile unsigned *pmsg = &per_cpu(j2_ipi_messages, cpu); + unsigned messages, i; + + do messages = *pmsg; + while (cmpxchg(pmsg, messages, 0) != messages); + + if (!messages) return IRQ_NONE; + + for (i=0; i<SMP_MSG_NR; i++) + if (messages & (1U<<i)) + smp_message_recv(i); + + return IRQ_HANDLED; +} + +static void j2_smp_setup(void) +{ +} + +static void j2_prepare_cpus(unsigned int max_cpus) +{ + struct device_node *np; + unsigned i, max = 1; + + np = of_find_compatible_node(NULL, NULL, "jcore,ipi-controller"); + if (!np) + goto out; + + j2_ipi_irq = irq_of_parse_and_map(np, 0); + j2_ipi_trigger = of_iomap(np, 0); + if (!j2_ipi_irq || !j2_ipi_trigger) + goto out; + + np = of_find_compatible_node(NULL, NULL, "jcore,cpuid-mmio"); + if (!np) + goto out; + + sh2_cpuid_addr = of_iomap(np, 0); + if (!sh2_cpuid_addr) + goto out; + + if (request_irq(j2_ipi_irq, j2_ipi_interrupt_handler, IRQF_PERCPU, + "ipi", (void *)j2_ipi_interrupt_handler) != 0) + goto out; + + max = max_cpus; +out: + /* Disable any cpus past max_cpus, or all secondaries if we didn't + * get the necessary resources to support SMP. */ + for (i=max; i<NR_CPUS; i++) { + set_cpu_possible(i, false); + set_cpu_present(i, false); + } +} + +static void j2_start_cpu(unsigned int cpu, unsigned long entry_point) +{ + struct device_node *np; + u32 regs[2]; + void __iomem *release, *initpc; + + if (!cpu) return; + + np = of_get_cpu_node(cpu, NULL); + if (!np) return; + + if (of_property_read_u32_array(np, "cpu-release-addr", regs, 2)) return; + release = ioremap_nocache(regs[0], sizeof(u32)); + initpc = ioremap_nocache(regs[1], sizeof(u32)); + + __raw_writel(entry_point, initpc); + __raw_writel(1, release); + + iounmap(initpc); + iounmap(release); + + pr_info("J2 SMP: requested start of cpu %u\n", cpu); +} + +static unsigned int j2_smp_processor_id(void) +{ + return __raw_readl(sh2_cpuid_addr); +} + +static void j2_send_ipi(unsigned int cpu, unsigned int message) +{ + volatile unsigned *pmsg; + unsigned old; + unsigned long val; + + /* There is only one IPI interrupt shared by all messages, so + * we keep a separate interrupt flag per message type in sw. */ + pmsg = &per_cpu(j2_ipi_messages, cpu); + do old = *pmsg; + while (cmpxchg(pmsg, old, old|(1U<<message)) != old); + + /* Generate the actual interrupt by writing to CCRn bit 28. */ + val = __raw_readl(j2_ipi_trigger + cpu); + __raw_writel(val | (1U<<28), j2_ipi_trigger + cpu); +} + +static struct plat_smp_ops j2_smp_ops = { + .smp_setup = j2_smp_setup, + .prepare_cpus = j2_prepare_cpus, + .start_cpu = j2_start_cpu, + .smp_processor_id = j2_smp_processor_id, + .send_ipi = j2_send_ipi, + .cpu_die = native_cpu_die, + .cpu_disable = native_cpu_disable, + .play_dead = native_play_dead, +}; + +CPU_METHOD_OF_DECLARE(j2_cpu_method, "jcore,spin-table", &j2_smp_ops); diff --git a/arch/sh/kernel/dma-nommu.c b/arch/sh/kernel/dma-nommu.c index 5b0bfcda6d0b..eadb669a7329 100644 --- a/arch/sh/kernel/dma-nommu.c +++ b/arch/sh/kernel/dma-nommu.c @@ -13,7 +13,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { dma_addr_t addr = page_to_phys(page) + offset; @@ -25,7 +25,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, static int nommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *s; int i; diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c index 9d209a07235e..e1d751ae2498 100644 --- a/arch/sh/kernel/dwarf.c +++ b/arch/sh/kernel/dwarf.c @@ -1009,10 +1009,8 @@ static void __init dwarf_unwinder_cleanup(void) rbtree_postorder_for_each_entry_safe(cie, next_cie, &cie_root, node) kfree(cie); - if (dwarf_reg_pool) - mempool_destroy(dwarf_reg_pool); - if (dwarf_frame_pool) - mempool_destroy(dwarf_frame_pool); + mempool_destroy(dwarf_reg_pool); + mempool_destroy(dwarf_frame_pool); kmem_cache_destroy(dwarf_reg_cachep); kmem_cache_destroy(dwarf_frame_cachep); } diff --git a/arch/sh/kernel/head_32.S b/arch/sh/kernel/head_32.S index 974bc152cc84..4e352c3f79e6 100644 --- a/arch/sh/kernel/head_32.S +++ b/arch/sh/kernel/head_32.S @@ -67,7 +67,7 @@ ENTRY(_stext) ldc r0, r6_bank #endif -#ifdef CONFIG_OF +#ifdef CONFIG_OF_FLATTREE mov r4, r12 ! Store device tree blob pointer in r12 #endif @@ -318,7 +318,7 @@ ENTRY(_stext) 10: #endif -#ifdef CONFIG_OF +#ifdef CONFIG_OF_FLATTREE mov.l 8f, r0 ! Make flat device tree available early. jsr @r0 mov r12, r4 @@ -349,7 +349,7 @@ ENTRY(stack_start) 5: .long start_kernel 6: .long cpu_init 7: .long init_thread_union -#if defined(CONFIG_OF) +#if defined(CONFIG_OF_FLATTREE) 8: .long sh_fdt_init #endif diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 5d34605b58b5..e7b49d81053e 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -242,7 +242,7 @@ void __init __weak plat_early_device_setup(void) { } -#ifdef CONFIG_OF +#ifdef CONFIG_OF_FLATTREE void __ref sh_fdt_init(phys_addr_t dt_phys) { static int done = 0; @@ -251,7 +251,11 @@ void __ref sh_fdt_init(phys_addr_t dt_phys) /* Avoid calling an __init function on secondary cpus. */ if (done) return; +#ifdef CONFIG_USE_BUILTIN_DTB + dt_virt = __dtb_start; +#else dt_virt = phys_to_virt(dt_phys); +#endif if (!dt_virt || !early_init_dt_scan(dt_virt)) { pr_crit("Error: invalid device tree blob" diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S index 734234be2f01..254bc22ee57d 100644 --- a/arch/sh/kernel/syscalls_32.S +++ b/arch/sh/kernel/syscalls_32.S @@ -386,3 +386,17 @@ ENTRY(sys_call_table) .long sys_process_vm_writev .long sys_kcmp .long sys_finit_module + .long sys_sched_getattr + .long sys_sched_setattr /* 370 */ + .long sys_renameat2 + .long sys_seccomp + .long sys_getrandom + .long sys_memfd_create + .long sys_bpf /* 375 */ + .long sys_execveat + .long sys_userfaultfd + .long sys_membarrier + .long sys_mlock2 + .long sys_copy_file_range /* 380 */ + .long sys_preadv2 + .long sys_pwritev2 diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S index 579fcb9a896b..d6a27f7a4c54 100644 --- a/arch/sh/kernel/syscalls_64.S +++ b/arch/sh/kernel/syscalls_64.S @@ -406,3 +406,17 @@ sys_call_table: .long sys_process_vm_writev .long sys_kcmp .long sys_finit_module + .long sys_sched_getattr /* 380 */ + .long sys_sched_setattr + .long sys_renameat2 + .long sys_seccomp + .long sys_getrandom + .long sys_memfd_create /* 385 */ + .long sys_bpf + .long sys_execveat + .long sys_userfaultfd + .long sys_membarrier + .long sys_mlock2 /* 390 */ + .long sys_copy_file_range + .long sys_preadv2 + .long sys_pwritev2 diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c index d6d0a986c6e9..fcd5e41977d1 100644 --- a/arch/sh/kernel/time.c +++ b/arch/sh/kernel/time.c @@ -11,7 +11,6 @@ * for more details. */ #include <linux/kernel.h> -#include <linux/module.h> #include <linux/init.h> #include <linux/profile.h> #include <linux/timex.h> @@ -50,27 +49,31 @@ int update_persistent_clock(struct timespec now) } #endif -unsigned int get_rtc_time(struct rtc_time *tm) +static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm) { - if (rtc_sh_get_time != null_rtc_get_time) { - struct timespec tv; + struct timespec tv; - rtc_sh_get_time(&tv); - rtc_time_to_tm(tv.tv_sec, tm); - } - - return RTC_24H; + rtc_sh_get_time(&tv); + rtc_time_to_tm(tv.tv_sec, tm); + return 0; } -EXPORT_SYMBOL(get_rtc_time); -int set_rtc_time(struct rtc_time *tm) +static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm) { unsigned long secs; rtc_tm_to_time(tm, &secs); - return rtc_sh_set_time(secs); + if ((rtc_sh_set_time == null_rtc_set_time) || + (rtc_sh_set_time(secs) < 0)) + return -EOPNOTSUPP; + + return 0; } -EXPORT_SYMBOL(set_rtc_time); + +static const struct rtc_class_ops rtc_generic_ops = { + .read_time = rtc_generic_get_time, + .set_time = rtc_generic_set_time, +}; static int __init rtc_generic_init(void) { @@ -79,11 +82,14 @@ static int __init rtc_generic_init(void) if (rtc_sh_get_time == null_rtc_get_time) return -ENODEV; - pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0); + pdev = platform_device_register_data(NULL, "rtc-generic", -1, + &rtc_generic_ops, + sizeof(rtc_generic_ops)); + return PTR_ERR_OR_ZERO(pdev); } -module_init(rtc_generic_init); +device_initcall(rtc_generic_init); void (*board_time_init)(void); diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile index cee6b9999d86..92c3bd96aee5 100644 --- a/arch/sh/mm/Makefile +++ b/arch/sh/mm/Makefile @@ -4,7 +4,8 @@ obj-y := alignment.o cache.o init.o consistent.o mmap.o -cacheops-$(CONFIG_CPU_SH2) := cache-sh2.o +cacheops-$(CONFIG_CPU_J2) := cache-j2.o +cacheops-$(CONFIG_CPU_SUBTYPE_SH7619) := cache-sh2.o cacheops-$(CONFIG_CPU_SH2A) := cache-sh2a.o cacheops-$(CONFIG_CPU_SH3) := cache-sh3.o cacheops-$(CONFIG_CPU_SH4) := cache-sh4.o flush-sh4.o diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c index ecfc6b0c1da1..bf95fdaedd0c 100644 --- a/arch/sh/mm/asids-debugfs.c +++ b/arch/sh/mm/asids-debugfs.c @@ -17,7 +17,6 @@ * for more details. */ #include <linux/init.h> -#include <linux/module.h> #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/spinlock.h> @@ -70,6 +69,4 @@ static int __init asids_debugfs_init(void) return PTR_ERR_OR_ZERO(asids_dentry); } -module_init(asids_debugfs_init); - -MODULE_LICENSE("GPL v2"); +device_initcall(asids_debugfs_init); diff --git a/arch/sh/mm/cache-j2.c b/arch/sh/mm/cache-j2.c new file mode 100644 index 000000000000..391698bcac5b --- /dev/null +++ b/arch/sh/mm/cache-j2.c @@ -0,0 +1,65 @@ +/* + * arch/sh/mm/cache-j2.c + * + * Copyright (C) 2015-2016 Smart Energy Instruments, Inc. + * + * Released under the terms of the GNU GPL v2.0. + */ + +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/cpumask.h> + +#include <asm/cache.h> +#include <asm/addrspace.h> +#include <asm/processor.h> +#include <asm/cacheflush.h> +#include <asm/io.h> + +#define ICACHE_ENABLE 0x1 +#define DCACHE_ENABLE 0x2 +#define CACHE_ENABLE (ICACHE_ENABLE | DCACHE_ENABLE) +#define ICACHE_FLUSH 0x100 +#define DCACHE_FLUSH 0x200 +#define CACHE_FLUSH (ICACHE_FLUSH | DCACHE_FLUSH) + +u32 __iomem *j2_ccr_base; + +static void j2_flush_icache(void *args) +{ + unsigned cpu; + for_each_possible_cpu(cpu) + __raw_writel(CACHE_ENABLE | ICACHE_FLUSH, j2_ccr_base + cpu); +} + +static void j2_flush_dcache(void *args) +{ + unsigned cpu; + for_each_possible_cpu(cpu) + __raw_writel(CACHE_ENABLE | DCACHE_FLUSH, j2_ccr_base + cpu); +} + +static void j2_flush_both(void *args) +{ + unsigned cpu; + for_each_possible_cpu(cpu) + __raw_writel(CACHE_ENABLE | CACHE_FLUSH, j2_ccr_base + cpu); +} + +void __init j2_cache_init(void) +{ + if (!j2_ccr_base) + return; + + local_flush_cache_all = j2_flush_both; + local_flush_cache_mm = j2_flush_both; + local_flush_cache_dup_mm = j2_flush_both; + local_flush_cache_page = j2_flush_both; + local_flush_cache_range = j2_flush_both; + local_flush_dcache_page = j2_flush_dcache; + local_flush_icache_range = j2_flush_icache; + local_flush_icache_page = j2_flush_icache; + local_flush_cache_sigtramp = j2_flush_icache; + + pr_info("Initial J2 CCR is %.8x\n", __raw_readl(j2_ccr_base)); +} diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c index e58cfbf45150..36554a9ea99b 100644 --- a/arch/sh/mm/cache.c +++ b/arch/sh/mm/cache.c @@ -42,6 +42,8 @@ static inline void cacheop_on_each_cpu(void (*func) (void *info), void *info, { preempt_disable(); + /* Needing IPI for cross-core flush is SHX3-specific. */ +#ifdef CONFIG_CPU_SHX3 /* * It's possible that this gets called early on when IRQs are * still disabled due to ioremapping by the boot CPU, so don't @@ -49,6 +51,7 @@ static inline void cacheop_on_each_cpu(void (*func) (void *info), void *info, */ if (num_online_cpus() > 1) smp_call_function(func, info, wait); +#endif func(info); @@ -244,7 +247,11 @@ void flush_cache_sigtramp(unsigned long address) static void compute_alias(struct cache_info *c) { +#ifdef CONFIG_MMU c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1); +#else + c->alias_mask = 0; +#endif c->n_aliases = c->alias_mask ? (c->alias_mask >> PAGE_SHIFT) + 1 : 0; } @@ -305,7 +312,11 @@ void __init cpu_cache_init(void) if (unlikely(cache_disabled)) goto skip; - if (boot_cpu_data.family == CPU_FAMILY_SH2) { + if (boot_cpu_data.type == CPU_J2) { + extern void __weak j2_cache_init(void); + + j2_cache_init(); + } else if (boot_cpu_data.family == CPU_FAMILY_SH2) { extern void __weak sh2_cache_init(void); sh2_cache_init(); diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index b81d9dbf9fef..92b6976fde59 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -34,7 +34,7 @@ fs_initcall(dma_init); void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, - struct dma_attrs *attrs) + unsigned long attrs) { void *ret, *ret_nocache; int order = get_order(size); @@ -66,7 +66,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size, void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { int order = get_order(size); unsigned long pfn = dma_handle >> PAGE_SHIFT; diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c index 0c99ec2e7ed8..d09ddfe58fd8 100644 --- a/arch/sh/mm/ioremap.c +++ b/arch/sh/mm/ioremap.c @@ -34,7 +34,7 @@ * have to convert them into an offset in a page-aligned mapping, but the * caller shouldn't need to know that small detail. */ -void __iomem * __init_refok +void __iomem * __ref __ioremap_caller(phys_addr_t phys_addr, unsigned long size, pgprot_t pgprot, void *caller) { diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 546293d9e6c5..59b09600dd32 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -43,6 +43,7 @@ config SPARC select OLD_SIGSUSPEND select ARCH_HAS_SG_CHAIN select CPU_NO_EFFICIENT_FFS + select HAVE_ARCH_HARDENED_USERCOPY config SPARC32 def_bool !64BIT diff --git a/arch/sparc/include/asm/io_32.h b/arch/sparc/include/asm/io_32.h index 57f26c398dc9..4dd268a3a8b0 100644 --- a/arch/sparc/include/asm/io_32.h +++ b/arch/sparc/include/asm/io_32.h @@ -140,16 +140,6 @@ void ioport_unmap(void __iomem *); struct pci_dev; void pci_iounmap(struct pci_dev *dev, void __iomem *); - - -/* - * At the moment, we do not use CMOS_READ anywhere outside of rtc.c, - * so rtc_port is static in it. This should not change unless a new - * hardware pops up. - */ -#define RTC_PORT(x) (rtc_port + (x)) -#define RTC_ALWAYS_BCD 0 - static inline int sbus_can_dma_64bit(void) { return 0; /* actually, sparc_cpu_model==sun4d */ diff --git a/arch/sparc/include/asm/pci_64.h b/arch/sparc/include/asm/pci_64.h index 022d16008a00..2303635158f5 100644 --- a/arch/sparc/include/asm/pci_64.h +++ b/arch/sparc/include/asm/pci_64.h @@ -55,9 +55,6 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) } #define HAVE_ARCH_PCI_RESOURCE_TO_USER -void pci_resource_to_user(const struct pci_dev *dev, int bar, - const struct resource *rsrc, - resource_size_t *start, resource_size_t *end); #endif /* __KERNEL__ */ #endif /* __SPARC64_PCI_H */ diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index bde59825d06c..3d7b925f6516 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -222,32 +222,8 @@ register struct thread_info *current_thread_info_reg asm("g6"); * * Note that there are only 8 bits available. */ -#define TS_RESTORE_SIGMASK 0x0001 /* restore signal mask in do_signal() */ #ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags)); -} -static inline void clear_restore_sigmask(void) -{ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; -} -static inline bool test_restore_sigmask(void) -{ - return current_thread_info()->status & TS_RESTORE_SIGMASK; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - if (!(ti->status & TS_RESTORE_SIGMASK)) - return false; - ti->status &= ~TS_RESTORE_SIGMASK; - return true; -} #define thread32_stack_is_64bit(__SP) (((__SP) & 0x1) != 0) #define test_thread_64bit_stack(__SP) \ diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index 57aca2792d29..341a5a133f48 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -248,22 +248,28 @@ unsigned long __copy_user(void __user *to, const void __user *from, unsigned lon static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) { - if (n && __access_ok((unsigned long) to, n)) + if (n && __access_ok((unsigned long) to, n)) { + if (!__builtin_constant_p(n)) + check_object_size(from, n, true); return __copy_user(to, (__force void __user *) from, n); - else + } else return n; } static inline unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { + if (!__builtin_constant_p(n)) + check_object_size(from, n, true); return __copy_user(to, (__force void __user *) from, n); } static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { - if (n && __access_ok((unsigned long) from, n)) + if (n && __access_ok((unsigned long) from, n)) { + if (!__builtin_constant_p(n)) + check_object_size(to, n, false); return __copy_user((__force void __user *) to, from, n); - else + } else return n; } diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index e9a51d64974d..8bda94fab8e8 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -210,8 +210,12 @@ unsigned long copy_from_user_fixup(void *to, const void __user *from, static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long size) { - unsigned long ret = ___copy_from_user(to, from, size); + unsigned long ret; + if (!__builtin_constant_p(size)) + check_object_size(to, size, false); + + ret = ___copy_from_user(to, from, size); if (unlikely(ret)) ret = copy_from_user_fixup(to, from, size); @@ -227,8 +231,11 @@ unsigned long copy_to_user_fixup(void __user *to, const void *from, static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long size) { - unsigned long ret = ___copy_to_user(to, from, size); + unsigned long ret; + if (!__builtin_constant_p(size)) + check_object_size(from, size, true); + ret = ___copy_to_user(to, from, size); if (unlikely(ret)) ret = copy_to_user_fixup(to, from, size); return ret; diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 37686828c3d9..5c615abff030 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -196,7 +196,7 @@ static inline void iommu_free_ctx(struct iommu *iommu, int ctx) static void *dma_4u_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addrp, gfp_t gfp, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long order, first_page; struct iommu *iommu; @@ -245,7 +245,7 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size, static void dma_4u_free_coherent(struct device *dev, size_t size, void *cpu, dma_addr_t dvma, - struct dma_attrs *attrs) + unsigned long attrs) { struct iommu *iommu; unsigned long order, npages; @@ -263,7 +263,7 @@ static void dma_4u_free_coherent(struct device *dev, size_t size, static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page, unsigned long offset, size_t sz, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct iommu *iommu; struct strbuf *strbuf; @@ -385,7 +385,7 @@ do_flush_sync: static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr, size_t sz, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct iommu *iommu; struct strbuf *strbuf; @@ -431,7 +431,7 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr, static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *s, *outs, *segstart; unsigned long flags, handle, prot, ctx; @@ -607,7 +607,7 @@ static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg) static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long flags, ctx; struct scatterlist *sg; diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index ffd5ff4678cf..2344103414d1 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -260,7 +260,7 @@ EXPORT_SYMBOL(sbus_set_sbus64); */ static void *sbus_alloc_coherent(struct device *dev, size_t len, dma_addr_t *dma_addrp, gfp_t gfp, - struct dma_attrs *attrs) + unsigned long attrs) { struct platform_device *op = to_platform_device(dev); unsigned long len_total = PAGE_ALIGN(len); @@ -315,7 +315,7 @@ err_nopages: } static void sbus_free_coherent(struct device *dev, size_t n, void *p, - dma_addr_t ba, struct dma_attrs *attrs) + dma_addr_t ba, unsigned long attrs) { struct resource *res; struct page *pgv; @@ -355,7 +355,7 @@ static void sbus_free_coherent(struct device *dev, size_t n, void *p, static dma_addr_t sbus_map_page(struct device *dev, struct page *page, unsigned long offset, size_t len, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { void *va = page_address(page) + offset; @@ -371,20 +371,20 @@ static dma_addr_t sbus_map_page(struct device *dev, struct page *page, } static void sbus_unmap_page(struct device *dev, dma_addr_t ba, size_t n, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { mmu_release_scsi_one(dev, ba, n); } static int sbus_map_sg(struct device *dev, struct scatterlist *sg, int n, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { mmu_get_scsi_sgl(dev, sg, n); return n; } static void sbus_unmap_sg(struct device *dev, struct scatterlist *sg, int n, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { mmu_release_scsi_sgl(dev, sg, n); } @@ -429,7 +429,7 @@ arch_initcall(sparc_register_ioport); */ static void *pci32_alloc_coherent(struct device *dev, size_t len, dma_addr_t *pba, gfp_t gfp, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long len_total = PAGE_ALIGN(len); void *va; @@ -482,7 +482,7 @@ err_nopages: * past this call are illegal. */ static void pci32_free_coherent(struct device *dev, size_t n, void *p, - dma_addr_t ba, struct dma_attrs *attrs) + dma_addr_t ba, unsigned long attrs) { struct resource *res; @@ -518,14 +518,14 @@ static void pci32_free_coherent(struct device *dev, size_t n, void *p, static dma_addr_t pci32_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { /* IIep is write-through, not flushing. */ return page_to_phys(page) + offset; } static void pci32_unmap_page(struct device *dev, dma_addr_t ba, size_t size, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { if (dir != PCI_DMA_TODEVICE) dma_make_coherent(ba, PAGE_ALIGN(size)); @@ -548,7 +548,7 @@ static void pci32_unmap_page(struct device *dev, dma_addr_t ba, size_t size, */ static int pci32_map_sg(struct device *device, struct scatterlist *sgl, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int n; @@ -567,7 +567,7 @@ static int pci32_map_sg(struct device *device, struct scatterlist *sgl, */ static void pci32_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int n; diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index c2b202d763a1..9c1878f4fa9f 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -986,16 +986,18 @@ void pci_resource_to_user(const struct pci_dev *pdev, int bar, const struct resource *rp, resource_size_t *start, resource_size_t *end) { - struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller; - unsigned long offset; - - if (rp->flags & IORESOURCE_IO) - offset = pbm->io_space.start; - else - offset = pbm->mem_space.start; + struct pci_bus_region region; - *start = rp->start - offset; - *end = rp->end - offset; + /* + * "User" addresses are shown in /sys/devices/pci.../.../resource + * and /proc/bus/pci/devices and used as mmap offsets for + * /proc/bus/pci/BB/DD.F files (see proc_bus_pci_mmap()). + * + * On sparc, these are PCI bus addresses, i.e., raw BAR values. + */ + pcibios_resource_to_bus(pdev->bus, ®ion, (struct resource *) rp); + *start = region.start; + *end = region.end; } void pcibios_set_master(struct pci_dev *dev) diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 836e8cef47e2..61c6f935accc 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -130,7 +130,7 @@ static inline long iommu_batch_end(void) static void *dma_4v_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addrp, gfp_t gfp, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long flags, order, first_page, npages, n; struct iommu *iommu; @@ -213,7 +213,7 @@ static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry, } static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu, - dma_addr_t dvma, struct dma_attrs *attrs) + dma_addr_t dvma, unsigned long attrs) { struct pci_pbm_info *pbm; struct iommu *iommu; @@ -235,7 +235,7 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu, static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, unsigned long offset, size_t sz, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct iommu *iommu; unsigned long flags, npages, oaddr; @@ -294,7 +294,7 @@ iommu_map_fail: static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, size_t sz, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct pci_pbm_info *pbm; struct iommu *iommu; @@ -322,7 +322,7 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *s, *outs, *segstart; unsigned long flags, handle, prot; @@ -466,7 +466,7 @@ iommu_map_failed: static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct pci_pbm_info *pbm; struct scatterlist *sg; diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 7d02b1fef025..d79b3b734245 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -150,6 +150,13 @@ SECTIONS } PERCPU_SECTION(SMP_CACHE_BYTES) +#ifdef CONFIG_JUMP_LABEL + . = ALIGN(PAGE_SIZE); + .exit.text : { + EXIT_TEXT + } +#endif + . = ALIGN(PAGE_SIZE); __init_end = .; BSS_SECTION(0, 0, 0) diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index c1467ac59ce6..b7659b8f1117 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -166,32 +166,5 @@ extern void _cpu_idle(void); #ifdef __tilegx__ #define TS_COMPAT 0x0001 /* 32-bit compatibility mode */ #endif -#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal */ - -#ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags)); -} -static inline void clear_restore_sigmask(void) -{ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; -} -static inline bool test_restore_sigmask(void) -{ - return current_thread_info()->status & TS_RESTORE_SIGMASK; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - if (!(ti->status & TS_RESTORE_SIGMASK)) - return false; - ti->status &= ~TS_RESTORE_SIGMASK; - return true; -} -#endif /* !__ASSEMBLY__ */ #endif /* _ASM_TILE_THREAD_INFO_H */ diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c index b6bc0547a4f6..09bb774b39cd 100644 --- a/arch/tile/kernel/pci-dma.c +++ b/arch/tile/kernel/pci-dma.c @@ -34,7 +34,7 @@ static void *tile_dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, - struct dma_attrs *attrs) + unsigned long attrs) { u64 dma_mask = (dev && dev->coherent_dma_mask) ? dev->coherent_dma_mask : DMA_BIT_MASK(32); @@ -78,7 +78,7 @@ static void *tile_dma_alloc_coherent(struct device *dev, size_t size, */ static void tile_dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { homecache_free_pages((unsigned long)vaddr, get_order(size)); } @@ -202,7 +202,7 @@ static void __dma_complete_pa_range(dma_addr_t dma_addr, size_t size, static int tile_dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; @@ -224,7 +224,7 @@ static int tile_dma_map_sg(struct device *dev, struct scatterlist *sglist, static void tile_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; @@ -240,7 +240,7 @@ static void tile_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, static dma_addr_t tile_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { BUG_ON(!valid_dma_direction(direction)); @@ -252,7 +252,7 @@ static dma_addr_t tile_dma_map_page(struct device *dev, struct page *page, static void tile_dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { BUG_ON(!valid_dma_direction(direction)); @@ -343,7 +343,7 @@ EXPORT_SYMBOL(tile_dma_map_ops); static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, - struct dma_attrs *attrs) + unsigned long attrs) { int node = dev_to_node(dev); int order = get_order(size); @@ -368,14 +368,14 @@ static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size, */ static void tile_pci_dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { homecache_free_pages((unsigned long)vaddr, get_order(size)); } static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; @@ -400,7 +400,7 @@ static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist, static void tile_pci_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; @@ -416,7 +416,7 @@ static void tile_pci_dma_unmap_sg(struct device *dev, static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { BUG_ON(!valid_dma_direction(direction)); @@ -429,7 +429,7 @@ static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page, static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { BUG_ON(!valid_dma_direction(direction)); @@ -531,7 +531,7 @@ EXPORT_SYMBOL(gx_pci_dma_map_ops); #ifdef CONFIG_SWIOTLB static void *tile_swiotlb_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, - struct dma_attrs *attrs) + unsigned long attrs) { gfp |= GFP_DMA; return swiotlb_alloc_coherent(dev, size, dma_handle, gfp); @@ -539,7 +539,7 @@ static void *tile_swiotlb_alloc_coherent(struct device *dev, size_t size, static void tile_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, - struct dma_attrs *attrs) + unsigned long attrs) { swiotlb_free_coherent(dev, size, vaddr, dma_addr); } diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index 378f5d8d1ec8..9d449caf8910 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S @@ -60,6 +60,18 @@ SECTIONS /* "Init" is divided into two areas with very different virtual addresses. */ INIT_TEXT_SECTION(PAGE_SIZE) + /* + * Some things, like the __jump_table, may contain symbol references + * to __exit text, so include such text in the final image if so. + * In that case we also override the _einittext from INIT_TEXT_SECTION. + */ +#ifdef CONFIG_JUMP_LABEL + .exit.text : { + EXIT_TEXT + _einittext = .; + } +#endif + /* Now we skip back to PAGE_OFFSET for the data. */ . = (. - TEXT_OFFSET + PAGE_OFFSET); #undef LOAD_OFFSET diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common index cc0013475444..fd443852103c 100644 --- a/arch/um/Kconfig.common +++ b/arch/um/Kconfig.common @@ -1,14 +1,17 @@ config UML bool default y + select ARCH_HAS_KCOV select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_SECCOMP_FILTER select HAVE_UID16 select HAVE_FUTEX_CMPXCHG if FUTEX + select HAVE_DEBUG_KMEMLEAK select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select GENERIC_IO select GENERIC_CLOCKEVENTS + select HAVE_GCC_PLUGINS select TTY # Needed for line.c config MMU @@ -30,10 +33,9 @@ config PCI config PCMCIA bool -# Yet to do! config TRACE_IRQFLAGS_SUPPORT bool - default n + default y config LOCKDEP_SUPPORT bool diff --git a/arch/um/Makefile b/arch/um/Makefile index e3abe6f3156d..0ca46ededfc7 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -78,8 +78,8 @@ include $(ARCH_DIR)/Makefile-os-$(OS) KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/include \ -I$(srctree)/$(HOST_DIR)/include/uapi \ - -I$(HOST_DIR)/include/generated \ - -I$(HOST_DIR)/include/generated/uapi + -I$(objtree)/$(HOST_DIR)/include/generated \ + -I$(objtree)/$(HOST_DIR)/include/generated/uapi # -Derrno=kernel_errno - This turns all kernel references to errno into # kernel_errno to separate them from the libc errno. This allows -fno-common diff --git a/arch/um/include/asm/irqflags.h b/arch/um/include/asm/irqflags.h index c780d8a16773..3bb221e1d5a4 100644 --- a/arch/um/include/asm/irqflags.h +++ b/arch/um/include/asm/irqflags.h @@ -6,37 +6,33 @@ extern int set_signals(int enable); extern void block_signals(void); extern void unblock_signals(void); +#define arch_local_save_flags arch_local_save_flags static inline unsigned long arch_local_save_flags(void) { return get_signals(); } +#define arch_local_irq_restore arch_local_irq_restore static inline void arch_local_irq_restore(unsigned long flags) { set_signals(flags); } +#define arch_local_irq_enable arch_local_irq_enable static inline void arch_local_irq_enable(void) { unblock_signals(); } +#define arch_local_irq_disable arch_local_irq_disable static inline void arch_local_irq_disable(void) { block_signals(); } -static inline unsigned long arch_local_irq_save(void) -{ - unsigned long flags; - flags = arch_local_save_flags(); - arch_local_irq_disable(); - return flags; -} +#define ARCH_IRQ_DISABLED 0 +#define ARCh_IRQ_ENABLED (SIGIO|SIGVTALRM) -static inline bool arch_irqs_disabled(void) -{ - return arch_local_save_flags() == 0; -} +#include <asm-generic/irqflags.h> #endif diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index a6a5e42caaef..2f36d515762e 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -3,6 +3,11 @@ # Licensed under the GPL # +# Don't instrument UML-specific code; without this, we may crash when +# accessing the instrumentation buffer for the first time from the +# kernel. +KCOV_INSTRUMENT := n + CPPFLAGS_vmlinux.lds := -DSTART=$(LDS_START) \ -DELF_ARCH=$(LDS_ELF_ARCH) \ -DELF_FORMAT=$(LDS_ELF_FORMAT) \ diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c index 55cead809b18..48bae81f8dca 100644 --- a/arch/um/kernel/initrd.c +++ b/arch/um/kernel/initrd.c @@ -37,8 +37,6 @@ static int __init read_initrd(void) } area = alloc_bootmem(size); - if (area == NULL) - return 0; if (load_initrd(initrd, area, size) == -1) return 0; diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 16630e75f056..e8175a8aa22c 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -319,9 +319,6 @@ int __init linux_main(int argc, char **argv) start_vm = VMALLOC_START; - setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem); - mem_total_pages(physmem_size, iomem_size, highmem); - virtmem_size = physmem_size; stack = (unsigned long) argv; stack &= ~(1024 * 1024 - 1); @@ -334,7 +331,6 @@ int __init linux_main(int argc, char **argv) printf("Kernel virtual memory size shrunk to %lu bytes\n", virtmem_size); - stack_protections((unsigned long) &init_thread_info); os_flush_stdout(); return start_uml(); @@ -342,6 +338,10 @@ int __init linux_main(int argc, char **argv) void __init setup_arch(char **cmdline_p) { + stack_protections((unsigned long) &init_thread_info); + setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem); + mem_total_pages(physmem_size, iomem_size, highmem); + paging_init(); strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index 08ff5094fcdd..ada473bf6f46 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -3,6 +3,9 @@ # Licensed under the GPL # +# Don't instrument UML-specific code +KCOV_INSTRUMENT := n + obj-y = aio.o execvp.o file.o helper.o irq.o main.o mem.o process.o \ registers.o sigio.o signal.o start_up.o time.o tty.o \ umid.o user_syms.o util.o drivers/ skas/ diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 8acaf4e384c0..a86d7cc2c2d8 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -15,6 +15,7 @@ #include <kern_util.h> #include <os.h> #include <sysdep/mcontext.h> +#include <um_malloc.h> void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { [SIGTRAP] = relay_signal, @@ -32,7 +33,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) struct uml_pt_regs *r; int save_errno = errno; - r = malloc(sizeof(struct uml_pt_regs)); + r = uml_kmalloc(sizeof(struct uml_pt_regs), UM_GFP_ATOMIC); if (!r) panic("out of memory"); @@ -91,7 +92,7 @@ static void timer_real_alarm_handler(mcontext_t *mc) { struct uml_pt_regs *regs; - regs = malloc(sizeof(struct uml_pt_regs)); + regs = uml_kmalloc(sizeof(struct uml_pt_regs), UM_GFP_ATOMIC); if (!regs) panic("out of memory"); diff --git a/arch/unicore32/kernel/pci.c b/arch/unicore32/kernel/pci.c index d45fa5f3e9c4..62137d13c6f9 100644 --- a/arch/unicore32/kernel/pci.c +++ b/arch/unicore32/kernel/pci.c @@ -265,10 +265,8 @@ static int __init pci_common_init(void) pci_fixup_irqs(pci_common_swizzle, pci_puv3_map_irq); - if (!pci_has_flag(PCI_PROBE_ONLY)) { - pci_bus_size_bridges(puv3_bus); - pci_bus_assign_resources(puv3_bus); - } + pci_bus_size_bridges(puv3_bus); + pci_bus_assign_resources(puv3_bus); pci_bus_add_devices(puv3_bus); return 0; } @@ -279,9 +277,6 @@ char * __init pcibios_setup(char *str) if (!strcmp(str, "debug")) { debug_pci = 1; return NULL; - } else if (!strcmp(str, "firmware")) { - pci_add_flags(PCI_PROBE_ONLY); - return NULL; } return str; } diff --git a/arch/unicore32/mm/dma-swiotlb.c b/arch/unicore32/mm/dma-swiotlb.c index 16c08b2143a7..3e9f6489ba38 100644 --- a/arch/unicore32/mm/dma-swiotlb.c +++ b/arch/unicore32/mm/dma-swiotlb.c @@ -19,14 +19,14 @@ static void *unicore_swiotlb_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) + unsigned long attrs) { return swiotlb_alloc_coherent(dev, size, dma_handle, flags); } static void unicore_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, - struct dma_attrs *attrs) + unsigned long attrs) { swiotlb_free_coherent(dev, size, vaddr, dma_addr); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2fa55851d2a9..c580d8c33562 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -80,6 +80,7 @@ config X86 select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_AOUT if X86_32 select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HARDENED_USERCOPY select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP @@ -91,6 +92,7 @@ config X86 select HAVE_ARCH_SOFT_DIRTY if X86_64 select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ARCH_WITHIN_STACK_FRAMES select HAVE_EBPF_JIT if X86_64 select HAVE_CC_STACKPROTECTOR select HAVE_CMPXCHG_DOUBLE @@ -111,6 +113,7 @@ config X86 select HAVE_FUNCTION_GRAPH_FP_TEST select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER + select HAVE_GCC_PLUGINS select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_HW_BREAKPOINT select HAVE_IDE @@ -151,6 +154,7 @@ config X86 select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION select PERF_EVENTS select RTC_LIB + select RTC_MC146818_LIB select SPARSE_IRQ select SRCU select SYSCTL_EXCEPTION_TRACE diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index be8e688fa0d4..12ea8f8384f4 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -96,7 +96,7 @@ $(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE $(call if_changed,zoffset) -AFLAGS_header.o += -I$(obj) +AFLAGS_header.o += -I$(objtree)/$(obj) $(obj)/header.o: $(obj)/zoffset.h LDFLAGS_setup.elf := -T diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index a1e71d431fed..1433f6b4607d 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -204,8 +204,12 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) * handling, because syscall restart has a fixup for compat * syscalls. The fixup is exercised by the ptrace_syscall_32 * selftest. + * + * We also need to clear TS_REGS_POKED_I386: the 32-bit tracer + * special case only applies after poking regs and before the + * very next return to user mode. */ - ti->status &= ~TS_COMPAT; + ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); #endif user_enter_irqoff(); diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 4cddd17153fb..f848572169ea 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -294,7 +294,7 @@ # 285 sys_setaltroot 286 i386 add_key sys_add_key 287 i386 request_key sys_request_key -288 i386 keyctl sys_keyctl +288 i386 keyctl sys_keyctl compat_sys_keyctl 289 i386 ioprio_set sys_ioprio_set 290 i386 ioprio_get sys_ioprio_get 291 i386 inotify_init sys_inotify_init diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 6ba89a1ab0e5..d5409660f5de 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -75,7 +75,7 @@ CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ -fno-omit-frame-pointer -foptimize-sibling-calls \ -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO -$(vobjs): KBUILD_CFLAGS += $(CFL) +$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) # # vDSO code runs in userspace and -pg doesn't help with profiling anyway. @@ -145,6 +145,7 @@ KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 2f02d23a05ef..94d54d0defa7 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -96,9 +96,8 @@ static notrace cycle_t vread_pvclock(int *mode) { const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; cycle_t ret; - u64 tsc, pvti_tsc; - u64 last, delta, pvti_system_time; - u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift; + u64 last; + u32 version; /* * Note: The kernel and hypervisor must guarantee that cpu ID @@ -123,29 +122,15 @@ static notrace cycle_t vread_pvclock(int *mode) */ do { - version = pvti->version; - - smp_rmb(); + version = pvclock_read_begin(pvti); if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) { *mode = VCLOCK_NONE; return 0; } - tsc = rdtsc_ordered(); - pvti_tsc_to_system_mul = pvti->tsc_to_system_mul; - pvti_tsc_shift = pvti->tsc_shift; - pvti_system_time = pvti->system_time; - pvti_tsc = pvti->tsc_timestamp; - - /* Make sure that the version double-check is last. */ - smp_rmb(); - } while (unlikely((version & 1) || version != pvti->version)); - - delta = tsc - pvti_tsc; - ret = pvti_system_time + - pvclock_scale_delta(delta, pvti_tsc_to_system_mul, - pvti_tsc_shift); + ret = __pvclock_read_cycles(pvti); + } while (pvclock_read_retry(pvti, version)); /* refer to vread_tsc() comment for rationale */ last = gtod->cycle_last; diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h index 63a03bb91497..4f741192846d 100644 --- a/arch/x86/entry/vdso/vdso2c.h +++ b/arch/x86/entry/vdso/vdso2c.h @@ -22,6 +22,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff)); + if (hdr->e_type != ET_DYN) + fail("input is not a shared object\n"); + /* Walk the segment table. */ for (i = 0; i < GET_LE(&hdr->e_phnum); i++) { if (GET_LE(&pt[i].p_type) == PT_LOAD) { @@ -49,6 +52,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, if (stripped_len < load_size) fail("stripped input is too short\n"); + if (!dyn) + fail("input has no PT_DYNAMIC section -- your toolchain is buggy\n"); + /* Walk the dynamic table */ for (i = 0; dyn + i < dyn_end && GET_LE(&dyn[i].d_tag) != DT_NULL; i++) { diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index fad97886d8b1..d0efb5cb1b00 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -263,10 +263,13 @@ static bool check_hw_exists(void) return true; msr_fail: - pr_cont("Broken PMU hardware detected, using software events only.\n"); - printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n", - boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR, - reg, val_new); + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + pr_cont("PMU not available due to virtualization, using software events only.\n"); + } else { + pr_cont("Broken PMU hardware detected, using software events only.\n"); + pr_err("Failed to access perfctr msr (MSR %x is %Lx)\n", + reg, val_new); + } return false; } diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 3a27b93e6261..44461626830e 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -9,7 +9,6 @@ #include <linux/kmemcheck.h> #include <linux/scatterlist.h> #include <linux/dma-debug.h> -#include <linux/dma-attrs.h> #include <asm/io.h> #include <asm/swiotlb.h> #include <linux/dma-contiguous.h> @@ -48,11 +47,11 @@ extern int dma_supported(struct device *hwdev, u64 mask); extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag, - struct dma_attrs *attrs); + unsigned long attrs); extern void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, - struct dma_attrs *attrs); + unsigned long attrs); #ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ extern bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index fea7724141a0..e7f155c3045e 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -344,8 +344,8 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm, */ static inline int mmap_is_ia32(void) { - return config_enabled(CONFIG_X86_32) || - (config_enabled(CONFIG_COMPAT) && + return IS_ENABLED(CONFIG_X86_32) || + (IS_ENABLED(CONFIG_COMPAT) && test_thread_flag(TIF_ADDR32)); } diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 116b58347501..2737366ea583 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -137,9 +137,9 @@ static inline int copy_fregs_to_user(struct fregs_state __user *fx) static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) { - if (config_enabled(CONFIG_X86_32)) + if (IS_ENABLED(CONFIG_X86_32)) return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); - else if (config_enabled(CONFIG_AS_FXSAVEQ)) + else if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); /* See comment in copy_fxregs_to_kernel() below. */ @@ -150,10 +150,10 @@ static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) { int err; - if (config_enabled(CONFIG_X86_32)) { + if (IS_ENABLED(CONFIG_X86_32)) { err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } else { - if (config_enabled(CONFIG_AS_FXSAVEQ)) { + if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) { err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } else { /* See comment in copy_fxregs_to_kernel() below. */ @@ -166,9 +166,9 @@ static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) { - if (config_enabled(CONFIG_X86_32)) + if (IS_ENABLED(CONFIG_X86_32)) return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - else if (config_enabled(CONFIG_AS_FXSAVEQ)) + else if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); /* See comment in copy_fxregs_to_kernel() below. */ @@ -190,9 +190,9 @@ static inline int copy_user_to_fregs(struct fregs_state __user *fx) static inline void copy_fxregs_to_kernel(struct fpu *fpu) { - if (config_enabled(CONFIG_X86_32)) + if (IS_ENABLED(CONFIG_X86_32)) asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave)); - else if (config_enabled(CONFIG_AS_FXSAVEQ)) + else if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); else { /* Using "rex64; fxsave %0" is broken because, if the memory diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 69e62862b622..33ae3a4d0159 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -35,8 +35,9 @@ #include <asm/asm.h> #include <asm/kvm_page_track.h> -#define KVM_MAX_VCPUS 255 -#define KVM_SOFT_MAX_VCPUS 160 +#define KVM_MAX_VCPUS 288 +#define KVM_SOFT_MAX_VCPUS 240 +#define KVM_MAX_VCPU_ID 1023 #define KVM_USER_MEM_SLOTS 509 /* memory slots that are not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 3 @@ -599,6 +600,7 @@ struct kvm_vcpu_arch { u64 mcg_cap; u64 mcg_status; u64 mcg_ctl; + u64 mcg_ext_ctl; u64 *mce_banks; /* Cache MMIO info */ @@ -682,9 +684,12 @@ struct kvm_arch_memory_slot { struct kvm_apic_map { struct rcu_head rcu; u8 mode; - struct kvm_lapic *phys_map[256]; - /* first index is cluster id second is cpu id in a cluster */ - struct kvm_lapic *logical_map[16][16]; + u32 max_apic_id; + union { + struct kvm_lapic *xapic_flat_map[8]; + struct kvm_lapic *xapic_cluster_map[16][4]; + }; + struct kvm_lapic *phys_map[]; }; /* Hyper-V emulation context */ @@ -779,6 +784,9 @@ struct kvm_arch { u32 ldr_mode; struct page *avic_logical_id_table_page; struct page *avic_physical_id_table_page; + + bool x2apic_format; + bool x2apic_broadcast_quirk_disabled; }; struct kvm_vm_stat { @@ -1006,6 +1014,11 @@ struct kvm_x86_ops { int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); + + int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc); + void (*cancel_hv_timer)(struct kvm_vcpu *vcpu); + + void (*setup_mce)(struct kvm_vcpu *vcpu); }; struct kvm_arch_async_pf { @@ -1026,7 +1039,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_init_vm(struct kvm *kvm); void kvm_mmu_uninit_vm(struct kvm *kvm); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask); + u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask); void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, @@ -1077,6 +1090,10 @@ extern u32 kvm_max_guest_tsc_khz; extern u8 kvm_tsc_scaling_ratio_frac_bits; /* maximum allowed value of TSC scaling ratio */ extern u64 kvm_max_tsc_scaling_ratio; +/* 1ull << kvm_tsc_scaling_ratio_frac_bits */ +extern u64 kvm_default_tsc_scaling_ratio; + +extern u64 kvm_mce_cap_supported; enum emulation_result { EMULATE_DONE, /* no further processing */ @@ -1352,7 +1369,7 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu); -void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, +void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, struct kvm_lapic_irq *irq); static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h index 0f555cc31984..24acd9ba7837 100644 --- a/arch/x86/include/asm/mc146818rtc.h +++ b/arch/x86/include/asm/mc146818rtc.h @@ -6,7 +6,6 @@ #include <asm/io.h> #include <asm/processor.h> -#include <linux/mc146818rtc.h> #ifndef RTC_PORT #define RTC_PORT(x) (0x70 + (x)) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 396348196aa7..d8abfcf524d1 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -155,7 +155,7 @@ static inline void arch_exit_mmap(struct mm_struct *mm) #ifdef CONFIG_X86_64 static inline bool is_64bit_mm(struct mm_struct *mm) { - return !config_enabled(CONFIG_IA32_EMULATION) || + return !IS_ENABLED(CONFIG_IA32_EMULATION) || !(mm->context.ia32_compat == TIF_IA32); } #else diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 7c1c89598688..d019f0cc80ec 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -25,6 +25,24 @@ void pvclock_resume(void); void pvclock_touch_watchdogs(void); +static __always_inline +unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src) +{ + unsigned version = src->version & ~1; + /* Make sure that the version is read before the data. */ + virt_rmb(); + return version; +} + +static __always_inline +bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src, + unsigned version) +{ + /* Make sure that the version is re-read after the data. */ + virt_rmb(); + return unlikely(version != src->version); +} + /* * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, * yielding a 64-bit result. @@ -69,23 +87,12 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) } static __always_inline -unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, - cycle_t *cycles, u8 *flags) +cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src) { - unsigned version; - cycle_t offset; - u64 delta; - - version = src->version; - /* Make the latest version visible */ - smp_rmb(); - - delta = rdtsc_ordered() - src->tsc_timestamp; - offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, - src->tsc_shift); - *cycles = src->system_time + offset; - *flags = src->flags; - return version; + u64 delta = rdtsc_ordered() - src->tsc_timestamp; + cycle_t offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, + src->tsc_shift); + return src->system_time + offset; } struct pvclock_vsyscall_time_info { diff --git a/arch/x86/include/asm/rtc.h b/arch/x86/include/asm/rtc.h deleted file mode 100644 index f71c3b0ed360..000000000000 --- a/arch/x86/include/asm/rtc.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/rtc.h> diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index d0fe23ec7e98..14824fc78f7e 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -193,7 +193,6 @@ struct __attribute__ ((__packed__)) vmcb { struct vmcb_save_area save; }; -#define SVM_CPUID_FEATURE_SHIFT 2 #define SVM_CPUID_FUNC 0x8000000a #define SVM_VM_CR_SVM_DISABLE 4 diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index ab05d73e2bb7..d2f69b9ff732 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -31,9 +31,9 @@ static inline void dma_mark_clean(void *addr, size_t size) {} extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs); + unsigned long attrs); extern void x86_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, - struct dma_attrs *attrs); + unsigned long attrs); #endif /* _ASM_X86_SWIOTLB_H */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 999b7cd2e78c..4e23dd15c661 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, * TS_COMPAT is set for 32-bit syscall entries and then * remains set until we return to user mode. */ - if (task_thread_info(task)->status & TS_COMPAT) + if (task_thread_info(task)->status & (TS_COMPAT|TS_I386_REGS_POKED)) /* * Sign-extend the value so (int)-EFOO becomes (long)-EFOO * and will match correctly in comparisons. @@ -239,9 +239,6 @@ static inline int syscall_get_arch(void) * TS_COMPAT is set for 32-bit syscall entry and then * remains set until we return to user mode. * - * TIF_IA32 tasks should always have TS_COMPAT set at - * system call time. - * * x32 tasks should be considered AUDIT_ARCH_X86_64. */ if (task_thread_info(current)->status & TS_COMPAT) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 89bff044a6f5..8b7c8d8e0852 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -176,6 +176,50 @@ static inline unsigned long current_stack_pointer(void) return sp; } +/* + * Walks up the stack frames to make sure that the specified object is + * entirely contained by a single stack frame. + * + * Returns: + * 1 if within a frame + * -1 if placed across a frame boundary (or outside stack) + * 0 unable to determine (no frame pointers, etc) + */ +static inline int arch_within_stack_frames(const void * const stack, + const void * const stackend, + const void *obj, unsigned long len) +{ +#if defined(CONFIG_FRAME_POINTER) + const void *frame = NULL; + const void *oldframe; + + oldframe = __builtin_frame_address(1); + if (oldframe) + frame = __builtin_frame_address(2); + /* + * low ----------------------------------------------> high + * [saved bp][saved ip][args][local vars][saved bp][saved ip] + * ^----------------^ + * allow copies only within here + */ + while (stack <= frame && frame < stackend) { + /* + * If obj + len extends past the last frame, this + * check won't pass and the next frame will be 0, + * causing us to bail out and correctly report + * the copy as invalid. + */ + if (obj + len <= frame) + return obj >= oldframe + 2 * sizeof(void *) ? 1 : -1; + oldframe = frame; + frame = *(const void * const *)frame; + } + return -1; +#else + return 0; +#endif +} + #else /* !__ASSEMBLY__ */ #ifdef CONFIG_X86_64 @@ -219,32 +263,11 @@ static inline unsigned long current_stack_pointer(void) * have to worry about atomic accesses. */ #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ -#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ +#ifdef CONFIG_COMPAT +#define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ +#endif #ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags)); -} -static inline void clear_restore_sigmask(void) -{ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; -} -static inline bool test_restore_sigmask(void) -{ - return current_thread_info()->status & TS_RESTORE_SIGMASK; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - if (!(ti->status & TS_RESTORE_SIGMASK)) - return false; - ti->status &= ~TS_RESTORE_SIGMASK; - return true; -} static inline bool in_ia32_syscall(void) { diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index c03bfb68c503..a0ae610b9280 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -761,9 +761,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n) * case, and do only runtime checking for non-constant sizes. */ - if (likely(sz < 0 || sz >= n)) + if (likely(sz < 0 || sz >= n)) { + check_object_size(to, n, false); n = _copy_from_user(to, from, n); - else if(__builtin_constant_p(n)) + } else if (__builtin_constant_p(n)) copy_from_user_overflow(); else __copy_from_user_overflow(sz, n); @@ -781,9 +782,10 @@ copy_to_user(void __user *to, const void *from, unsigned long n) might_fault(); /* See the comment in copy_from_user() above. */ - if (likely(sz < 0 || sz >= n)) + if (likely(sz < 0 || sz >= n)) { + check_object_size(from, n, true); n = _copy_to_user(to, from, n); - else if(__builtin_constant_p(n)) + } else if (__builtin_constant_p(n)) copy_to_user_overflow(); else __copy_to_user_overflow(sz, n); @@ -812,21 +814,21 @@ copy_to_user(void __user *to, const void *from, unsigned long n) #define user_access_begin() __uaccess_begin() #define user_access_end() __uaccess_end() -#define unsafe_put_user(x, ptr) \ -({ \ +#define unsafe_put_user(x, ptr, err_label) \ +do { \ int __pu_err; \ __put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT); \ - __builtin_expect(__pu_err, 0); \ -}) + if (unlikely(__pu_err)) goto err_label; \ +} while (0) -#define unsafe_get_user(x, ptr) \ -({ \ +#define unsafe_get_user(x, ptr, err_label) \ +do { \ int __gu_err; \ unsigned long __gu_val; \ __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ - __builtin_expect(__gu_err, 0); \ -}) + if (unlikely(__gu_err)) goto err_label; \ +} while (0) #endif /* _ASM_X86_UACCESS_H */ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 4b32da24faaf..7d3bdd1ed697 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -37,6 +37,7 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero static __always_inline unsigned long __must_check __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) { + check_object_size(from, n, true); return __copy_to_user_ll(to, from, n); } @@ -95,6 +96,7 @@ static __always_inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { might_fault(); + check_object_size(to, n, false); if (__builtin_constant_p(n)) { unsigned long ret; diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 2eac2aa3e37f..673059a109fe 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -54,6 +54,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) { int ret = 0; + check_object_size(dst, size, false); if (!__builtin_constant_p(size)) return copy_user_generic(dst, (__force void *)src, size); switch (size) { @@ -119,6 +120,7 @@ int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size) { int ret = 0; + check_object_size(src, size, true); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, src, size); switch (size) { diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index cce9ee68e335..0116b2ee9e64 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -83,23 +83,19 @@ static inline void cpu_emergency_vmxoff(void) */ static inline int cpu_has_svm(const char **msg) { - uint32_t eax, ebx, ecx, edx; - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { if (msg) *msg = "not amd"; return 0; } - cpuid(0x80000000, &eax, &ebx, &ecx, &edx); - if (eax < SVM_CPUID_FUNC) { + if (boot_cpu_data.extended_cpuid_level < SVM_CPUID_FUNC) { if (msg) *msg = "can't execute cpuid_8000000a"; return 0; } - cpuid(0x80000001, &eax, &ebx, &ecx, &edx); - if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { + if (!boot_cpu_has(X86_FEATURE_SVM)) { if (msg) *msg = "svm not available"; return 0; diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h index acd844c017d3..f02f025ff988 100644 --- a/arch/x86/include/asm/xen/page-coherent.h +++ b/arch/x86/include/asm/xen/page-coherent.h @@ -2,12 +2,11 @@ #define _ASM_X86_XEN_PAGE_COHERENT_H #include <asm/page.h> -#include <linux/dma-attrs.h> #include <linux/dma-mapping.h> static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) + unsigned long attrs) { void *vstart = (void*)__get_free_pages(flags, get_order(size)); *dma_handle = virt_to_phys(vstart); @@ -16,18 +15,18 @@ static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, void *cpu_addr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { free_pages((unsigned long) cpu_addr, get_order(size)); } static inline void xen_dma_map_page(struct device *hwdev, struct page *page, dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, struct dma_attrs *attrs) { } + enum dma_data_direction dir, unsigned long attrs) { } static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) { } + unsigned long attrs) { } static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { } diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 42d27a62a404..63ff468a7986 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -241,7 +241,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, static dma_addr_t gart_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long bus; phys_addr_t paddr = page_to_phys(page) + offset; @@ -263,7 +263,7 @@ static dma_addr_t gart_map_page(struct device *dev, struct page *page, */ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long iommu_page; int npages; @@ -285,7 +285,7 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr, * Wrapper for pci_unmap_single working with scatterlists. */ static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { struct scatterlist *s; int i; @@ -293,7 +293,7 @@ static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, for_each_sg(sg, s, nents, i) { if (!s->dma_length || !s->length) break; - gart_unmap_page(dev, s->dma_address, s->dma_length, dir, NULL); + gart_unmap_page(dev, s->dma_address, s->dma_length, dir, 0); } } @@ -315,7 +315,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, addr = dma_map_area(dev, addr, s->length, dir, 0); if (addr == bad_dma_addr) { if (i > 0) - gart_unmap_sg(dev, sg, i, dir, NULL); + gart_unmap_sg(dev, sg, i, dir, 0); nents = 0; sg[0].dma_length = 0; break; @@ -386,7 +386,7 @@ dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, * Merge chunks that have page aligned sizes into a continuous mapping. */ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { struct scatterlist *s, *ps, *start_sg, *sgmap; int need = 0, nextneed, i, out, start; @@ -456,7 +456,7 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, error: flush_gart(); - gart_unmap_sg(dev, sg, out, dir, NULL); + gart_unmap_sg(dev, sg, out, dir, 0); /* When it was forced or merged try again in a dumb way */ if (force_iommu || iommu_merge) { @@ -476,7 +476,7 @@ error: /* allocate and map a coherent mapping */ static void * gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, - gfp_t flag, struct dma_attrs *attrs) + gfp_t flag, unsigned long attrs) { dma_addr_t paddr; unsigned long align_mask; @@ -508,9 +508,9 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, /* free a coherent mapping */ static void gart_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, struct dma_attrs *attrs) + dma_addr_t dma_addr, unsigned long attrs) { - gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL); + gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0); dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs); } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 7943d38c57ca..20abd912f0e4 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -147,7 +147,7 @@ static int force_enable_local_apic __initdata; */ static int __init parse_lapic(char *arg) { - if (config_enabled(CONFIG_X86_32) && !arg) + if (IS_ENABLED(CONFIG_X86_32) && !arg) force_enable_local_apic = 1; else if (arg && !strncmp(arg, "notscdeadline", 13)) setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index a5e400afc563..6066d945c40e 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -523,7 +523,7 @@ static int apic_set_affinity(struct irq_data *irq_data, struct apic_chip_data *data = irq_data->chip_data; int err, irq = irq_data->irq; - if (!config_enabled(CONFIG_SMP)) + if (!IS_ENABLED(CONFIG_SMP)) return -EPERM; if (!cpumask_intersects(dest, cpu_online_mask)) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 9e231d88bb33..a184c210efba 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -159,8 +159,8 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) struct task_struct *tsk = current; int ia32_fxstate = (buf != buf_fx); - ia32_fxstate &= (config_enabled(CONFIG_X86_32) || - config_enabled(CONFIG_IA32_EMULATION)); + ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || + IS_ENABLED(CONFIG_IA32_EMULATION)); if (!access_ok(VERIFY_WRITE, buf, size)) return -EACCES; @@ -268,8 +268,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) u64 xfeatures = 0; int fx_only = 0; - ia32_fxstate &= (config_enabled(CONFIG_X86_32) || - config_enabled(CONFIG_IA32_EMULATION)); + ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || + IS_ENABLED(CONFIG_IA32_EMULATION)); if (!buf) { fpu__clear(fpu); @@ -416,8 +416,8 @@ void fpu__init_prepare_fx_sw_frame(void) fx_sw_reserved.xfeatures = xfeatures_mask; fx_sw_reserved.xstate_size = fpu_user_xstate_size; - if (config_enabled(CONFIG_IA32_EMULATION) || - config_enabled(CONFIG_X86_32)) { + if (IS_ENABLED(CONFIG_IA32_EMULATION) || + IS_ENABLED(CONFIG_X86_32)) { int fsave_header_size = sizeof(struct fregs_state); fx_sw_reserved_ia32 = fx_sw_reserved; diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 3d747070fe67..ed16e58658a4 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1019,7 +1019,6 @@ void hpet_disable(void) */ #include <linux/mc146818rtc.h> #include <linux/rtc.h> -#include <asm/rtc.h> #define DEFAULT_RTC_INT_FREQ 64 #define DEFAULT_RTC_SHIFT 6 @@ -1243,7 +1242,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) memset(&curr_time, 0, sizeof(struct rtc_time)); if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) - get_rtc_time(&curr_time); + mc146818_set_time(&curr_time); if (hpet_rtc_flags & RTC_UIE && curr_time.tm_sec != hpet_prev_update_sec) { diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 04b132a767f1..bfe4d6c96fbd 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -17,6 +17,7 @@ #include <linux/debugfs.h> #include <linux/delay.h> #include <linux/hardirq.h> +#include <linux/ratelimit.h> #include <linux/slab.h> #include <linux/export.h> diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 833b1d329c47..5d400ba1349d 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -340,7 +340,7 @@ static inline struct iommu_table *find_iommu_table(struct device *dev) static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems,enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct iommu_table *tbl = find_iommu_table(dev); struct scatterlist *s; @@ -364,7 +364,7 @@ static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist, static int calgary_map_sg(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct iommu_table *tbl = find_iommu_table(dev); struct scatterlist *s; @@ -396,7 +396,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, return nelems; error: - calgary_unmap_sg(dev, sg, nelems, dir, NULL); + calgary_unmap_sg(dev, sg, nelems, dir, 0); for_each_sg(sg, s, nelems, i) { sg->dma_address = DMA_ERROR_CODE; sg->dma_length = 0; @@ -407,7 +407,7 @@ error: static dma_addr_t calgary_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { void *vaddr = page_address(page) + offset; unsigned long uaddr; @@ -422,7 +422,7 @@ static dma_addr_t calgary_map_page(struct device *dev, struct page *page, static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct iommu_table *tbl = find_iommu_table(dev); unsigned int npages; @@ -432,7 +432,7 @@ static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr, } static void* calgary_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs) + dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs) { void *ret = NULL; dma_addr_t mapping; @@ -466,7 +466,7 @@ error: static void calgary_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned int npages; struct iommu_table *tbl = find_iommu_table(dev); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 6ba014c61d62..d30c37750765 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -77,7 +77,7 @@ void __init pci_iommu_alloc(void) } void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long dma_mask; struct page *page; @@ -120,7 +120,7 @@ again: } void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, struct dma_attrs *attrs) + dma_addr_t dma_addr, unsigned long attrs) { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; struct page *page = virt_to_page(vaddr); diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index da15918d1c81..00e71ce396a8 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -28,7 +28,7 @@ check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) static dma_addr_t nommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { dma_addr_t bus = page_to_phys(page) + offset; WARN_ON(size == 0); @@ -55,7 +55,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, */ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *s; int i; diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 5069ef560d83..b47edb8f5256 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -16,7 +16,7 @@ int swiotlb __read_mostly; void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) + unsigned long attrs) { void *vaddr; @@ -37,7 +37,7 @@ void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, void x86_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, - struct dma_attrs *attrs) + unsigned long attrs) { if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr))) swiotlb_free_coherent(dev, size, vaddr, dma_addr); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 600edd225e81..f79576a541ff 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -923,15 +923,18 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) case offsetof(struct user32, regs.orig_eax): /* - * A 32-bit debugger setting orig_eax means to restore - * the state of the task restarting a 32-bit syscall. - * Make sure we interpret the -ERESTART* codes correctly - * in case the task is not actually still sitting at the - * exit from a 32-bit syscall with TS_COMPAT still set. + * Warning: bizarre corner case fixup here. A 32-bit + * debugger setting orig_eax to -1 wants to disable + * syscall restart. Make sure that the syscall + * restart code sign-extends orig_ax. Also make sure + * we interpret the -ERESTART* codes correctly if + * loaded into regs->ax in case the task is not + * actually still sitting at the exit from a 32-bit + * syscall with TS_COMPAT still set. */ regs->orig_ax = value; if (syscall_get_nr(child, regs) >= 0) - task_thread_info(child)->status |= TS_COMPAT; + task_thread_info(child)->status |= TS_I386_REGS_POKED; break; case offsetof(struct user32, regs.eflags): diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 06c58ce46762..3599404e3089 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -64,14 +64,9 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src) u8 flags; do { - version = src->version; - /* Make the latest version visible */ - smp_rmb(); - + version = pvclock_read_begin(src); flags = src->flags; - /* Make sure that the version double-check is last. */ - smp_rmb(); - } while ((src->version & 1) || version != src->version); + } while (pvclock_read_retry(src, version)); return flags & valid_flags; } @@ -84,10 +79,10 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) u8 flags; do { - version = __pvclock_read_cycles(src, &ret, &flags); - /* Make sure that the version double-check is last. */ - smp_rmb(); - } while ((src->version & 1) || version != src->version); + version = pvclock_read_begin(src); + ret = __pvclock_read_cycles(src); + flags = src->flags; + } while (pvclock_read_retry(src, version)); if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) { src->flags &= ~PVCLOCK_GUEST_STOPPED; diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index eceaa082ec3f..79c6311cd912 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -13,7 +13,6 @@ #include <asm/x86_init.h> #include <asm/time.h> #include <asm/intel-mid.h> -#include <asm/rtc.h> #include <asm/setup.h> #ifdef CONFIG_X86_32 @@ -47,7 +46,7 @@ int mach_set_rtc_mmss(const struct timespec *now) rtc_time_to_tm(nowtime, &tm); if (!rtc_valid_tm(&tm)) { - retval = set_rtc_time(&tm); + retval = mc146818_set_time(&tm); if (retval) printk(KERN_ERR "%s: RTC write failed with error %d\n", __func__, retval); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 22cc2f9f8aec..04cb3212db2d 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -146,7 +146,7 @@ static int restore_sigcontext(struct pt_regs *regs, buf = (void __user *)buf_val; } get_user_catch(err); - err |= fpu__restore_sig(buf, config_enabled(CONFIG_X86_32)); + err |= fpu__restore_sig(buf, IS_ENABLED(CONFIG_X86_32)); force_iret(); @@ -245,14 +245,14 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, struct fpu *fpu = ¤t->thread.fpu; /* redzone */ - if (config_enabled(CONFIG_X86_64)) + if (IS_ENABLED(CONFIG_X86_64)) sp -= 128; /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa.sa_flags & SA_ONSTACK) { if (sas_ss_flags(sp) == 0) sp = current->sas_ss_sp + current->sas_ss_size; - } else if (config_enabled(CONFIG_X86_32) && + } else if (IS_ENABLED(CONFIG_X86_32) && !onsigstack && (regs->ss & 0xffff) != __USER_DS && !(ka->sa.sa_flags & SA_RESTORER) && @@ -262,7 +262,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, } if (fpu->fpstate_active) { - sp = fpu__alloc_mathframe(sp, config_enabled(CONFIG_X86_32), + sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32), &buf_fx, &math_size); *fpstate = (void __user *)sp; } @@ -662,18 +662,18 @@ badframe: static inline int is_ia32_compat_frame(void) { - return config_enabled(CONFIG_IA32_EMULATION) && + return IS_ENABLED(CONFIG_IA32_EMULATION) && test_thread_flag(TIF_IA32); } static inline int is_ia32_frame(void) { - return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame(); + return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame(); } static inline int is_x32_frame(void) { - return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); + return IS_ENABLED(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); } static int @@ -760,8 +760,30 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) { -#ifdef CONFIG_X86_64 - if (in_ia32_syscall()) + /* + * This function is fundamentally broken as currently + * implemented. + * + * The idea is that we want to trigger a call to the + * restart_block() syscall and that we want in_ia32_syscall(), + * in_x32_syscall(), etc. to match whatever they were in the + * syscall being restarted. We assume that the syscall + * instruction at (regs->ip - 2) matches whatever syscall + * instruction we used to enter in the first place. + * + * The problem is that we can get here when ptrace pokes + * syscall-like values into regs even if we're not in a syscall + * at all. + * + * For now, we maintain historical behavior and guess based on + * stored state. We could do better by saving the actual + * syscall arch in restart_block or (with caveats on x32) by + * checking if regs->ip points to 'int $0x80'. The current + * behavior is incorrect if a tracer has a different bitness + * than the tracee. + */ +#ifdef CONFIG_IA32_EMULATION + if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 639a6e34500c..ab8e32f7b9a8 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -32,7 +32,6 @@ config KVM select HAVE_KVM_IRQ_BYPASS select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_EVENTFD - select KVM_APIC_ARCHITECTURE select KVM_ASYNC_PF select USER_RETURN_NOTIFIER select KVM_MMIO diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index a4bf5b45d65a..5fb6c620180e 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -645,7 +645,6 @@ static const struct kvm_io_device_ops speaker_dev_ops = { .write = speaker_ioport_write, }; -/* Caller must hold slots_lock */ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) { struct kvm_pit *pit; @@ -690,6 +689,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) kvm_pit_set_reinject(pit, true); + mutex_lock(&kvm->slots_lock); kvm_iodevice_init(&pit->dev, &pit_dev_ops); ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS, KVM_PIT_MEM_LENGTH, &pit->dev); @@ -704,12 +704,14 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) if (ret < 0) goto fail_register_speaker; } + mutex_unlock(&kvm->slots_lock); return pit; fail_register_speaker: kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev); fail_register_pit: + mutex_unlock(&kvm->slots_lock); kvm_pit_set_reinject(pit, false); kthread_stop(pit->worker_task); fail_kthread: diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c index 95e0e6481f07..b181426f67b4 100644 --- a/arch/x86/kvm/iommu.c +++ b/arch/x86/kvm/iommu.c @@ -28,9 +28,7 @@ #include <linux/moduleparam.h> #include <linux/pci.h> #include <linux/stat.h> -#include <linux/dmar.h> #include <linux/iommu.h> -#include <linux/intel-iommu.h> #include "assigned-dev.h" static bool allow_unsafe_assigned_interrupts; diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 61ebdc13a29a..035731eb3897 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -120,4 +120,7 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu); int apic_has_pending_timer(struct kvm_vcpu *vcpu); +int kvm_setup_default_irq_routing(struct kvm *kvm); +int kvm_setup_empty_irq_routing(struct kvm *kvm); + #endif diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index dfb4c6476877..25810b144b58 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -110,13 +110,17 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, return r; } -void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, +void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, struct kvm_lapic_irq *irq) { - trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); + trace_kvm_msi_set_irq(e->msi.address_lo | (kvm->arch.x2apic_format ? + (u64)e->msi.address_hi << 32 : 0), + e->msi.data); irq->dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; + if (kvm->arch.x2apic_format) + irq->dest_id |= MSI_ADDR_EXT_DEST_ID(e->msi.address_hi); irq->vector = (e->msi.data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; @@ -129,15 +133,24 @@ void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, } EXPORT_SYMBOL_GPL(kvm_set_msi_irq); +static inline bool kvm_msi_route_invalid(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *e) +{ + return kvm->arch.x2apic_format && (e->msi.address_hi & 0xff); +} + int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) { struct kvm_lapic_irq irq; + if (kvm_msi_route_invalid(kvm, e)) + return -EINVAL; + if (!level) return -1; - kvm_set_msi_irq(e, &irq); + kvm_set_msi_irq(kvm, e, &irq); return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL); } @@ -153,7 +166,10 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, if (unlikely(e->type != KVM_IRQ_ROUTING_MSI)) return -EWOULDBLOCK; - kvm_set_msi_irq(e, &irq); + if (kvm_msi_route_invalid(kvm, e)) + return -EINVAL; + + kvm_set_msi_irq(kvm, e, &irq); if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) return r; @@ -248,7 +264,8 @@ static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e, return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint); } -int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; @@ -285,6 +302,9 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, e->msi.address_lo = ue->u.msi.address_lo; e->msi.address_hi = ue->u.msi.address_hi; e->msi.data = ue->u.msi.data; + + if (kvm_msi_route_invalid(kvm, e)) + goto out; break; case KVM_IRQ_ROUTING_HV_SINT: e->set = kvm_hv_set_sint; @@ -388,21 +408,16 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, kvm->arch.nr_reserved_ioapic_pins); for (i = 0; i < nr_ioapic_pins; ++i) { hlist_for_each_entry(entry, &table->map[i], link) { - u32 dest_id, dest_mode; - bool level; + struct kvm_lapic_irq irq; if (entry->type != KVM_IRQ_ROUTING_MSI) continue; - dest_id = (entry->msi.address_lo >> 12) & 0xff; - dest_mode = (entry->msi.address_lo >> 2) & 0x1; - level = entry->msi.data & MSI_DATA_TRIGGER_LEVEL; - if (level && kvm_apic_match_dest(vcpu, NULL, 0, - dest_id, dest_mode)) { - u32 vector = entry->msi.data & 0xff; - - __set_bit(vector, - ioapic_handled_vectors); - } + + kvm_set_msi_irq(vcpu->kvm, entry, &irq); + + if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0, + irq.dest_id, irq.dest_mode)) + __set_bit(irq.vector, ioapic_handled_vectors); } } srcu_read_unlock(&kvm->irq_srcu, idx); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 57549ed47ca5..b62c85229711 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -115,26 +115,43 @@ static inline int apic_enabled(struct kvm_lapic *apic) (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) -/* The logical map is definitely wrong if we have multiple - * modes at the same time. (Physical map is always right.) - */ -static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map) -{ - return !(map->mode & (map->mode - 1)); +static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, + u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) { + switch (map->mode) { + case KVM_APIC_MODE_X2APIC: { + u32 offset = (dest_id >> 16) * 16; + u32 max_apic_id = map->max_apic_id; + + if (offset <= max_apic_id) { + u8 cluster_size = min(max_apic_id - offset + 1, 16U); + + *cluster = &map->phys_map[offset]; + *mask = dest_id & (0xffff >> (16 - cluster_size)); + } else { + *mask = 0; + } + + return true; + } + case KVM_APIC_MODE_XAPIC_FLAT: + *cluster = map->xapic_flat_map; + *mask = dest_id & 0xff; + return true; + case KVM_APIC_MODE_XAPIC_CLUSTER: + *cluster = map->xapic_cluster_map[dest_id >> 4]; + *mask = dest_id & 0xf; + return true; + default: + /* Not optimized. */ + return false; + } } -static inline void -apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid) +static void kvm_apic_map_free(struct rcu_head *rcu) { - unsigned lid_bits; + struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu); - BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_CLUSTER != 4); - BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_FLAT != 8); - BUILD_BUG_ON(KVM_APIC_MODE_X2APIC != 16); - lid_bits = map->mode; - - *cid = dest_id >> lid_bits; - *lid = dest_id & ((1 << lid_bits) - 1); + kvfree(map); } static void recalculate_apic_map(struct kvm *kvm) @@ -142,17 +159,26 @@ static void recalculate_apic_map(struct kvm *kvm) struct kvm_apic_map *new, *old = NULL; struct kvm_vcpu *vcpu; int i; - - new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL); + u32 max_id = 255; mutex_lock(&kvm->arch.apic_map_lock); + kvm_for_each_vcpu(i, vcpu, kvm) + if (kvm_apic_present(vcpu)) + max_id = max(max_id, kvm_apic_id(vcpu->arch.apic)); + + new = kvm_kvzalloc(sizeof(struct kvm_apic_map) + + sizeof(struct kvm_lapic *) * ((u64)max_id + 1)); + if (!new) goto out; + new->max_apic_id = max_id; + kvm_for_each_vcpu(i, vcpu, kvm) { struct kvm_lapic *apic = vcpu->arch.apic; - u16 cid, lid; + struct kvm_lapic **cluster; + u16 mask; u32 ldr, aid; if (!kvm_apic_present(vcpu)) @@ -161,7 +187,7 @@ static void recalculate_apic_map(struct kvm *kvm) aid = kvm_apic_id(apic); ldr = kvm_lapic_get_reg(apic, APIC_LDR); - if (aid < ARRAY_SIZE(new->phys_map)) + if (aid <= new->max_apic_id) new->phys_map[aid] = apic; if (apic_x2apic_mode(apic)) { @@ -174,13 +200,11 @@ static void recalculate_apic_map(struct kvm *kvm) new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER; } - if (!kvm_apic_logical_map_valid(new)) + if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask)) continue; - apic_logical_id(new, ldr, &cid, &lid); - - if (lid && cid < ARRAY_SIZE(new->logical_map)) - new->logical_map[cid][ffs(lid) - 1] = apic; + if (mask) + cluster[ffs(mask) - 1] = apic; } out: old = rcu_dereference_protected(kvm->arch.apic_map, @@ -189,7 +213,7 @@ out: mutex_unlock(&kvm->arch.apic_map_lock); if (old) - kfree_rcu(old, rcu); + call_rcu(&old->rcu, kvm_apic_map_free); kvm_make_scan_ioapic_request(kvm); } @@ -210,7 +234,7 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) } } -static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) +static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id) { kvm_lapic_set_reg(apic, APIC_ID, id << 24); recalculate_apic_map(apic->vcpu->kvm); @@ -222,11 +246,11 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) recalculate_apic_map(apic->vcpu->kvm); } -static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u8 id) +static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) { u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); - kvm_lapic_set_reg(apic, APIC_ID, id << 24); + kvm_lapic_set_reg(apic, APIC_ID, id); kvm_lapic_set_reg(apic, APIC_LDR, ldr); recalculate_apic_map(apic->vcpu->kvm); } @@ -599,17 +623,30 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) } } -/* KVM APIC implementation has two quirks - * - dest always begins at 0 while xAPIC MDA has offset 24, - * - IOxAPIC messages have to be delivered (directly) to x2APIC. +/* The KVM local APIC implementation has two quirks: + * + * - the xAPIC MDA stores the destination at bits 24-31, while this + * is not true of struct kvm_lapic_irq's dest_id field. This is + * just a quirk in the API and is not problematic. + * + * - in-kernel IOAPIC messages have to be delivered directly to + * x2APIC, because the kernel does not support interrupt remapping. + * In order to support broadcast without interrupt remapping, x2APIC + * rewrites the destination of non-IPI messages from APIC_BROADCAST + * to X2APIC_BROADCAST. + * + * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API. This is + * important when userspace wants to use x2APIC-format MSIs, because + * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7". */ -static u32 kvm_apic_mda(unsigned int dest_id, struct kvm_lapic *source, - struct kvm_lapic *target) +static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id, + struct kvm_lapic *source, struct kvm_lapic *target) { bool ipi = source != NULL; bool x2apic_mda = apic_x2apic_mode(ipi ? source : target); - if (!ipi && dest_id == APIC_BROADCAST && x2apic_mda) + if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled && + !ipi && dest_id == APIC_BROADCAST && x2apic_mda) return X2APIC_BROADCAST; return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id); @@ -619,7 +656,7 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, unsigned int dest, int dest_mode) { struct kvm_lapic *target = vcpu->arch.apic; - u32 mda = kvm_apic_mda(dest, source, target); + u32 mda = kvm_apic_mda(vcpu, dest, source, target); apic_debug("target %p, source %p, dest 0x%x, " "dest_mode 0x%x, short_hand 0x%x\n", @@ -671,102 +708,126 @@ static void kvm_apic_disabled_lapic_found(struct kvm *kvm) } } -bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map) +static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src, + struct kvm_lapic_irq *irq, struct kvm_apic_map *map) { - struct kvm_apic_map *map; - unsigned long bitmap = 1; - struct kvm_lapic **dst; - int i; - bool ret, x2apic_ipi; + if (kvm->arch.x2apic_broadcast_quirk_disabled) { + if ((irq->dest_id == APIC_BROADCAST && + map->mode != KVM_APIC_MODE_X2APIC)) + return true; + if (irq->dest_id == X2APIC_BROADCAST) + return true; + } else { + bool x2apic_ipi = src && *src && apic_x2apic_mode(*src); + if (irq->dest_id == (x2apic_ipi ? + X2APIC_BROADCAST : APIC_BROADCAST)) + return true; + } - *r = -1; + return false; +} - if (irq->shorthand == APIC_DEST_SELF) { - *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); - return true; - } +/* Return true if the interrupt can be handled by using *bitmap as index mask + * for valid destinations in *dst array. + * Return false if kvm_apic_map_get_dest_lapic did nothing useful. + * Note: we may have zero kvm_lapic destinations when we return true, which + * means that the interrupt should be dropped. In this case, *bitmap would be + * zero and *dst undefined. + */ +static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, + struct kvm_lapic **src, struct kvm_lapic_irq *irq, + struct kvm_apic_map *map, struct kvm_lapic ***dst, + unsigned long *bitmap) +{ + int i, lowest; - if (irq->shorthand) + if (irq->shorthand == APIC_DEST_SELF && src) { + *dst = src; + *bitmap = 1; + return true; + } else if (irq->shorthand) return false; - x2apic_ipi = src && apic_x2apic_mode(src); - if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST)) + if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map)) return false; - ret = true; - rcu_read_lock(); - map = rcu_dereference(kvm->arch.apic_map); - - if (!map) { - ret = false; - goto out; + if (irq->dest_mode == APIC_DEST_PHYSICAL) { + if (irq->dest_id > map->max_apic_id) { + *bitmap = 0; + } else { + *dst = &map->phys_map[irq->dest_id]; + *bitmap = 1; + } + return true; } - if (irq->dest_mode == APIC_DEST_PHYSICAL) { - if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) - goto out; + *bitmap = 0; + if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst, + (u16 *)bitmap)) + return false; - dst = &map->phys_map[irq->dest_id]; - } else { - u16 cid; + if (!kvm_lowest_prio_delivery(irq)) + return true; - if (!kvm_apic_logical_map_valid(map)) { - ret = false; - goto out; + if (!kvm_vector_hashing_enabled()) { + lowest = -1; + for_each_set_bit(i, bitmap, 16) { + if (!(*dst)[i]) + continue; + if (lowest < 0) + lowest = i; + else if (kvm_apic_compare_prio((*dst)[i]->vcpu, + (*dst)[lowest]->vcpu) < 0) + lowest = i; } + } else { + if (!*bitmap) + return true; - apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap); + lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap), + bitmap, 16); - if (cid >= ARRAY_SIZE(map->logical_map)) - goto out; + if (!(*dst)[lowest]) { + kvm_apic_disabled_lapic_found(kvm); + *bitmap = 0; + return true; + } + } - dst = map->logical_map[cid]; + *bitmap = (lowest >= 0) ? 1 << lowest : 0; - if (!kvm_lowest_prio_delivery(irq)) - goto set_irq; + return true; +} - if (!kvm_vector_hashing_enabled()) { - int l = -1; - for_each_set_bit(i, &bitmap, 16) { - if (!dst[i]) - continue; - if (l < 0) - l = i; - else if (kvm_apic_compare_prio(dst[i]->vcpu, - dst[l]->vcpu) < 0) - l = i; - } - bitmap = (l >= 0) ? 1 << l : 0; - } else { - int idx; - unsigned int dest_vcpus; +bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map) +{ + struct kvm_apic_map *map; + unsigned long bitmap; + struct kvm_lapic **dst = NULL; + int i; + bool ret; - dest_vcpus = hweight16(bitmap); - if (dest_vcpus == 0) - goto out; + *r = -1; - idx = kvm_vector_to_index(irq->vector, - dest_vcpus, &bitmap, 16); + if (irq->shorthand == APIC_DEST_SELF) { + *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); + return true; + } - if (!dst[idx]) { - kvm_apic_disabled_lapic_found(kvm); - goto out; - } + rcu_read_lock(); + map = rcu_dereference(kvm->arch.apic_map); - bitmap = (idx >= 0) ? 1 << idx : 0; + ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap); + if (ret) + for_each_set_bit(i, &bitmap, 16) { + if (!dst[i]) + continue; + if (*r < 0) + *r = 0; + *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); } - } -set_irq: - for_each_set_bit(i, &bitmap, 16) { - if (!dst[i]) - continue; - if (*r < 0) - *r = 0; - *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); - } -out: rcu_read_unlock(); return ret; } @@ -789,8 +850,9 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu) { struct kvm_apic_map *map; + unsigned long bitmap; + struct kvm_lapic **dst = NULL; bool ret = false; - struct kvm_lapic *dst = NULL; if (irq->shorthand) return false; @@ -798,69 +860,16 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, rcu_read_lock(); map = rcu_dereference(kvm->arch.apic_map); - if (!map) - goto out; - - if (irq->dest_mode == APIC_DEST_PHYSICAL) { - if (irq->dest_id == 0xFF) - goto out; - - if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) - goto out; - - dst = map->phys_map[irq->dest_id]; - if (dst && kvm_apic_present(dst->vcpu)) - *dest_vcpu = dst->vcpu; - else - goto out; - } else { - u16 cid; - unsigned long bitmap = 1; - int i, r = 0; - - if (!kvm_apic_logical_map_valid(map)) - goto out; - - apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap); - - if (cid >= ARRAY_SIZE(map->logical_map)) - goto out; - - if (kvm_vector_hashing_enabled() && - kvm_lowest_prio_delivery(irq)) { - int idx; - unsigned int dest_vcpus; - - dest_vcpus = hweight16(bitmap); - if (dest_vcpus == 0) - goto out; + if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) && + hweight16(bitmap) == 1) { + unsigned long i = find_first_bit(&bitmap, 16); - idx = kvm_vector_to_index(irq->vector, dest_vcpus, - &bitmap, 16); - - dst = map->logical_map[cid][idx]; - if (!dst) { - kvm_apic_disabled_lapic_found(kvm); - goto out; - } - - *dest_vcpu = dst->vcpu; - } else { - for_each_set_bit(i, &bitmap, 16) { - dst = map->logical_map[cid][i]; - if (++r == 2) - goto out; - } - - if (dst && kvm_apic_present(dst->vcpu)) - *dest_vcpu = dst->vcpu; - else - goto out; + if (dst[i]) { + *dest_vcpu = dst[i]->vcpu; + ret = true; } } - ret = true; -out: rcu_read_unlock(); return ret; } @@ -1127,12 +1136,6 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) return 0; switch (offset) { - case APIC_ID: - if (apic_x2apic_mode(apic)) - val = kvm_apic_id(apic); - else - val = kvm_apic_id(apic) << 24; - break; case APIC_ARBPRI: apic_debug("Access APIC ARBPRI register which is for P6\n"); break; @@ -1314,6 +1317,111 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) nsec_to_cycles(vcpu, lapic_timer_advance_ns))); } +static void start_sw_tscdeadline(struct kvm_lapic *apic) +{ + u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; + u64 ns = 0; + ktime_t expire; + struct kvm_vcpu *vcpu = apic->vcpu; + unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz; + unsigned long flags; + ktime_t now; + + if (unlikely(!tscdeadline || !this_tsc_khz)) + return; + + local_irq_save(flags); + + now = apic->lapic_timer.timer.base->get_time(); + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); + if (likely(tscdeadline > guest_tsc)) { + ns = (tscdeadline - guest_tsc) * 1000000ULL; + do_div(ns, this_tsc_khz); + expire = ktime_add_ns(now, ns); + expire = ktime_sub_ns(expire, lapic_timer_advance_ns); + hrtimer_start(&apic->lapic_timer.timer, + expire, HRTIMER_MODE_ABS_PINNED); + } else + apic_timer_expired(apic); + + local_irq_restore(flags); +} + +bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) +{ + if (!lapic_in_kernel(vcpu)) + return false; + + return vcpu->arch.apic->lapic_timer.hv_timer_in_use; +} +EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); + +static void cancel_hv_tscdeadline(struct kvm_lapic *apic) +{ + kvm_x86_ops->cancel_hv_timer(apic->vcpu); + apic->lapic_timer.hv_timer_in_use = false; +} + +void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + WARN_ON(!apic->lapic_timer.hv_timer_in_use); + WARN_ON(swait_active(&vcpu->wq)); + cancel_hv_tscdeadline(apic); + apic_timer_expired(apic); +} +EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer); + +static bool start_hv_tscdeadline(struct kvm_lapic *apic) +{ + u64 tscdeadline = apic->lapic_timer.tscdeadline; + + if (atomic_read(&apic->lapic_timer.pending) || + kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) { + if (apic->lapic_timer.hv_timer_in_use) + cancel_hv_tscdeadline(apic); + } else { + apic->lapic_timer.hv_timer_in_use = true; + hrtimer_cancel(&apic->lapic_timer.timer); + + /* In case the sw timer triggered in the window */ + if (atomic_read(&apic->lapic_timer.pending)) + cancel_hv_tscdeadline(apic); + } + trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, + apic->lapic_timer.hv_timer_in_use); + return apic->lapic_timer.hv_timer_in_use; +} + +void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + WARN_ON(apic->lapic_timer.hv_timer_in_use); + + if (apic_lvtt_tscdeadline(apic)) + start_hv_tscdeadline(apic); +} +EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer); + +void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + /* Possibly the TSC deadline timer is not enabled yet */ + if (!apic->lapic_timer.hv_timer_in_use) + return; + + cancel_hv_tscdeadline(apic); + + if (atomic_read(&apic->lapic_timer.pending)) + return; + + start_sw_tscdeadline(apic); +} +EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer); + static void start_apic_timer(struct kvm_lapic *apic) { ktime_t now; @@ -1360,32 +1468,8 @@ static void start_apic_timer(struct kvm_lapic *apic) ktime_to_ns(ktime_add_ns(now, apic->lapic_timer.period))); } else if (apic_lvtt_tscdeadline(apic)) { - /* lapic timer in tsc deadline mode */ - u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; - u64 ns = 0; - ktime_t expire; - struct kvm_vcpu *vcpu = apic->vcpu; - unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz; - unsigned long flags; - - if (unlikely(!tscdeadline || !this_tsc_khz)) - return; - - local_irq_save(flags); - - now = apic->lapic_timer.timer.base->get_time(); - guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); - if (likely(tscdeadline > guest_tsc)) { - ns = (tscdeadline - guest_tsc) * 1000000ULL; - do_div(ns, this_tsc_khz); - expire = ktime_add_ns(now, ns); - expire = ktime_sub_ns(expire, lapic_timer_advance_ns); - hrtimer_start(&apic->lapic_timer.timer, - expire, HRTIMER_MODE_ABS_PINNED); - } else - apic_timer_expired(apic); - - local_irq_restore(flags); + if (!(kvm_x86_ops->set_hv_timer && start_hv_tscdeadline(apic))) + start_sw_tscdeadline(apic); } } @@ -1413,7 +1497,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) switch (reg) { case APIC_ID: /* Local APIC ID */ if (!apic_x2apic_mode(apic)) - kvm_apic_set_id(apic, val >> 24); + kvm_apic_set_xapic_id(apic, val >> 24); else ret = 1; break; @@ -1674,9 +1758,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) /* update jump label if enable bit changes */ if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) { - if (value & MSR_IA32_APICBASE_ENABLE) + if (value & MSR_IA32_APICBASE_ENABLE) { + kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); static_key_slow_dec_deferred(&apic_hw_disabled); - else + } else static_key_slow_inc(&apic_hw_disabled.key); recalculate_apic_map(vcpu->kvm); } @@ -1716,8 +1801,11 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) /* Stop the timer in case it's a reset to an active apic */ hrtimer_cancel(&apic->lapic_timer.timer); - if (!init_event) - kvm_apic_set_id(apic, vcpu->vcpu_id); + if (!init_event) { + kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE | + MSR_IA32_APICBASE_ENABLE); + kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); + } kvm_apic_set_version(apic->vcpu); for (i = 0; i < KVM_APIC_LVT_NUM; i++) @@ -1856,9 +1944,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) * thinking that APIC satet has changed. */ vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; - kvm_lapic_set_base(vcpu, - APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE); - static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ kvm_lapic_reset(vcpu, false); kvm_iodevice_init(&apic->dev, &apic_mmio_ops); @@ -1938,17 +2023,48 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) return vector; } -void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, - struct kvm_lapic_state *s) +static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, + struct kvm_lapic_state *s, bool set) +{ + if (apic_x2apic_mode(vcpu->arch.apic)) { + u32 *id = (u32 *)(s->regs + APIC_ID); + + if (vcpu->kvm->arch.x2apic_format) { + if (*id != vcpu->vcpu_id) + return -EINVAL; + } else { + if (set) + *id >>= 24; + else + *id <<= 24; + } + } + + return 0; +} + +int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) +{ + memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s)); + return kvm_apic_state_fixup(vcpu, s, false); +} + +int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { struct kvm_lapic *apic = vcpu->arch.apic; + int r; + kvm_lapic_set_base(vcpu, vcpu->arch.apic_base); /* set SPIV separately to get count of SW disabled APICs right */ apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV))); + + r = kvm_apic_state_fixup(vcpu, s, true); + if (r) + return r; memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); - /* call kvm_apic_set_id() to put apic into apic_map */ - kvm_apic_set_id(apic, kvm_apic_id(apic)); + + recalculate_apic_map(vcpu->kvm); kvm_apic_set_version(vcpu); apic_update_ppr(apic); @@ -1974,6 +2090,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, kvm_rtc_eoi_tracking_restore_one(vcpu); vcpu->arch.apic_arb_prio = 0; + + return 0; } void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 891c6da7d4aa..f60d01c29d51 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -20,6 +20,7 @@ struct kvm_timer { u64 tscdeadline; u64 expired_tscdeadline; atomic_t pending; /* accumulated triggered timers */ + bool hv_timer_in_use; }; struct kvm_lapic { @@ -80,8 +81,8 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info); -void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, - struct kvm_lapic_state *s); +int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); +int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); @@ -199,9 +200,15 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); } -static inline int kvm_apic_id(struct kvm_lapic *apic) +static inline u32 kvm_apic_id(struct kvm_lapic *apic) { - return (kvm_lapic_get_reg(apic, APIC_ID) >> 24) & 0xff; + /* To avoid a race between apic_base and following APIC_ID update when + * switching to x2apic_mode, the x2apic mode returns initial x2apic id. + */ + if (apic_x2apic_mode(apic)) + return apic->vcpu->vcpu_id; + + return kvm_lapic_get_reg(apic, APIC_ID) >> 24; } bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); @@ -212,4 +219,8 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu); int kvm_vector_to_index(u32 vector, u32 dest_vcpus, const unsigned long *bitmap, u32 bitmap_size); +void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu); +void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu); +void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu); +bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu); #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 745a5f445ae2..3d4cc8cc56a3 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -176,6 +176,7 @@ static u64 __read_mostly shadow_user_mask; static u64 __read_mostly shadow_accessed_mask; static u64 __read_mostly shadow_dirty_mask; static u64 __read_mostly shadow_mmio_mask; +static u64 __read_mostly shadow_present_mask; static void mmu_spte_set(u64 *sptep, u64 spte); static void mmu_free_roots(struct kvm_vcpu *vcpu); @@ -283,13 +284,14 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) } void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask) + u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask) { shadow_user_mask = user_mask; shadow_accessed_mask = accessed_mask; shadow_dirty_mask = dirty_mask; shadow_nx_mask = nx_mask; shadow_x_mask = x_mask; + shadow_present_mask = p_mask; } EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); @@ -305,7 +307,7 @@ static int is_nx(struct kvm_vcpu *vcpu) static int is_shadow_present_pte(u64 pte) { - return pte & PT_PRESENT_MASK && !is_mmio_spte(pte); + return (pte & 0xFFFFFFFFull) && !is_mmio_spte(pte); } static int is_large_pte(u64 pte) @@ -524,7 +526,7 @@ static void mmu_spte_set(u64 *sptep, u64 new_spte) } /* Rules for using mmu_spte_update: - * Update the state bits, it means the mapped pfn is not changged. + * Update the state bits, it means the mapped pfn is not changed. * * Whenever we overwrite a writable spte with a read-only one we * should flush remote TLBs. Otherwise rmap_write_protect @@ -2246,10 +2248,9 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, { u64 spte; - BUILD_BUG_ON(VMX_EPT_READABLE_MASK != PT_PRESENT_MASK || - VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); + BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); - spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | + spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK | shadow_user_mask | shadow_x_mask | shadow_accessed_mask; mmu_spte_set(sptep, spte); @@ -2516,13 +2517,19 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, kvm_pfn_t pfn, bool speculative, bool can_unsync, bool host_writable) { - u64 spte; + u64 spte = 0; int ret = 0; if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access)) return 0; - spte = PT_PRESENT_MASK; + /* + * For the EPT case, shadow_present_mask is 0 if hardware + * supports exec-only page table entries. In that case, + * ACC_USER_MASK and shadow_user_mask are used to represent + * read access. See FNAME(gpte_access) in paging_tmpl.h. + */ + spte |= shadow_present_mask; if (!speculative) spte |= shadow_accessed_mask; @@ -3190,7 +3197,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) MMU_WARN_ON(VALID_PAGE(root)); if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) { pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i); - if (!is_present_gpte(pdptr)) { + if (!(pdptr & PT_PRESENT_MASK)) { vcpu->arch.mmu.pae_root[i] = 0; continue; } @@ -3915,9 +3922,7 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, * clearer. */ smap = cr4_smap && u && !uf && !ff; - } else - /* Not really needed: no U/S accesses on ept */ - u = 1; + } fault = (ff && !x) || (uf && !u) || (wf && !w) || (smapf && smap); diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 66b33b96a31b..ddc56e91f2e4 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -93,11 +93,6 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) return kvm_mmu_load(vcpu); } -static inline int is_present_gpte(unsigned long pte) -{ - return pte & PT_PRESENT_MASK; -} - /* * Currently, we have two sorts of write-protection, a) the first one * write-protects guest page to sync the guest modification, b) another one is diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index bc019f70e0b6..a01105485315 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -131,7 +131,7 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte) static inline int FNAME(is_present_gpte)(unsigned long pte) { #if PTTYPE != PTTYPE_EPT - return is_present_gpte(pte); + return pte & PT_PRESENT_MASK; #else return pte & 7; #endif @@ -181,13 +181,19 @@ no_present: return true; } +/* + * For PTTYPE_EPT, a page table can be executable but not readable + * on supported processors. Therefore, set_spte does not automatically + * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK + * to signify readability since it isn't used in the EPT case + */ static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte) { unsigned access; #if PTTYPE == PTTYPE_EPT access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) | ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) | - ACC_USER_MASK; + ((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0); #else BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK); BUILD_BUG_ON(ACC_EXEC_MASK != 1); diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c index ab38af4f4947..9d4a8504a95a 100644 --- a/arch/x86/kvm/pmu_intel.c +++ b/arch/x86/kvm/pmu_intel.c @@ -93,7 +93,7 @@ static unsigned intel_find_fixed_event(int idx) return intel_arch_events[fixed_pmc_events[idx]].event_type; } -/* check if a PMC is enabled by comparising it with globl_ctrl bits. */ +/* check if a PMC is enabled by comparing it with globl_ctrl bits. */ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 16ef31b87452..af523d84d102 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1577,7 +1577,7 @@ static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { /* - * Any change of EFLAGS.VM is accompained by a reload of SS + * Any change of EFLAGS.VM is accompanied by a reload of SS * (caused by either a task switch or an inter-privilege IRET), * so we do not need to update the CPL here. */ @@ -4940,6 +4940,12 @@ out: static void svm_handle_external_intr(struct kvm_vcpu *vcpu) { local_irq_enable(); + /* + * We must have an instruction with interrupts enabled, so + * the timer interrupt isn't delayed by the interrupt shadow. + */ + asm("nop"); + local_irq_disable(); } static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 8de925031b5c..0a6cc6754ec5 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1348,6 +1348,21 @@ TRACE_EVENT(kvm_avic_unaccelerated_access, __entry->vec) ); +TRACE_EVENT(kvm_hv_timer_state, + TP_PROTO(unsigned int vcpu_id, unsigned int hv_timer_in_use), + TP_ARGS(vcpu_id, hv_timer_in_use), + TP_STRUCT__entry( + __field(unsigned int, vcpu_id) + __field(unsigned int, hv_timer_in_use) + ), + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->hv_timer_in_use = hv_timer_in_use; + ), + TP_printk("vcpu_id %x hv_timer %x\n", + __entry->vcpu_id, + __entry->hv_timer_in_use) +); #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index df07a0a4611f..a45d8580f91e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -110,6 +110,13 @@ module_param_named(pml, enable_pml, bool, S_IRUGO); #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL +/* Guest_tsc -> host_tsc conversion requires 64-bit division. */ +static int __read_mostly cpu_preemption_timer_multi; +static bool __read_mostly enable_preemption_timer = 1; +#ifdef CONFIG_X86_64 +module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); +#endif + #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) #define KVM_VM_CR0_ALWAYS_ON \ @@ -398,6 +405,12 @@ struct nested_vmx { /* The host-usable pointer to the above */ struct page *current_vmcs12_page; struct vmcs12 *current_vmcs12; + /* + * Cache of the guest's VMCS, existing outside of guest memory. + * Loaded from guest memory during VMPTRLD. Flushed to guest + * memory during VMXOFF, VMCLEAR, VMPTRLD. + */ + struct vmcs12 *cached_vmcs12; struct vmcs *current_shadow_vmcs; /* * Indicates if the shadow vmcs must be updated with the @@ -421,7 +434,6 @@ struct nested_vmx { struct pi_desc *pi_desc; bool pi_pending; u16 posted_intr_nv; - u64 msr_ia32_feature_control; struct hrtimer preemption_timer; bool preemption_timer_expired; @@ -597,11 +609,22 @@ struct vcpu_vmx { #define PML_ENTITY_NUM 512 struct page *pml_pg; + /* apic deadline value in host tsc */ + u64 hv_deadline_tsc; + u64 current_tsc_ratio; bool guest_pkru_valid; u32 guest_pkru; u32 host_pkru; + + /* + * Only bits masked by msr_ia32_feature_control_valid_bits can be set in + * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included + * in msr_ia32_feature_control_valid_bits. + */ + u64 msr_ia32_feature_control; + u64 msr_ia32_feature_control_valid_bits; }; enum segment_cache_field { @@ -841,7 +864,7 @@ static inline short vmcs_field_to_offset(unsigned long field) static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) { - return to_vmx(vcpu)->nested.current_vmcs12; + return to_vmx(vcpu)->nested.cached_vmcs12; } static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr) @@ -1056,6 +1079,58 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void) SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; } +/* + * Comment's format: document - errata name - stepping - processor name. + * Refer from + * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp + */ +static u32 vmx_preemption_cpu_tfms[] = { +/* 323344.pdf - BA86 - D0 - Xeon 7500 Series */ +0x000206E6, +/* 323056.pdf - AAX65 - C2 - Xeon L3406 */ +/* 322814.pdf - AAT59 - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */ +/* 322911.pdf - AAU65 - C2 - i5-600, i3-500 Desktop and Pentium G6950 */ +0x00020652, +/* 322911.pdf - AAU65 - K0 - i5-600, i3-500 Desktop and Pentium G6950 */ +0x00020655, +/* 322373.pdf - AAO95 - B1 - Xeon 3400 Series */ +/* 322166.pdf - AAN92 - B1 - i7-800 and i5-700 Desktop */ +/* + * 320767.pdf - AAP86 - B1 - + * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile + */ +0x000106E5, +/* 321333.pdf - AAM126 - C0 - Xeon 3500 */ +0x000106A0, +/* 321333.pdf - AAM126 - C1 - Xeon 3500 */ +0x000106A1, +/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */ +0x000106A4, + /* 321333.pdf - AAM126 - D0 - Xeon 3500 */ + /* 321324.pdf - AAK139 - D0 - Xeon 5500 */ + /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */ +0x000106A5, +}; + +static inline bool cpu_has_broken_vmx_preemption_timer(void) +{ + u32 eax = cpuid_eax(0x00000001), i; + + /* Clear the reserved bits */ + eax &= ~(0x3U << 14 | 0xfU << 28); + for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++) + if (eax == vmx_preemption_cpu_tfms[i]) + return true; + + return false; +} + +static inline bool cpu_has_vmx_preemption_timer(void) +{ + return vmcs_config.pin_based_exec_ctrl & + PIN_BASED_VMX_PREEMPTION_TIMER; +} + static inline bool cpu_has_vmx_posted_intr(void) { return IS_ENABLED(CONFIG_X86_LOCAL_APIC) && @@ -1603,6 +1678,11 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask) __vmcs_writel(field, __vmcs_readl(field) | mask); } +static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx) +{ + vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS); +} + static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val) { vmcs_write32(VM_ENTRY_CONTROLS, val); @@ -1631,6 +1711,11 @@ static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val) vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val); } +static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx) +{ + vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS); +} + static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val) { vmcs_write32(VM_EXIT_CONTROLS, val); @@ -2121,22 +2206,14 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); + bool already_loaded = vmx->loaded_vmcs->cpu == cpu; if (!vmm_exclusive) kvm_cpu_vmxon(phys_addr); - else if (vmx->loaded_vmcs->cpu != cpu) + else if (!already_loaded) loaded_vmcs_clear(vmx->loaded_vmcs); - if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { - per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; - vmcs_load(vmx->loaded_vmcs->vmcs); - } - - if (vmx->loaded_vmcs->cpu != cpu) { - struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); - unsigned long sysenter_esp; - - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + if (!already_loaded) { local_irq_disable(); crash_disable_local_vmclear(cpu); @@ -2151,6 +2228,18 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) &per_cpu(loaded_vmcss_on_cpu, cpu)); crash_enable_local_vmclear(cpu); local_irq_enable(); + } + + if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { + per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; + vmcs_load(vmx->loaded_vmcs->vmcs); + } + + if (!already_loaded) { + struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); + unsigned long sysenter_esp; + + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); /* * Linux uses per-cpu TSS and GDT, so set these when switching @@ -2716,13 +2805,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | VMX_EPT_INVEPT_BIT; + if (cpu_has_vmx_ept_execute_only()) + vmx->nested.nested_vmx_ept_caps |= + VMX_EPT_EXECUTE_ONLY_BIT; vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept; - /* - * For nested guests, we don't do anything specific - * for single context invalidation. Hence, only advertise - * support for global context invalidation. - */ - vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT; + vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | + VMX_EPT_EXTENT_CONTEXT_BIT; } else vmx->nested.nested_vmx_ept_caps = 0; @@ -2853,7 +2941,6 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) vmx->nested.nested_vmx_secondary_ctls_high); break; case MSR_IA32_VMX_EPT_VPID_CAP: - /* Currently, no nested vpid support */ *pdata = vmx->nested.nested_vmx_ept_caps | ((u64)vmx->nested.nested_vmx_vpid_caps << 32); break; @@ -2864,6 +2951,14 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) return 0; } +static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, + uint64_t val) +{ + uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits; + + return !(val & ~valid_bits); +} + /* * Reads an msr value (of 'msr_index') into 'pdata'. * Returns 0 on success, non-0 otherwise. @@ -2905,10 +3000,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; msr_info->data = vmcs_read64(GUEST_BNDCFGS); break; - case MSR_IA32_FEATURE_CONTROL: - if (!nested_vmx_allowed(vcpu)) + case MSR_IA32_MCG_EXT_CTL: + if (!msr_info->host_initiated && + !(to_vmx(vcpu)->msr_ia32_feature_control & + FEATURE_CONTROL_LMCE)) return 1; - msr_info->data = to_vmx(vcpu)->nested.msr_ia32_feature_control; + msr_info->data = vcpu->arch.mcg_ext_ctl; + break; + case MSR_IA32_FEATURE_CONTROL: + msr_info->data = to_vmx(vcpu)->msr_ia32_feature_control; break; case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: if (!nested_vmx_allowed(vcpu)) @@ -2998,12 +3098,20 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC_ADJUST: ret = kvm_set_msr_common(vcpu, msr_info); break; + case MSR_IA32_MCG_EXT_CTL: + if ((!msr_info->host_initiated && + !(to_vmx(vcpu)->msr_ia32_feature_control & + FEATURE_CONTROL_LMCE)) || + (data & ~MCG_EXT_CTL_LMCE_EN)) + return 1; + vcpu->arch.mcg_ext_ctl = data; + break; case MSR_IA32_FEATURE_CONTROL: - if (!nested_vmx_allowed(vcpu) || - (to_vmx(vcpu)->nested.msr_ia32_feature_control & + if (!vmx_feature_control_msr_valid(vcpu, data) || + (to_vmx(vcpu)->msr_ia32_feature_control & FEATURE_CONTROL_LOCKED && !msr_info->host_initiated)) return 1; - vmx->nested.msr_ia32_feature_control = data; + vmx->msr_ia32_feature_control = data; if (msr_info->host_initiated && data == 0) vmx_leave_nested(vcpu); break; @@ -3297,25 +3405,27 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) vmx_capability.ept, vmx_capability.vpid); } - min = VM_EXIT_SAVE_DEBUG_CONTROLS; + min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT; #ifdef CONFIG_X86_64 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; #endif opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT | - VM_EXIT_ACK_INTR_ON_EXIT | VM_EXIT_CLEAR_BNDCFGS; + VM_EXIT_CLEAR_BNDCFGS; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, &_vmexit_control) < 0) return -EIO; min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; - opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR; + opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR | + PIN_BASED_VMX_PREEMPTION_TIMER; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, &_pin_based_exec_control) < 0) return -EIO; + if (cpu_has_broken_vmx_preemption_timer()) + _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; if (!(_cpu_based_2nd_exec_control & - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) || - !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT)) + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)) _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; min = VM_ENTRY_LOAD_DEBUG_CONTROLS; @@ -3364,7 +3474,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) /* * Some cpus support VM_ENTRY_(LOAD|SAVE)_IA32_PERF_GLOBAL_CTRL - * but due to arrata below it can't be used. Workaround is to use + * but due to errata below it can't be used. Workaround is to use * msr load mechanism to switch IA32_PERF_GLOBAL_CTRL. * * VM Exit May Incorrectly Clear IA32_PERF_GLOBAL_CTRL [34:32] @@ -4781,6 +4891,8 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) if (!kvm_vcpu_apicv_active(&vmx->vcpu)) pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; + /* Enable the preemption timer dynamically */ + pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; return pin_based_exec_ctrl; } @@ -4896,6 +5008,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) /* Control */ vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); + vmx->hv_deadline_tsc = -1; vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); @@ -6016,12 +6129,14 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); trace_kvm_page_fault(gpa, exit_qualification); - /* It is a write fault? */ - error_code = exit_qualification & PFERR_WRITE_MASK; + /* it is a read fault? */ + error_code = (exit_qualification << 2) & PFERR_USER_MASK; + /* it is a write fault? */ + error_code |= exit_qualification & PFERR_WRITE_MASK; /* It is a fetch fault? */ error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK; /* ept page table is present? */ - error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK; + error_code |= (exit_qualification & 0x38) != 0; vcpu->arch.exit_qualification = exit_qualification; @@ -6355,9 +6470,6 @@ static __init int hardware_setup(void) for (msr = 0x800; msr <= 0x8ff; msr++) vmx_disable_intercept_msr_read_x2apic(msr); - /* According SDM, in x2apic mode, the whole id reg is used. But in - * KVM, it only use the highest eight bits. Need to intercept it */ - vmx_enable_intercept_msr_read_x2apic(0x802); /* TMCCT */ vmx_enable_intercept_msr_read_x2apic(0x839); /* TPR */ @@ -6368,10 +6480,12 @@ static __init int hardware_setup(void) vmx_disable_intercept_msr_write_x2apic(0x83f); if (enable_ept) { - kvm_mmu_set_mask_ptes(0ull, + kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull, - 0ull, VMX_EPT_EXECUTABLE_MASK); + 0ull, VMX_EPT_EXECUTABLE_MASK, + cpu_has_vmx_ept_execute_only() ? + 0ull : VMX_EPT_READABLE_MASK); ept_set_mmio_spte_mask(); kvm_enable_tdp(); } else @@ -6393,8 +6507,21 @@ static __init int hardware_setup(void) kvm_x86_ops->enable_log_dirty_pt_masked = NULL; } + if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) { + u64 vmx_msr; + + rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); + cpu_preemption_timer_multi = + vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; + } else { + kvm_x86_ops->set_hv_timer = NULL; + kvm_x86_ops->cancel_hv_timer = NULL; + } + kvm_set_posted_intr_wakeup_handler(wakeup_handler); + kvm_mce_cap_supported |= MCG_LMCE_P; + return alloc_kvm_area(); out8: @@ -6862,16 +6989,22 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } - if ((vmx->nested.msr_ia32_feature_control & VMXON_NEEDED_FEATURES) + if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES) != VMXON_NEEDED_FEATURES) { kvm_inject_gp(vcpu, 0); return 1; } + vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); + if (!vmx->nested.cached_vmcs12) + return -ENOMEM; + if (enable_shadow_vmcs) { shadow_vmcs = alloc_vmcs(); - if (!shadow_vmcs) + if (!shadow_vmcs) { + kfree(vmx->nested.cached_vmcs12); return -ENOMEM; + } /* mark vmcs as shadow */ shadow_vmcs->revision_id |= (1u << 31); /* init shadow vmcs */ @@ -6942,6 +7075,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) vmcs_write64(VMCS_LINK_POINTER, -1ull); } vmx->nested.posted_intr_nv = -1; + + /* Flush VMCS12 to guest memory */ + memcpy(vmx->nested.current_vmcs12, vmx->nested.cached_vmcs12, + VMCS12_SIZE); + kunmap(vmx->nested.current_vmcs12_page); nested_release_page(vmx->nested.current_vmcs12_page); vmx->nested.current_vmptr = -1ull; @@ -6962,6 +7100,7 @@ static void free_nested(struct vcpu_vmx *vmx) nested_release_vmcs12(vmx); if (enable_shadow_vmcs) free_vmcs(vmx->nested.current_shadow_vmcs); + kfree(vmx->nested.cached_vmcs12); /* Unpin physical memory we referred to in current vmcs02 */ if (vmx->nested.apic_access_page) { nested_release_page(vmx->nested.apic_access_page); @@ -7365,6 +7504,13 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) vmx->nested.current_vmptr = vmptr; vmx->nested.current_vmcs12 = new_vmcs12; vmx->nested.current_vmcs12_page = page; + /* + * Load VMCS12 from guest memory since it is not already + * cached. + */ + memcpy(vmx->nested.cached_vmcs12, + vmx->nested.current_vmcs12, VMCS12_SIZE); + if (enable_shadow_vmcs) { vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); @@ -7458,12 +7604,16 @@ static int handle_invept(struct kvm_vcpu *vcpu) switch (type) { case VMX_EPT_EXTENT_GLOBAL: + /* + * TODO: track mappings and invalidate + * single context requests appropriately + */ + case VMX_EPT_EXTENT_CONTEXT: kvm_mmu_sync_roots(vcpu); kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); nested_vmx_succeed(vcpu); break; default: - /* Trap single context invalidation invept calls */ BUG_ON(1); break; } @@ -7560,6 +7710,12 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) return 1; } +static int handle_preemption_timer(struct kvm_vcpu *vcpu) +{ + kvm_lapic_expired_hv_timer(vcpu); + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -7610,6 +7766,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_XSAVES] = handle_xsaves, [EXIT_REASON_XRSTORS] = handle_xrstors, [EXIT_REASON_PML_FULL] = handle_pml_full, + [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, }; static const int kvm_vmx_max_exit_handlers = @@ -7918,6 +8075,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) * the XSS exit bitmap in vmcs12. */ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); + case EXIT_REASON_PREEMPTION_TIMER: + return false; default: return true; } @@ -8303,7 +8462,7 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) * the next L2->L1 exit. */ if (!is_guest_mode(vcpu) || - !nested_cpu_has2(vmx->nested.current_vmcs12, + !nested_cpu_has2(get_vmcs12(&vmx->vcpu), SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) vmcs_write64(APIC_ACCESS_ADDR, hpa); } @@ -8436,7 +8595,6 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) "push %[sp]\n\t" #endif "pushf\n\t" - "orl $0x200, (%%" _ASM_SP ")\n\t" __ASM_SIZE(push) " $%c[cs]\n\t" "call *%[entry]\n\t" : @@ -8449,8 +8607,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) [ss]"i"(__KERNEL_DS), [cs]"i"(__KERNEL_CS) ); - } else - local_irq_enable(); + } } static bool vmx_has_high_real_mode_segbase(void) @@ -8601,6 +8758,26 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) msrs[i].host); } +void vmx_arm_hv_timer(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u64 tscl; + u32 delta_tsc; + + if (vmx->hv_deadline_tsc == -1) + return; + + tscl = rdtsc(); + if (vmx->hv_deadline_tsc > tscl) + /* sure to be 32 bit only because checked on set_hv_timer */ + delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >> + cpu_preemption_timer_multi); + else + delta_tsc = 0; + + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc); +} + static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -8650,6 +8827,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) atomic_switch_perf_msrs(vmx); debugctlmsr = get_debugctlmsr(); + vmx_arm_hv_timer(vcpu); + vmx->__launched = vmx->loaded_vmcs->launched; asm( /* Store host registers */ @@ -8940,6 +9119,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->nested.current_vmptr = -1ull; vmx->nested.current_vmcs12 = NULL; + vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; + return &vmx->vcpu; free_vmcs: @@ -9080,6 +9261,13 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) if (cpu_has_secondary_exec_ctrls()) vmcs_set_secondary_exec_control(secondary_exec_ctl); + + if (nested_vmx_allowed(vcpu)) + to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= + FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + else + to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= + ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; } static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) @@ -9636,9 +9824,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs_write64(VMCS_LINK_POINTER, -1ull); exec_control = vmcs12->pin_based_vm_exec_control; - exec_control |= vmcs_config.pin_based_exec_ctrl; + + /* Preemption timer setting is only taken from vmcs01. */ exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; + exec_control |= vmcs_config.pin_based_exec_ctrl; + if (vmx->hv_deadline_tsc == -1) + exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; + /* Posted interrupts setting is only taken from vmcs12. */ if (nested_cpu_has_posted_intr(vmcs12)) { /* * Note that we use L0's vector here and in @@ -10556,8 +10749,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmcs12->vm_exit_intr_error_code, KVM_ISA_VMX); - vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS)); - vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS)); + vm_entry_controls_reset_shadow(vmx); + vm_exit_controls_reset_shadow(vmx); vmx_segment_cache_clear(vmx); /* if no vmcs02 cache requested, remove the one we used */ @@ -10566,8 +10759,14 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, load_vmcs12_host_state(vcpu, vmcs12); - /* Update TSC_OFFSET if TSC was changed while L2 ran */ + /* Update any VMCS fields that might have changed while L2 ran */ vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); + if (vmx->hv_deadline_tsc == -1) + vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL, + PIN_BASED_VMX_PREEMPTION_TIMER); + else + vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, + PIN_BASED_VMX_PREEMPTION_TIMER); /* This is needed for same reason as it was needed in prepare_vmcs02 */ vmx->host_rsp = 0; @@ -10647,6 +10846,64 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, return X86EMUL_CONTINUE; } +#ifdef CONFIG_X86_64 +/* (a << shift) / divisor, return 1 if overflow otherwise 0 */ +static inline int u64_shl_div_u64(u64 a, unsigned int shift, + u64 divisor, u64 *result) +{ + u64 low = a << shift, high = a >> (64 - shift); + + /* To avoid the overflow on divq */ + if (high >= divisor) + return 1; + + /* Low hold the result, high hold rem which is discarded */ + asm("divq %2\n\t" : "=a" (low), "=d" (high) : + "rm" (divisor), "0" (low), "1" (high)); + *result = low; + + return 0; +} + +static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u64 tscl = rdtsc(); + u64 guest_tscl = kvm_read_l1_tsc(vcpu, tscl); + u64 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; + + /* Convert to host delta tsc if tsc scaling is enabled */ + if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio && + u64_shl_div_u64(delta_tsc, + kvm_tsc_scaling_ratio_frac_bits, + vcpu->arch.tsc_scaling_ratio, + &delta_tsc)) + return -ERANGE; + + /* + * If the delta tsc can't fit in the 32 bit after the multi shift, + * we can't use the preemption timer. + * It's possible that it fits on later vmentries, but checking + * on every vmentry is costly so we just use an hrtimer. + */ + if (delta_tsc >> (cpu_preemption_timer_multi + 32)) + return -ERANGE; + + vmx->hv_deadline_tsc = tscl + delta_tsc; + vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, + PIN_BASED_VMX_PREEMPTION_TIMER); + return 0; +} + +static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + vmx->hv_deadline_tsc = -1; + vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL, + PIN_BASED_VMX_PREEMPTION_TIMER); +} +#endif + static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) { if (ple_gap) @@ -10691,7 +10948,7 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, * this case, return 1, otherwise, return 0. * */ -static int vmx_pre_block(struct kvm_vcpu *vcpu) +static int pi_pre_block(struct kvm_vcpu *vcpu) { unsigned long flags; unsigned int dest; @@ -10758,7 +11015,18 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu) return 0; } -static void vmx_post_block(struct kvm_vcpu *vcpu) +static int vmx_pre_block(struct kvm_vcpu *vcpu) +{ + if (pi_pre_block(vcpu)) + return 1; + + if (kvm_lapic_hv_timer_in_use(vcpu)) + kvm_lapic_switch_to_sw_timer(vcpu); + + return 0; +} + +static void pi_post_block(struct kvm_vcpu *vcpu) { struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); struct pi_desc old, new; @@ -10800,6 +11068,14 @@ static void vmx_post_block(struct kvm_vcpu *vcpu) } } +static void vmx_post_block(struct kvm_vcpu *vcpu) +{ + if (kvm_x86_ops->set_hv_timer) + kvm_lapic_switch_to_hv_timer(vcpu); + + pi_post_block(vcpu); +} + /* * vmx_update_pi_irte - set IRTE for Posted-Interrupts * @@ -10844,7 +11120,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, * We will support full lowest-priority interrupt later. */ - kvm_set_msi_irq(e, &irq); + kvm_set_msi_irq(kvm, e, &irq); if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) { /* * Make sure the IRTE is in remapped mode if @@ -10889,6 +11165,16 @@ out: return ret; } +static void vmx_setup_mce(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.mcg_cap & MCG_LMCE_P) + to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= + FEATURE_CONTROL_LMCE; + else + to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= + ~FEATURE_CONTROL_LMCE; +} + static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -11013,6 +11299,13 @@ static struct kvm_x86_ops vmx_x86_ops = { .pmu_ops = &intel_pmu_ops, .update_pi_irte = vmx_update_pi_irte, + +#ifdef CONFIG_X86_64 + .set_hv_timer = vmx_set_hv_timer, + .cancel_hv_timer = vmx_cancel_hv_timer, +#endif + + .setup_mce = vmx_setup_mce, }; static int __init vmx_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9c496c7e8c00..19f9f9e05c2a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -71,7 +71,8 @@ #define MAX_IO_MSRS 256 #define KVM_MAX_MCE_BANKS 32 -#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) +u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P; +EXPORT_SYMBOL_GPL(kvm_mce_cap_supported); #define emul_to_vcpu(ctxt) \ container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt) @@ -90,8 +91,12 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ + KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) + static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); +static void enter_smm(struct kvm_vcpu *vcpu); static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); struct kvm_x86_ops *kvm_x86_ops __read_mostly; @@ -114,7 +119,8 @@ u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits; EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits); u64 __read_mostly kvm_max_tsc_scaling_ratio; EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio); -static u64 __read_mostly kvm_default_tsc_scaling_ratio; +u64 __read_mostly kvm_default_tsc_scaling_ratio; +EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio); /* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */ static u32 __read_mostly tsc_tolerance_ppm = 250; @@ -538,7 +544,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3) goto out; } for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { - if (is_present_gpte(pdpte[i]) && + if ((pdpte[i] & PT_PRESENT_MASK) && (pdpte[i] & vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) { ret = 0; @@ -983,6 +989,7 @@ static u32 emulated_msrs[] = { MSR_IA32_MISC_ENABLE, MSR_IA32_MCG_STATUS, MSR_IA32_MCG_CTL, + MSR_IA32_MCG_EXT_CTL, MSR_IA32_SMBASE, }; @@ -1162,7 +1169,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) int version; int r; struct pvclock_wall_clock wc; - struct timespec boot; + struct timespec64 boot; if (!wall_clock) return; @@ -1185,13 +1192,13 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) * wall clock specified here. guest system time equals host * system time for us, thus we must fill in host boot time here. */ - getboottime(&boot); + getboottime64(&boot); if (kvm->arch.kvmclock_offset) { - struct timespec ts = ns_to_timespec(kvm->arch.kvmclock_offset); - boot = timespec_sub(boot, ts); + struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset); + boot = timespec64_sub(boot, ts); } - wc.sec = boot.tv_sec; + wc.sec = (u32)boot.tv_sec; /* overflow in 2106 guest time */ wc.nsec = boot.tv_nsec; wc.version = version; @@ -2616,6 +2623,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_TSC_CONTROL: r = kvm_has_tsc_control; break; + case KVM_CAP_X2APIC_API: + r = KVM_X2APIC_API_VALID_FLAGS; + break; default: r = 0; break; @@ -2678,11 +2688,9 @@ long kvm_arch_dev_ioctl(struct file *filp, break; } case KVM_X86_GET_MCE_CAP_SUPPORTED: { - u64 mce_cap; - - mce_cap = KVM_MCE_CAP_SUPPORTED; r = -EFAULT; - if (copy_to_user(argp, &mce_cap, sizeof mce_cap)) + if (copy_to_user(argp, &kvm_mce_cap_supported, + sizeof(kvm_mce_cap_supported))) goto out; r = 0; break; @@ -2734,6 +2742,11 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) rdtsc() - vcpu->arch.last_host_tsc; if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); + + if (kvm_lapic_hv_timer_in_use(vcpu) && + kvm_x86_ops->set_hv_timer(vcpu, + kvm_get_lapic_tscdeadline_msr(vcpu))) + kvm_lapic_switch_to_sw_timer(vcpu); if (check_tsc_unstable()) { u64 offset = kvm_compute_tsc_offset(vcpu, vcpu->arch.last_guest_tsc); @@ -2767,15 +2780,17 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, if (vcpu->arch.apicv_active) kvm_x86_ops->sync_pir_to_irr(vcpu); - memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); - - return 0; + return kvm_apic_get_state(vcpu, s); } static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { - kvm_apic_post_state_restore(vcpu, s); + int r; + + r = kvm_apic_set_state(vcpu, s); + if (r) + return r; update_cr8_intercept(vcpu); return 0; @@ -2860,7 +2875,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, r = -EINVAL; if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) goto out; - if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000)) + if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000)) goto out; r = 0; vcpu->arch.mcg_cap = mcg_cap; @@ -2870,6 +2885,9 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, /* Init IA32_MCi_CTL to all 1s */ for (bank = 0; bank < bank_num; bank++) vcpu->arch.mce_banks[bank*4] = ~(u64)0; + + if (kvm_x86_ops->setup_mce) + kvm_x86_ops->setup_mce(vcpu); out: return r; } @@ -3768,7 +3786,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, r = -EEXIST; if (irqchip_in_kernel(kvm)) goto split_irqchip_unlock; - if (atomic_read(&kvm->online_vcpus)) + if (kvm->created_vcpus) goto split_irqchip_unlock; r = kvm_setup_empty_irq_routing(kvm); if (r) @@ -3782,6 +3800,18 @@ split_irqchip_unlock: mutex_unlock(&kvm->lock); break; } + case KVM_CAP_X2APIC_API: + r = -EINVAL; + if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS) + break; + + if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS) + kvm->arch.x2apic_format = true; + if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) + kvm->arch.x2apic_broadcast_quirk_disabled = true; + + r = 0; + break; default: r = -EINVAL; break; @@ -3833,7 +3863,7 @@ long kvm_arch_vm_ioctl(struct file *filp, if (kvm->arch.vpic) goto create_irqchip_unlock; r = -EINVAL; - if (atomic_read(&kvm->online_vcpus)) + if (kvm->created_vcpus) goto create_irqchip_unlock; r = -ENOMEM; vpic = kvm_create_pic(kvm); @@ -3873,7 +3903,7 @@ long kvm_arch_vm_ioctl(struct file *filp, sizeof(struct kvm_pit_config))) goto out; create_pit: - mutex_lock(&kvm->slots_lock); + mutex_lock(&kvm->lock); r = -EEXIST; if (kvm->arch.vpit) goto create_pit_unlock; @@ -3882,7 +3912,7 @@ long kvm_arch_vm_ioctl(struct file *filp, if (kvm->arch.vpit) r = 0; create_pit_unlock: - mutex_unlock(&kvm->slots_lock); + mutex_unlock(&kvm->lock); break; case KVM_GET_IRQCHIP: { /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ @@ -3989,7 +4019,7 @@ long kvm_arch_vm_ioctl(struct file *filp, case KVM_SET_BOOT_CPU_ID: r = 0; mutex_lock(&kvm->lock); - if (atomic_read(&kvm->online_vcpus) != 0) + if (kvm->created_vcpus) r = -EBUSY; else kvm->arch.bsp_vcpu_id = arg; @@ -5297,13 +5327,8 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu) /* This is a good place to trace that we are exiting SMM. */ trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false); - if (unlikely(vcpu->arch.smi_pending)) { - kvm_make_request(KVM_REQ_SMI, vcpu); - vcpu->arch.smi_pending = 0; - } else { - /* Process a latched INIT, if any. */ - kvm_make_request(KVM_REQ_EVENT, vcpu); - } + /* Process a latched INIT or SMI, if any. */ + kvm_make_request(KVM_REQ_EVENT, vcpu); } kvm_mmu_reset_context(vcpu); @@ -5849,8 +5874,8 @@ int kvm_arch_init(void *opaque) kvm_x86_ops = ops; kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, - PT_DIRTY_MASK, PT64_NX_MASK, 0); - + PT_DIRTY_MASK, PT64_NX_MASK, 0, + PT_PRESENT_MASK); kvm_timer_init(); perf_register_guest_info_callbacks(&kvm_guest_cbs); @@ -6084,7 +6109,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) } /* try to inject new event if pending */ - if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) { + if (vcpu->arch.smi_pending && !is_smm(vcpu)) { + vcpu->arch.smi_pending = false; + enter_smm(vcpu); + } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) { --vcpu->arch.nmi_pending; vcpu->arch.nmi_injected = true; kvm_x86_ops->set_nmi(vcpu); @@ -6107,6 +6135,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) kvm_x86_ops->set_irq(vcpu); } } + return 0; } @@ -6130,7 +6159,7 @@ static void process_nmi(struct kvm_vcpu *vcpu) #define put_smstate(type, buf, offset, val) \ *(type *)((buf) + (offset) - 0x7e00) = val -static u32 process_smi_get_segment_flags(struct kvm_segment *seg) +static u32 enter_smm_get_segment_flags(struct kvm_segment *seg) { u32 flags = 0; flags |= seg->g << 23; @@ -6144,7 +6173,7 @@ static u32 process_smi_get_segment_flags(struct kvm_segment *seg) return flags; } -static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n) +static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n) { struct kvm_segment seg; int offset; @@ -6159,11 +6188,11 @@ static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n) put_smstate(u32, buf, offset + 8, seg.base); put_smstate(u32, buf, offset + 4, seg.limit); - put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg)); + put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg)); } #ifdef CONFIG_X86_64 -static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) +static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) { struct kvm_segment seg; int offset; @@ -6172,7 +6201,7 @@ static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) kvm_get_segment(vcpu, &seg, n); offset = 0x7e00 + n * 16; - flags = process_smi_get_segment_flags(&seg) >> 8; + flags = enter_smm_get_segment_flags(&seg) >> 8; put_smstate(u16, buf, offset, seg.selector); put_smstate(u16, buf, offset + 2, flags); put_smstate(u32, buf, offset + 4, seg.limit); @@ -6180,7 +6209,7 @@ static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) } #endif -static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf) +static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf) { struct desc_ptr dt; struct kvm_segment seg; @@ -6204,13 +6233,13 @@ static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf) put_smstate(u32, buf, 0x7fc4, seg.selector); put_smstate(u32, buf, 0x7f64, seg.base); put_smstate(u32, buf, 0x7f60, seg.limit); - put_smstate(u32, buf, 0x7f5c, process_smi_get_segment_flags(&seg)); + put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg)); kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); put_smstate(u32, buf, 0x7fc0, seg.selector); put_smstate(u32, buf, 0x7f80, seg.base); put_smstate(u32, buf, 0x7f7c, seg.limit); - put_smstate(u32, buf, 0x7f78, process_smi_get_segment_flags(&seg)); + put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg)); kvm_x86_ops->get_gdt(vcpu, &dt); put_smstate(u32, buf, 0x7f74, dt.address); @@ -6221,7 +6250,7 @@ static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf) put_smstate(u32, buf, 0x7f54, dt.size); for (i = 0; i < 6; i++) - process_smi_save_seg_32(vcpu, buf, i); + enter_smm_save_seg_32(vcpu, buf, i); put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu)); @@ -6230,7 +6259,7 @@ static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf) put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase); } -static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf) +static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf) { #ifdef CONFIG_X86_64 struct desc_ptr dt; @@ -6262,7 +6291,7 @@ static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf) kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); put_smstate(u16, buf, 0x7e90, seg.selector); - put_smstate(u16, buf, 0x7e92, process_smi_get_segment_flags(&seg) >> 8); + put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8); put_smstate(u32, buf, 0x7e94, seg.limit); put_smstate(u64, buf, 0x7e98, seg.base); @@ -6272,7 +6301,7 @@ static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf) kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); put_smstate(u16, buf, 0x7e70, seg.selector); - put_smstate(u16, buf, 0x7e72, process_smi_get_segment_flags(&seg) >> 8); + put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8); put_smstate(u32, buf, 0x7e74, seg.limit); put_smstate(u64, buf, 0x7e78, seg.base); @@ -6281,31 +6310,26 @@ static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf) put_smstate(u64, buf, 0x7e68, dt.address); for (i = 0; i < 6; i++) - process_smi_save_seg_64(vcpu, buf, i); + enter_smm_save_seg_64(vcpu, buf, i); #else WARN_ON_ONCE(1); #endif } -static void process_smi(struct kvm_vcpu *vcpu) +static void enter_smm(struct kvm_vcpu *vcpu) { struct kvm_segment cs, ds; struct desc_ptr dt; char buf[512]; u32 cr0; - if (is_smm(vcpu)) { - vcpu->arch.smi_pending = true; - return; - } - trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true); vcpu->arch.hflags |= HF_SMM_MASK; memset(buf, 0, 512); if (guest_cpuid_has_longmode(vcpu)) - process_smi_save_state_64(vcpu, buf); + enter_smm_save_state_64(vcpu, buf); else - process_smi_save_state_32(vcpu, buf); + enter_smm_save_state_32(vcpu, buf); kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf)); @@ -6361,6 +6385,12 @@ static void process_smi(struct kvm_vcpu *vcpu) kvm_mmu_reset_context(vcpu); } +static void process_smi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.smi_pending = true; + kvm_make_request(KVM_REQ_EVENT, vcpu); +} + void kvm_make_scan_ioapic_request(struct kvm *kvm) { kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); @@ -6555,8 +6585,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (inject_pending_event(vcpu, req_int_win) != 0) req_immediate_exit = true; - /* enable NMI/IRQ window open exits if needed */ else { + /* Enable NMI/IRQ window open exits if needed. + * + * SMIs have two cases: 1) they can be nested, and + * then there is nothing to do here because RSM will + * cause a vmexit anyway; 2) or the SMI can be pending + * because inject_pending_event has completed the + * injection of an IRQ or NMI from the previous vmexit, + * and then we request an immediate exit to inject the SMI. + */ + if (vcpu->arch.smi_pending && !is_smm(vcpu)) + req_immediate_exit = true; if (vcpu->arch.nmi_pending) kvm_x86_ops->enable_nmi_window(vcpu); if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) @@ -6607,12 +6647,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_load_guest_xcr0(vcpu); - if (req_immediate_exit) + if (req_immediate_exit) { + kvm_make_request(KVM_REQ_EVENT, vcpu); smp_send_reschedule(vcpu->cpu); + } trace_kvm_entry(vcpu->vcpu_id); wait_lapic_expire(vcpu); - __kvm_guest_enter(); + guest_enter_irqoff(); if (unlikely(vcpu->arch.switch_db_regs)) { set_debugreg(0, 7); @@ -6663,16 +6705,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) ++vcpu->stat.exits; - /* - * We must have an instruction between local_irq_enable() and - * kvm_guest_exit(), so the timer interrupt isn't delayed by - * the interrupt shadow. The stat.exits increment will do nicely. - * But we need to prevent reordering, hence this barrier(): - */ - barrier(); - - kvm_guest_exit(); + guest_exit_irqoff(); + local_irq_enable(); preempt_enable(); vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); @@ -7409,6 +7444,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { vcpu->arch.hflags = 0; + vcpu->arch.smi_pending = 0; atomic_set(&vcpu->arch.nmi_queued, 0); vcpu->arch.nmi_pending = 0; vcpu->arch.nmi_injected = false; @@ -7601,11 +7637,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0; } -bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) -{ - return irqchip_in_kernel(vcpu->kvm) == lapic_in_kernel(vcpu); -} - struct static_key kvm_no_apic_vcpu __read_mostly; EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu); @@ -7872,7 +7903,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kfree(kvm->arch.vpic); kfree(kvm->arch.vioapic); kvm_free_vcpus(kvm); - kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); + kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kvm_mmu_uninit_vm(kvm); } @@ -8380,7 +8411,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, /* * When producer of consumer is unregistered, we change back to * remapped mode, so we can re-use the current implementation - * when the irq is masked/disabed or the consumer side (KVM + * when the irq is masked/disabled or the consumer side (KVM * int this case doesn't want to receive the interrupts. */ ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0); diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S index 02de3d74d2c5..8a602a1e404a 100644 --- a/arch/x86/lib/hweight.S +++ b/arch/x86/lib/hweight.S @@ -35,6 +35,7 @@ ENDPROC(__sw_hweight32) ENTRY(__sw_hweight64) #ifdef CONFIG_X86_64 + pushq %rdi pushq %rdx movq %rdi, %rdx # w -> t @@ -60,6 +61,7 @@ ENTRY(__sw_hweight64) shrq $56, %rax # w = w_tmp >> 56 popq %rdx + popq %rdi ret #else /* CONFIG_X86_32 */ /* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */ diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index fb4c1b42fc7e..620928903be3 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -208,7 +208,7 @@ static int __meminit save_mr(struct map_range *mr, int nr_range, * adjust the page_size_mask for small range to go with * big page size instead small one if nearby are ram too. */ -static void __init_refok adjust_range_page_size_mask(struct map_range *mr, +static void __ref adjust_range_page_size_mask(struct map_range *mr, int nr_range) { int i; @@ -396,7 +396,7 @@ bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn) * This runs before bootmem is initialized and gets pages directly from * the physical memory. To access them they are temporarily mapped. */ -unsigned long __init_refok init_memory_mapping(unsigned long start, +unsigned long __ref init_memory_mapping(unsigned long start, unsigned long end) { struct map_range mr[NR_RANGE_MR]; diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 8196054fedb0..7b6a9d14c8c0 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -133,7 +133,7 @@ static void pcibios_fixup_device_resources(struct pci_dev *dev) if (pci_probe & PCI_NOASSIGN_BARS) { /* * If the BIOS did not assign the BAR, zero out the - * resource so the kernel doesn't attmept to assign + * resource so the kernel doesn't attempt to assign * it later on in pci_assign_unassigned_resources */ for (bar = 0; bar <= PCI_STD_RESOURCE_END; bar++) { diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index 5ceda85b8687..052c1cb76305 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -169,7 +169,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) + unsigned long attrs) { void *vaddr; diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c index 613cac7395c4..b814ca675131 100644 --- a/arch/x86/pci/vmd.c +++ b/arch/x86/pci/vmd.c @@ -119,10 +119,11 @@ static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) static void vmd_irq_enable(struct irq_data *data) { struct vmd_irq *vmdirq = data->chip_data; + unsigned long flags; - raw_spin_lock(&list_lock); + raw_spin_lock_irqsave(&list_lock, flags); list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list); - raw_spin_unlock(&list_lock); + raw_spin_unlock_irqrestore(&list_lock, flags); data->chip->irq_unmask(data); } @@ -130,12 +131,14 @@ static void vmd_irq_enable(struct irq_data *data) static void vmd_irq_disable(struct irq_data *data) { struct vmd_irq *vmdirq = data->chip_data; + unsigned long flags; data->chip->irq_mask(data); - raw_spin_lock(&list_lock); + raw_spin_lock_irqsave(&list_lock, flags); list_del_rcu(&vmdirq->node); - raw_spin_unlock(&list_lock); + INIT_LIST_HEAD_RCU(&vmdirq->node); + raw_spin_unlock_irqrestore(&list_lock, flags); } /* @@ -166,16 +169,20 @@ static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info, * XXX: We can be even smarter selecting the best IRQ once we solve the * affinity problem. */ -static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd) +static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc) { - int i, best = 0; + int i, best = 1; + unsigned long flags; - raw_spin_lock(&list_lock); + if (!desc->msi_attrib.is_msix || vmd->msix_count == 1) + return &vmd->irqs[0]; + + raw_spin_lock_irqsave(&list_lock, flags); for (i = 1; i < vmd->msix_count; i++) if (vmd->irqs[i].count < vmd->irqs[best].count) best = i; vmd->irqs[best].count++; - raw_spin_unlock(&list_lock); + raw_spin_unlock_irqrestore(&list_lock, flags); return &vmd->irqs[best]; } @@ -184,14 +191,15 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info, unsigned int virq, irq_hw_number_t hwirq, msi_alloc_info_t *arg) { - struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(arg->desc)->bus); + struct msi_desc *desc = arg->desc; + struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus); struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL); if (!vmdirq) return -ENOMEM; INIT_LIST_HEAD(&vmdirq->node); - vmdirq->irq = vmd_next_irq(vmd); + vmdirq->irq = vmd_next_irq(vmd, desc); vmdirq->virq = virq; irq_domain_set_info(domain, virq, vmdirq->irq->vmd_vector, info->chip, @@ -203,11 +211,12 @@ static void vmd_msi_free(struct irq_domain *domain, struct msi_domain_info *info, unsigned int virq) { struct vmd_irq *vmdirq = irq_get_chip_data(virq); + unsigned long flags; /* XXX: Potential optimization to rebalance */ - raw_spin_lock(&list_lock); + raw_spin_lock_irqsave(&list_lock, flags); vmdirq->irq->count--; - raw_spin_unlock(&list_lock); + raw_spin_unlock_irqrestore(&list_lock, flags); kfree_rcu(vmdirq, rcu); } @@ -261,18 +270,18 @@ static struct device *to_vmd_dev(struct device *dev) static struct dma_map_ops *vmd_dma_ops(struct device *dev) { - return to_vmd_dev(dev)->archdata.dma_ops; + return get_dma_ops(to_vmd_dev(dev)); } static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr, - gfp_t flag, struct dma_attrs *attrs) + gfp_t flag, unsigned long attrs) { return vmd_dma_ops(dev)->alloc(to_vmd_dev(dev), size, addr, flag, attrs); } static void vmd_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t addr, struct dma_attrs *attrs) + dma_addr_t addr, unsigned long attrs) { return vmd_dma_ops(dev)->free(to_vmd_dev(dev), size, vaddr, addr, attrs); @@ -280,7 +289,7 @@ static void vmd_free(struct device *dev, size_t size, void *vaddr, static int vmd_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t addr, size_t size, - struct dma_attrs *attrs) + unsigned long attrs) { return vmd_dma_ops(dev)->mmap(to_vmd_dev(dev), vma, cpu_addr, addr, size, attrs); @@ -288,7 +297,7 @@ static int vmd_mmap(struct device *dev, struct vm_area_struct *vma, static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t addr, size_t size, - struct dma_attrs *attrs) + unsigned long attrs) { return vmd_dma_ops(dev)->get_sgtable(to_vmd_dev(dev), sgt, cpu_addr, addr, size, attrs); @@ -297,26 +306,26 @@ static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt, static dma_addr_t vmd_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { return vmd_dma_ops(dev)->map_page(to_vmd_dev(dev), page, offset, size, dir, attrs); } static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { vmd_dma_ops(dev)->unmap_page(to_vmd_dev(dev), addr, size, dir, attrs); } static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { return vmd_dma_ops(dev)->map_sg(to_vmd_dev(dev), sg, nents, dir, attrs); } static void vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { vmd_dma_ops(dev)->unmap_sg(to_vmd_dev(dev), sg, nents, dir, attrs); } @@ -367,7 +376,7 @@ static void vmd_teardown_dma_ops(struct vmd_dev *vmd) { struct dma_domain *domain = &vmd->dma_domain; - if (vmd->dev->dev.archdata.dma_ops) + if (get_dma_ops(&vmd->dev->dev)) del_dma_domain(domain); } @@ -379,7 +388,7 @@ static void vmd_teardown_dma_ops(struct vmd_dev *vmd) static void vmd_setup_dma_ops(struct vmd_dev *vmd) { - const struct dma_map_ops *source = vmd->dev->dev.archdata.dma_ops; + const struct dma_map_ops *source = get_dma_ops(&vmd->dev->dev); struct dma_map_ops *dest = &vmd->dma_ops; struct dma_domain *domain = &vmd->dma_domain; @@ -594,7 +603,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd) sd->node = pcibus_to_node(vmd->dev->bus); vmd->irq_domain = pci_msi_create_irq_domain(NULL, &vmd_msi_domain_info, - NULL); + x86_vector_domain); if (!vmd->irq_domain) return -ENODEV; diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c index 524142117296..5fdacb322ceb 100644 --- a/arch/x86/platform/efi/early_printk.c +++ b/arch/x86/platform/efi/early_printk.c @@ -44,7 +44,7 @@ early_initcall(early_efi_map_fb); * In case earlyprintk=efi,keep we have the whole framebuffer mapped already * so just return the offset efi_fb + start. */ -static __init_refok void *early_efi_map(unsigned long start, unsigned long len) +static __ref void *early_efi_map(unsigned long start, unsigned long len) { unsigned long base; @@ -56,7 +56,7 @@ static __init_refok void *early_efi_map(unsigned long start, unsigned long len) return early_ioremap(base + start, len); } -static __init_refok void early_efi_unmap(void *addr, unsigned long len) +static __ref void early_efi_unmap(void *addr, unsigned long len) { if (!efi_fb) early_iounmap(addr, len); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 17c8bbd4e2f0..1fbb408e2e72 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -51,7 +51,6 @@ #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/x86_init.h> -#include <asm/rtc.h> #include <asm/uv/uv.h> static struct efi efi_phys __initdata; diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 04db6fbce96d..677e29e29473 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -25,6 +25,7 @@ #include <linux/bootmem.h> #include <linux/ioport.h> #include <linux/init.h> +#include <linux/mc146818rtc.h> #include <linux/efi.h> #include <linux/uaccess.h> #include <linux/io.h> diff --git a/arch/x86/platform/intel-mid/intel_mid_vrtc.c b/arch/x86/platform/intel-mid/intel_mid_vrtc.c index ee40fcb6e54d..58024862a7eb 100644 --- a/arch/x86/platform/intel-mid/intel_mid_vrtc.c +++ b/arch/x86/platform/intel-mid/intel_mid_vrtc.c @@ -22,6 +22,7 @@ #include <linux/init.h> #include <linux/sfi.h> #include <linux/platform_device.h> +#include <linux/mc146818rtc.h> #include <asm/intel-mid.h> #include <asm/intel_mid_vrtc.h> diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index f2b5e6a5cf95..f0b5f2d402af 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -37,11 +37,11 @@ unsigned long jump_address_phys; */ unsigned long restore_cr3 __visible; -pgd_t *temp_level4_pgt __visible; +unsigned long temp_level4_pgt __visible; unsigned long relocated_restore_code __visible; -static int set_up_temporary_text_mapping(void) +static int set_up_temporary_text_mapping(pgd_t *pgd) { pmd_t *pmd; pud_t *pud; @@ -71,7 +71,7 @@ static int set_up_temporary_text_mapping(void) __pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC)); set_pud(pud + pud_index(restore_jump_address), __pud(__pa(pmd) | _KERNPG_TABLE)); - set_pgd(temp_level4_pgt + pgd_index(restore_jump_address), + set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(pud) | _KERNPG_TABLE)); return 0; @@ -90,15 +90,16 @@ static int set_up_temporary_mappings(void) .kernel_mapping = true, }; unsigned long mstart, mend; + pgd_t *pgd; int result; int i; - temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC); - if (!temp_level4_pgt) + pgd = (pgd_t *)get_safe_page(GFP_ATOMIC); + if (!pgd) return -ENOMEM; /* Prepare a temporary mapping for the kernel text */ - result = set_up_temporary_text_mapping(); + result = set_up_temporary_text_mapping(pgd); if (result) return result; @@ -107,13 +108,12 @@ static int set_up_temporary_mappings(void) mstart = pfn_mapped[i].start << PAGE_SHIFT; mend = pfn_mapped[i].end << PAGE_SHIFT; - result = kernel_ident_mapping_init(&info, temp_level4_pgt, - mstart, mend); - + result = kernel_ident_mapping_init(&info, pgd, mstart, mend); if (result) return result; } + temp_level4_pgt = (unsigned long)pgd - __PAGE_OFFSET; return 0; } diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 8eee0e9c93f0..ce8da3a0412c 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -72,8 +72,6 @@ ENTRY(restore_image) /* code below has been relocated to a safe page */ ENTRY(core_restore_code) /* switch to temporary page tables */ - movq $__PAGE_OFFSET, %rcx - subq %rcx, %rax movq %rax, %cr3 /* flush TLB */ movq %rbx, %rcx diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 12734a96df47..ac58c1616408 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -8,6 +8,8 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib targets += purgatory.ro +KCOV_INSTRUMENT := n + # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That # in turn leaves some undefined symbols like __fentry__ in purgatory and not # sure how to relocate those. Like kexec-tools, use custom flags. diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index c556c5ae8de5..25012abc3409 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -48,7 +48,7 @@ targets += realmode.lds $(obj)/realmode.lds: $(obj)/pasyms.h LDFLAGS_realmode.elf := --emit-relocs -T -CPPFLAGS_realmode.lds += -P -C -I$(obj) +CPPFLAGS_realmode.lds += -P -C -I$(objtree)/$(obj) targets += realmode.elf $(obj)/realmode.elf: $(obj)/realmode.lds $(REALMODE_OBJS) FORCE diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile index 6c803ca49b5d..d72dec406ccb 100644 --- a/arch/x86/um/vdso/Makefile +++ b/arch/x86/um/vdso/Makefile @@ -2,6 +2,9 @@ # Building vDSO images for x86. # +# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. +KCOV_INSTRUMENT := n + VDSO64-y := y vdso-install-$(VDSO64-y) += vdso.so diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index cd993051aed7..8ffb089b19a5 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -34,9 +34,7 @@ #include <linux/edd.h> #include <linux/frame.h> -#ifdef CONFIG_KEXEC_CORE #include <linux/kexec.h> -#endif #include <xen/xen.h> #include <xen/events.h> @@ -1334,7 +1332,8 @@ static void xen_crash_shutdown(struct pt_regs *regs) static int xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) { - xen_reboot(SHUTDOWN_crash); + if (!kexec_crash_loaded()) + xen_reboot(SHUTDOWN_crash); return NOTIFY_DONE; } diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index cd66698348ca..1e68806d6695 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -142,7 +142,7 @@ static void xtensa_sync_sg_for_device(struct device *dev, static void *xtensa_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { unsigned long ret; unsigned long uncached = 0; @@ -171,7 +171,7 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size, } static void xtensa_dma_free(struct device *hwdev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { unsigned long addr = (unsigned long)vaddr + XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR; @@ -185,7 +185,7 @@ static void xtensa_dma_free(struct device *hwdev, size_t size, void *vaddr, static dma_addr_t xtensa_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { dma_addr_t dma_handle = page_to_phys(page) + offset; @@ -195,14 +195,14 @@ static dma_addr_t xtensa_map_page(struct device *dev, struct page *page, static void xtensa_unmap_page(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { xtensa_sync_single_for_cpu(dev, dma_handle, size, dir); } static int xtensa_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *s; int i; @@ -217,7 +217,7 @@ static int xtensa_map_sg(struct device *dev, struct scatterlist *sg, static void xtensa_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *s; int i; |