Diffstat (limited to 'arch')
26 files changed, 386 insertions, 142 deletions
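Note on the new VFP signal-frame layout (see the arch/arm/include/asm/ucontext.h hunk below): VFP_STORAGE_SIZE is now derived from sizeof(struct vfp_sigframe) rather than hard-coded. The size comment in that hunk (8 bytes of magic/size, 264 bytes of ufp, 12 bytes of ufp_exc, 4 bytes of tail padding, 288 bytes in total) can be checked with the minimal standalone sketch below. It models the 32-bit ARM EABI with fixed-width types so the arithmetic holds on any host; it is illustrative only, not part of the patch.

#include <stdint.h>

/* Fixed-width model of the ARM (ILP32) structures introduced by the patch. */
struct user_vfp {
	uint64_t fpregs[32];	/* 256 bytes */
	uint32_t fpscr;		/* 260 bytes, tail-padded to 264 by 8-byte alignment */
};

struct user_vfp_exc {
	uint32_t fpexc;
	uint32_t fpinst;
	uint32_t fpinst2;	/* 12 bytes, no padding */
};

struct vfp_sigframe {
	uint32_t magic;			/* offset 0 */
	uint32_t size;			/* offset 4 */
	struct user_vfp ufp;		/* offset 8 */
	struct user_vfp_exc ufp_exc;	/* offset 272 */
} __attribute__((__aligned__(8)));

/* 8 + 264 + 12 = 284, tail-padded to 288 by the 8-byte alignment. */
_Static_assert(sizeof(struct user_vfp) == 264, "ufp size");
_Static_assert(sizeof(struct user_vfp_exc) == 12, "ufp_exc size");
_Static_assert(sizeof(struct vfp_sigframe) == 288, "VFP_STORAGE_SIZE");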
diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h index 7f36d00600b4..feb988a7ec37 100644 --- a/arch/arm/include/asm/highmem.h +++ b/arch/arm/include/asm/highmem.h @@ -11,7 +11,11 @@ #define kmap_prot PAGE_KERNEL -#define flush_cache_kmaps() flush_cache_all() +#define flush_cache_kmaps() \ + do { \ + if (cache_is_vivt()) \ + flush_cache_all(); \ + } while (0) extern pte_t *pkmap_page_table; @@ -21,11 +25,20 @@ extern void *kmap_high(struct page *page); extern void *kmap_high_get(struct page *page); extern void kunmap_high(struct page *page); +extern void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte); +extern void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte); + +/* + * The following functions are already defined by <linux/highmem.h> + * when CONFIG_HIGHMEM is not set. + */ +#ifdef CONFIG_HIGHMEM extern void *kmap(struct page *page); extern void kunmap(struct page *page); extern void *kmap_atomic(struct page *page, enum km_type type); extern void kunmap_atomic(void *kvaddr, enum km_type type); extern void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); extern struct page *kmap_atomic_to_page(const void *ptr); +#endif #endif diff --git a/arch/arm/include/asm/kmap_types.h b/arch/arm/include/asm/kmap_types.h index c019949a5189..c4b2ea3fbe42 100644 --- a/arch/arm/include/asm/kmap_types.h +++ b/arch/arm/include/asm/kmap_types.h @@ -18,6 +18,7 @@ enum km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_L1_CACHE, KM_L2_CACHE, KM_TYPE_NR }; diff --git a/arch/arm/include/asm/ucontext.h b/arch/arm/include/asm/ucontext.h index bf65e9f4525d..47f023aa8495 100644 --- a/arch/arm/include/asm/ucontext.h +++ b/arch/arm/include/asm/ucontext.h @@ -59,23 +59,22 @@ struct iwmmxt_sigframe { #endif /* CONFIG_IWMMXT */ #ifdef CONFIG_VFP -#if __LINUX_ARM_ARCH__ < 6 -/* For ARM pre-v6, we use fstmiax and fldmiax. This adds one extra - * word after the registers, and a word of padding at the end for - * alignment. */ #define VFP_MAGIC 0x56465001 -#define VFP_STORAGE_SIZE 152 -#else -#define VFP_MAGIC 0x56465002 -#define VFP_STORAGE_SIZE 144 -#endif struct vfp_sigframe { unsigned long magic; unsigned long size; - union vfp_state storage; -}; + struct user_vfp ufp; + struct user_vfp_exc ufp_exc; +} __attribute__((__aligned__(8))); + +/* + * 8 bytes for magic and size, 264 bytes for ufp, 12 bytes for ufp_exc, + * 4 bytes padding. + */ +#define VFP_STORAGE_SIZE sizeof(struct vfp_sigframe) + #endif /* CONFIG_VFP */ /* @@ -91,7 +90,7 @@ struct aux_sigframe { #ifdef CONFIG_IWMMXT struct iwmmxt_sigframe iwmmxt; #endif -#if 0 && defined CONFIG_VFP /* Not yet saved. */ +#ifdef CONFIG_VFP struct vfp_sigframe vfp; #endif /* Something that isn't a valid magic number for any coprocessor. */ diff --git a/arch/arm/include/asm/user.h b/arch/arm/include/asm/user.h index df95e050f9dd..05ac4b06876a 100644 --- a/arch/arm/include/asm/user.h +++ b/arch/arm/include/asm/user.h @@ -83,11 +83,21 @@ struct user{ /* * User specific VFP registers. If only VFPv2 is present, registers 16 to 31 - * are ignored by the ptrace system call. + * are ignored by the ptrace system call and the signal handler. */ struct user_vfp { unsigned long long fpregs[32]; unsigned long fpscr; }; +/* + * VFP exception registers exposed to user space during signal delivery. + * Fields not relevant to the current VFP architecture are ignored.
+ */ +struct user_vfp_exc { + unsigned long fpexc; + unsigned long fpinst; + unsigned long fpinst2; +}; + #endif /* _ARM_USER_H */ diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index e7714f367eb8..907d5a620bca 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -18,6 +18,7 @@ #include <asm/cacheflush.h> #include <asm/ucontext.h> #include <asm/unistd.h> +#include <asm/vfp.h> #include "ptrace.h" #include "signal.h" @@ -175,6 +176,90 @@ static int restore_iwmmxt_context(struct iwmmxt_sigframe *frame) #endif +#ifdef CONFIG_VFP + +static int preserve_vfp_context(struct vfp_sigframe __user *frame) +{ + struct thread_info *thread = current_thread_info(); + struct vfp_hard_struct *h = &thread->vfpstate.hard; + const unsigned long magic = VFP_MAGIC; + const unsigned long size = VFP_STORAGE_SIZE; + int err = 0; + + vfp_sync_hwstate(thread); + __put_user_error(magic, &frame->magic, err); + __put_user_error(size, &frame->size, err); + + /* + * Copy the floating point registers. There can be unused + * registers; see asm/hwcap.h for details. + */ + err |= __copy_to_user(&frame->ufp.fpregs, &h->fpregs, + sizeof(h->fpregs)); + /* + * Copy the status and control register. + */ + __put_user_error(h->fpscr, &frame->ufp.fpscr, err); + + /* + * Copy the exception registers. + */ + __put_user_error(h->fpexc, &frame->ufp_exc.fpexc, err); + __put_user_error(h->fpinst, &frame->ufp_exc.fpinst, err); + __put_user_error(h->fpinst2, &frame->ufp_exc.fpinst2, err); + + return err ? -EFAULT : 0; +} + +static int restore_vfp_context(struct vfp_sigframe __user *frame) +{ + struct thread_info *thread = current_thread_info(); + struct vfp_hard_struct *h = &thread->vfpstate.hard; + unsigned long magic; + unsigned long size; + unsigned long fpexc; + int err = 0; + + __get_user_error(magic, &frame->magic, err); + __get_user_error(size, &frame->size, err); + + if (err) + return -EFAULT; + if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) + return -EINVAL; + + /* + * Copy the floating point registers. There can be unused + * registers; see asm/hwcap.h for details. + */ + err |= __copy_from_user(&h->fpregs, &frame->ufp.fpregs, + sizeof(h->fpregs)); + /* + * Copy the status and control register. + */ + __get_user_error(h->fpscr, &frame->ufp.fpscr, err); + + /* + * Sanitise and restore the exception registers. + */ + __get_user_error(fpexc, &frame->ufp_exc.fpexc, err); + /* Ensure the VFP is enabled. */ + fpexc |= FPEXC_EN; + /* Ensure FPINST2 is invalid and the exception flag is cleared. */ + fpexc &= ~(FPEXC_EX | FPEXC_FP2V); + h->fpexc = fpexc; + + __get_user_error(h->fpinst, &frame->ufp_exc.fpinst, err); + __get_user_error(h->fpinst2, &frame->ufp_exc.fpinst2, err); + + if (!err) + vfp_flush_hwstate(thread); + + return err ? -EFAULT : 0; +} + +#endif + /* * Do a signal return; undo the signal stack. These are aligned to 64-bit.
*/ @@ -233,8 +318,8 @@ static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf) err |= restore_iwmmxt_context(&aux->iwmmxt); #endif #ifdef CONFIG_VFP -// if (err == 0) -// err |= vfp_restore_state(&sf->aux.vfp); + if (err == 0) + err |= restore_vfp_context(&aux->vfp); #endif return err; @@ -348,8 +433,8 @@ setup_sigframe(struct sigframe __user *sf, struct pt_regs *regs, sigset_t *set) err |= preserve_iwmmxt_context(&aux->iwmmxt); #endif #ifdef CONFIG_VFP -// if (err == 0) -// err |= vfp_save_state(&sf->aux.vfp); + if (err == 0) + err |= preserve_vfp_context(&aux->vfp); #endif __put_user_error(0, &aux->end_magic, err); diff --git a/arch/arm/mach-at91/Makefile b/arch/arm/mach-at91/Makefile index 027dd570dcc3..d4004557532a 100644 --- a/arch/arm/mach-at91/Makefile +++ b/arch/arm/mach-at91/Makefile @@ -16,8 +16,8 @@ obj-$(CONFIG_ARCH_AT91SAM9261) += at91sam9261.o at91sam926x_time.o at91sam9261_d obj-$(CONFIG_ARCH_AT91SAM9G10) += at91sam9261.o at91sam926x_time.o at91sam9261_devices.o sam9_smc.o obj-$(CONFIG_ARCH_AT91SAM9263) += at91sam9263.o at91sam926x_time.o at91sam9263_devices.o sam9_smc.o obj-$(CONFIG_ARCH_AT91SAM9RL) += at91sam9rl.o at91sam926x_time.o at91sam9rl_devices.o sam9_smc.o -obj-$(CONFIG_ARCH_AT91SAM9G20) += at91sam9260.o at91sam926x_time.o at91sam9260_devices.o sam9_smc.o - obj-$(CONFIG_ARCH_AT91SAM9G45) += at91sam9g45.o at91sam926x_time.o at91sam9g45_devices.o sam9_smc.o +obj-$(CONFIG_ARCH_AT91SAM9G20) += at91sam9260.o at91sam926x_time.o at91sam9260_devices.o sam9_smc.o +obj-$(CONFIG_ARCH_AT91SAM9G45) += at91sam9g45.o at91sam926x_time.o at91sam9g45_devices.o sam9_smc.o obj-$(CONFIG_ARCH_AT91CAP9) += at91cap9.o at91sam926x_time.o at91cap9_devices.o sam9_smc.o obj-$(CONFIG_ARCH_AT572D940HF) += at572d940hf.o at91sam926x_time.o at572d940hf_devices.o sam9_smc.o obj-$(CONFIG_ARCH_AT91X40) += at91x40.o at91x40_time.o diff --git a/arch/arm/mach-at91/pm_slowclock.S b/arch/arm/mach-at91/pm_slowclock.S index 9fcbd6ca0090..9c5b48e68a71 100644 --- a/arch/arm/mach-at91/pm_slowclock.S +++ b/arch/arm/mach-at91/pm_slowclock.S @@ -175,8 +175,6 @@ ENTRY(at91_slow_clock) orr r3, r3, #(1 << 29) /* bit 29 always set */ str r3, [r1, #(AT91_CKGR_PLLAR - AT91_PMC)] - wait_pllalock - /* Save PLLB setting and disable it */ ldr r3, [r1, #(AT91_CKGR_PLLBR - AT91_PMC)] str r3, .saved_pllbr @@ -184,8 +182,6 @@ ENTRY(at91_slow_clock) mov r3, #AT91_PMC_PLLCOUNT str r3, [r1, #(AT91_CKGR_PLLBR - AT91_PMC)] - wait_pllblock - /* Turn off the main oscillator */ ldr r3, [r1, #(AT91_CKGR_MOR - AT91_PMC)] bic r3, r3, #AT91_PMC_MOSCEN diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c index 8bca4dea6dfa..f55fa1044f72 100644 --- a/arch/arm/mm/copypage-v6.c +++ b/arch/arm/mm/copypage-v6.c @@ -41,14 +41,7 @@ static void v6_copy_user_highpage_nonaliasing(struct page *to, kfrom = kmap_atomic(from, KM_USER0); kto = kmap_atomic(to, KM_USER1); copy_page(kto, kfrom); -#ifdef CONFIG_HIGHMEM - /* - * kmap_atomic() doesn't set the page virtual address, and - * kunmap_atomic() takes care of cache flushing already. 
- */ - if (page_address(to) != NULL) -#endif - __cpuc_flush_dcache_area(kto, PAGE_SIZE); + __cpuc_flush_dcache_area(kto, PAGE_SIZE); kunmap_atomic(kto, KM_USER1); kunmap_atomic(kfrom, KM_USER0); } diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 1351edc0b26f..13fa536d82e6 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -464,6 +464,11 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset, vaddr += offset; op(vaddr, len, dir); kunmap_high(page); + } else if (cache_is_vipt()) { + pte_t saved_pte; + vaddr = kmap_high_l1_vipt(page, &saved_pte); + op(vaddr + offset, len, dir); + kunmap_high_l1_vipt(page, saved_pte); } } else { vaddr = page_address(page) + offset; diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index e34f095e2090..c6844cb9b508 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -13,6 +13,7 @@ #include <asm/cacheflush.h> #include <asm/cachetype.h> +#include <asm/highmem.h> #include <asm/smp_plat.h> #include <asm/system.h> #include <asm/tlbflush.h> @@ -152,21 +153,25 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, void __flush_dcache_page(struct address_space *mapping, struct page *page) { - void *addr = page_address(page); - /* * Writeback any data associated with the kernel mapping of this * page. This ensures that data in the physical page is mutually * coherent with the kernel's mapping. */ -#ifdef CONFIG_HIGHMEM - /* - * kmap_atomic() doesn't set the page virtual address, and - * kunmap_atomic() takes care of cache flushing already. - */ - if (addr) -#endif - __cpuc_flush_dcache_area(addr, PAGE_SIZE); + if (!PageHighMem(page)) { + __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); + } else { + void *addr = kmap_high_get(page); + if (addr) { + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_high(page); + } else if (cache_is_vipt()) { + pte_t saved_pte; + addr = kmap_high_l1_vipt(page, &saved_pte); + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_high_l1_vipt(page, saved_pte); + } + } /* * If this is a page cache page, and we have an aliasing VIPT cache, diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index 2be1ec7c1b41..77b030f5ec09 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -79,7 +79,8 @@ void kunmap_atomic(void *kvaddr, enum km_type type) unsigned int idx = type + KM_TYPE_NR * smp_processor_id(); if (kvaddr >= (void *)FIXADDR_START) { - __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); + if (cache_is_vivt()) + __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); #ifdef CONFIG_DEBUG_HIGHMEM BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); set_pte_ext(TOP_PTE(vaddr), __pte(0), 0); @@ -124,3 +125,87 @@ struct page *kmap_atomic_to_page(const void *ptr) pte = TOP_PTE(vaddr); return pte_page(*pte); } + +#ifdef CONFIG_CPU_CACHE_VIPT + +#include <linux/percpu.h> + +/* + * The VIVT cache of a highmem page is always flushed before the page + * is unmapped. Hence unmapped highmem pages need no cache maintenance + * in that case. + * + * However, unmapped pages may still be cached with a VIPT cache, and + * it is not possible to perform cache maintenance on them using physical + * addresses, unfortunately. So we have no choice but to set up a temporary + * virtual mapping for that purpose. + * + * Yet this VIPT cache maintenance may be triggered from DMA support + * functions which are possibly called from interrupt context.
As we don't + * want to keep interrupts disabled all the time when such maintenance is + * taking place, we therefore allow for some reentrancy by preserving and + * restoring the previous fixmap entry before the interrupted context is + * resumed. If the reentrancy depth is 0 then there is no need to restore + * the previous fixmap, and leaving the current one in place allows it to + * be reused the next time without a TLB flush (common with DMA). + */ + +static DEFINE_PER_CPU(int, kmap_high_l1_vipt_depth); + +void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte) +{ + unsigned int idx, cpu = smp_processor_id(); + int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu); + unsigned long vaddr, flags; + pte_t pte, *ptep; + + idx = KM_L1_CACHE + KM_TYPE_NR * cpu; + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + ptep = TOP_PTE(vaddr); + pte = mk_pte(page, kmap_prot); + + if (!in_interrupt()) + preempt_disable(); + + raw_local_irq_save(flags); + (*depth)++; + if (pte_val(*ptep) == pte_val(pte)) { + *saved_pte = pte; + } else { + *saved_pte = *ptep; + set_pte_ext(ptep, pte, 0); + local_flush_tlb_kernel_page(vaddr); + } + raw_local_irq_restore(flags); + + return (void *)vaddr; +} + +void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte) +{ + unsigned int idx, cpu = smp_processor_id(); + int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu); + unsigned long vaddr, flags; + pte_t pte, *ptep; + + idx = KM_L1_CACHE + KM_TYPE_NR * cpu; + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + ptep = TOP_PTE(vaddr); + pte = mk_pte(page, kmap_prot); + + BUG_ON(pte_val(*ptep) != pte_val(pte)); + BUG_ON(*depth <= 0); + + raw_local_irq_save(flags); + (*depth)--; + if (*depth != 0 && pte_val(pte) != pte_val(saved_pte)) { + set_pte_ext(ptep, saved_pte, 0); + local_flush_tlb_kernel_page(vaddr); + } + raw_local_irq_restore(flags); + + if (!in_interrupt()) + preempt_enable(); +} + +#endif /* CONFIG_CPU_CACHE_VIPT */ diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4223d086aa17..241c24a1c18f 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1054,10 +1054,12 @@ void setup_mm_for_reboot(char mode) pgd_t *pgd; int i; - if (current->mm && current->mm->pgd) - pgd = current->mm->pgd; - else - pgd = init_mm.pgd; + /* + * We need access to user-mode page tables here. For kernel threads + * we don't have any user-mode mappings so we use the context that we + * "borrowed". + */ + pgd = current->active_mm->pgd; base_pmdval = PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT; if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale()) diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index a420cb949328..315a540c7ce5 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -428,26 +428,6 @@ static void vfp_pm_init(void) static inline void vfp_pm_init(void) { } #endif /* CONFIG_PM */ -/* - * Synchronise the hardware VFP state of a thread other than current with the - * saved one. This function is used by the ptrace mechanism. - */ -#ifdef CONFIG_SMP -void vfp_sync_hwstate(struct thread_info *thread) -{ -} - -void vfp_flush_hwstate(struct thread_info *thread) -{ - /* - * On SMP systems, the VFP state is automatically saved at every - * context switch. We mark the thread VFP state as belonging to a - * non-existent CPU so that the saved one will be reloaded when - * needed.
- */ - thread->vfpstate.hard.cpu = NR_CPUS; -} -#else void vfp_sync_hwstate(struct thread_info *thread) { unsigned int cpu = get_cpu(); @@ -490,9 +470,18 @@ void vfp_flush_hwstate(struct thread_info *thread) last_VFP_context[cpu] = NULL; } +#ifdef CONFIG_SMP + /* + * For SMP we still have to take care of the case where the thread + * migrates to another CPU and then back to the original CPU on which + * the last VFP user is still the same thread. Mark the thread VFP + * state as belonging to a non-existent CPU so that the saved one will + * be reloaded in the above case. + */ + thread->vfpstate.hard.cpu = NR_CPUS; +#endif put_cpu(); } -#endif #include <linux/smp.h> diff --git a/arch/m68k/include/asm/atomic_mm.h b/arch/m68k/include/asm/atomic_mm.h index 88b7af20a996..d9d2ed647435 100644 --- a/arch/m68k/include/asm/atomic_mm.h +++ b/arch/m68k/include/asm/atomic_mm.h @@ -148,14 +148,18 @@ static inline int atomic_xchg(atomic_t *v, int new) static inline int atomic_sub_and_test(int i, atomic_t *v) { char c; - __asm__ __volatile__("subl %2,%1; seq %0" : "=d" (c), "+m" (*v): "g" (i)); + __asm__ __volatile__("subl %2,%1; seq %0" + : "=d" (c), "+m" (*v) + : "id" (i)); return c != 0; } static inline int atomic_add_negative(int i, atomic_t *v) { char c; - __asm__ __volatile__("addl %2,%1; smi %0" : "=d" (c), "+m" (*v): "g" (i)); + __asm__ __volatile__("addl %2,%1; smi %0" + : "=d" (c), "+m" (*v) + : "id" (i)); return c != 0; } diff --git a/arch/m68k/include/asm/sigcontext.h b/arch/m68k/include/asm/sigcontext.h index 1320eaa4cc2a..a29dd74a17cb 100644 --- a/arch/m68k/include/asm/sigcontext.h +++ b/arch/m68k/include/asm/sigcontext.h @@ -17,13 +17,11 @@ struct sigcontext { #ifndef __uClinux__ # ifdef __mcoldfire__ unsigned long sc_fpregs[2][2]; /* room for two fp registers */ - unsigned long sc_fpcntl[3]; - unsigned char sc_fpstate[16+6*8]; # else unsigned long sc_fpregs[2*3]; /* room for two fp registers */ +# endif unsigned long sc_fpcntl[3]; unsigned char sc_fpstate[216]; -# endif #endif }; diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 59b4556a5b92..e790bc1fbfa3 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -626,7 +626,7 @@ ia32_sys_call_table: .quad stub32_sigreturn .quad stub32_clone /* 120 */ .quad sys_setdomainname - .quad sys_uname + .quad sys_newuname .quad sys_modify_ldt .quad compat_sys_adjtimex .quad sys32_mprotect /* 125 */ diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index ba19ad4c47d0..86a0ff0aeac7 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -21,6 +21,7 @@ #define _ASM_X86_AMD_IOMMU_TYPES_H #include <linux/types.h> +#include <linux/mutex.h> #include <linux/list.h> #include <linux/spinlock.h> @@ -140,6 +141,7 @@ /* constants to configure the command buffer */ #define CMD_BUFFER_SIZE 8192 +#define CMD_BUFFER_UNINITIALIZED 1 #define CMD_BUFFER_ENTRIES 512 #define MMIO_CMD_SIZE_SHIFT 56 #define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT) @@ -237,6 +239,7 @@ struct protection_domain { struct list_head list; /* for list of all protection domains */ struct list_head dev_list; /* List of all devices in this domain */ spinlock_t lock; /* mostly used to lock the page table*/ + struct mutex api_lock; /* protect page tables in the iommu-api path */ u16 id; /* the domain id written to the device table */ int mode; /* paging mode (0-6 levels) */ u64 *pt_root; /* page table root pointer */ diff --git a/arch/x86/include/asm/lguest_hcall.h 
b/arch/x86/include/asm/lguest_hcall.h index ba0eed8aa1a6..b60f2924c413 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h @@ -28,22 +28,39 @@ #ifndef __ASSEMBLY__ #include <asm/hw_irq.h> -#include <asm/kvm_para.h> /*G:030 * But first, how does our Guest contact the Host to ask for privileged * operations? There are two ways: the direct way is to make a "hypercall", * to make requests of the Host Itself. * - * We use the KVM hypercall mechanism, though completely different hypercall - * numbers. Seventeen hypercalls are available: the hypercall number is put in - * the %eax register, and the arguments (when required) are placed in %ebx, - * %ecx, %edx and %esi. If a return value makes sense, it's returned in %eax. + * Our hypercall mechanism uses the highest unused trap code (traps 32 and + * above are used by real hardware interrupts). Seventeen hypercalls are + * available: the hypercall number is put in the %eax register, and the + * arguments (when required) are placed in %ebx, %ecx, %edx and %esi. + * If a return value makes sense, it's returned in %eax. * * Grossly invalid calls result in Sudden Death at the hands of the vengeful * Host, rather than returning failure. This reflects Winston Churchill's * definition of a gentleman: "someone who is only rude intentionally". -:*/ + */ +static inline unsigned long +hcall(unsigned long call, + unsigned long arg1, unsigned long arg2, unsigned long arg3, + unsigned long arg4) +{ + /* "int" is the Intel instruction to trigger a trap. */ + asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY) + /* The call in %eax (aka "a") might be overwritten */ + : "=a"(call) + /* The arguments are in %eax, %ebx, %ecx, %edx & %esi */ + : "a"(call), "b"(arg1), "c"(arg2), "d"(arg3), "S"(arg4) + /* "memory" means this might write somewhere in memory. + * This isn't true for all calls, but it's safe to tell + * gcc that it might happen so it doesn't get clever. */ + : "memory"); + return call; +} /* Can't use our min() macro here: needs to be a constant */ #define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f3dadb571d9b..f854d89b7edf 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -118,7 +118,7 @@ static bool check_device(struct device *dev) return false; /* No device or no PCI device */ - if (!dev || dev->bus != &pci_bus_type) + if (dev->bus != &pci_bus_type) return false; devid = get_device_id(dev); @@ -392,6 +392,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) u32 tail, head; u8 *target; + WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); target = iommu->cmd_buf + tail; memcpy_toio(target, cmd, sizeof(*cmd)); @@ -2186,7 +2187,7 @@ static void prealloc_protection_domains(void) struct dma_ops_domain *dma_dom; u16 devid; - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + for_each_pci_dev(dev) { /* Do we handle this device? 
*/ if (!check_device(&dev->dev)) @@ -2298,7 +2299,7 @@ static void cleanup_domain(struct protection_domain *domain) list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { struct device *dev = dev_data->dev; - do_detach(dev); + __detach_device(dev); atomic_set(&dev_data->bind, 0); } @@ -2327,6 +2328,7 @@ static struct protection_domain *protection_domain_alloc(void) return NULL; spin_lock_init(&domain->lock); + mutex_init(&domain->api_lock); domain->id = domain_id_alloc(); if (!domain->id) goto out_err; @@ -2379,9 +2381,7 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom) free_pagetable(domain); - domain_id_free(domain->id); - - kfree(domain); + protection_domain_free(domain); dom->priv = NULL; } @@ -2456,6 +2456,8 @@ static int amd_iommu_map_range(struct iommu_domain *dom, iova &= PAGE_MASK; paddr &= PAGE_MASK; + mutex_lock(&domain->api_lock); + for (i = 0; i < npages; ++i) { ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k); if (ret) @@ -2465,6 +2467,8 @@ static int amd_iommu_map_range(struct iommu_domain *dom, paddr += PAGE_SIZE; } + mutex_unlock(&domain->api_lock); + return 0; } @@ -2477,12 +2481,16 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, iova &= PAGE_MASK; + mutex_lock(&domain->api_lock); + for (i = 0; i < npages; ++i) { iommu_unmap_page(domain, iova, PM_MAP_4k); iova += PAGE_SIZE; } iommu_flush_tlb_pde(domain); + + mutex_unlock(&domain->api_lock); } static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 42f5350b908f..6360abf993d4 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -138,9 +138,9 @@ int amd_iommus_present; bool amd_iommu_np_cache __read_mostly; /* - * Set to true if ACPI table parsing and hardware intialization went properly + * The ACPI table parsing functions set this variable on an error */ -static bool amd_iommu_initialized; +static int __initdata amd_iommu_init_err; /* * List of protection domains - used during resume @@ -391,9 +391,11 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) */ for (i = 0; i < table->length; ++i) checksum += p[i]; - if (checksum != 0) + if (checksum != 0) { /* ACPI table corrupt */ - return -ENODEV; + amd_iommu_init_err = -ENODEV; + return 0; + } p += IVRS_HEADER_LENGTH; @@ -436,7 +438,7 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) if (cmd_buf == NULL) return NULL; - iommu->cmd_buf_size = CMD_BUFFER_SIZE; + iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED; return cmd_buf; } @@ -472,12 +474,13 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) &entry, sizeof(entry)); amd_iommu_reset_cmd_buffer(iommu); + iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED); } static void __init free_command_buffer(struct amd_iommu *iommu) { free_pages((unsigned long)iommu->cmd_buf, - get_order(iommu->cmd_buf_size)); + get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED))); } /* allocates the memory where the IOMMU will log its events to */ @@ -920,11 +923,16 @@ static int __init init_iommu_all(struct acpi_table_header *table) h->mmio_phys); iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); - if (iommu == NULL) - return -ENOMEM; + if (iommu == NULL) { + amd_iommu_init_err = -ENOMEM; + return 0; + } + ret = init_iommu_one(iommu, h); - if (ret) - return ret; + if (ret) { + amd_iommu_init_err = ret; + return 0; + } break; default: break; @@ -934,8 +942,6 @@ static int __init 
init_iommu_all(struct acpi_table_header *table) } WARN_ON(p != end); - amd_iommu_initialized = true; - return 0; } @@ -1211,6 +1217,10 @@ static int __init amd_iommu_init(void) if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) return -ENODEV; + ret = amd_iommu_init_err; + if (ret) + goto out; + dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); @@ -1270,12 +1280,19 @@ static int __init amd_iommu_init(void) if (acpi_table_parse("IVRS", init_iommu_all) != 0) goto free; - if (!amd_iommu_initialized) + if (amd_iommu_init_err) { + ret = amd_iommu_init_err; goto free; + } if (acpi_table_parse("IVRS", init_memory_definitions) != 0) goto free; + if (amd_iommu_init_err) { + ret = amd_iommu_init_err; + goto free; + } + ret = sysdev_class_register(&amd_iommu_sysdev_class); if (ret) goto free; @@ -1288,6 +1305,8 @@ static int __init amd_iommu_init(void) if (ret) goto free; + enable_iommus(); + if (iommu_pass_through) ret = amd_iommu_init_passthrough(); else @@ -1300,8 +1319,6 @@ static int __init amd_iommu_init(void) amd_iommu_init_notifier(); - enable_iommus(); - if (iommu_pass_through) goto out; @@ -1315,6 +1332,7 @@ out: return ret; free: + disable_iommus(); amd_iommu_uninit_devices(); diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 3704997e8b25..b5d8b0bcf235 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -393,6 +393,7 @@ void __init gart_iommu_hole_init(void) for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { int bus; int dev_base, dev_limit; + u32 ctl; bus = bus_dev_ranges[i].bus; dev_base = bus_dev_ranges[i].dev_base; @@ -406,7 +407,19 @@ void __init gart_iommu_hole_init(void) gart_iommu_aperture = 1; x86_init.iommu.iommu_init = gart_iommu_init; - aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7; + ctl = read_pci_config(bus, slot, 3, + AMD64_GARTAPERTURECTL); + + /* + * Before we do anything else, disable the GART. It may + * still be enabled if we boot into a crash-kernel here. + * Reconfiguring the GART while it is enabled could have + * unknown side-effects.
+ */ + ctl &= ~GARTEN; + write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); + + aper_order = (ctl >> 1) & 7; aper_size = (32 * 1024 * 1024) << aper_order; aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; aper_base <<= 25; diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index a4849c10a77e..ebd4c51d096a 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -27,7 +27,6 @@ #include <asm/cpu.h> #include <asm/reboot.h> #include <asm/virtext.h> -#include <asm/x86_init.h> #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) @@ -103,10 +102,5 @@ void native_machine_crash_shutdown(struct pt_regs *regs) #ifdef CONFIG_HPET_TIMER hpet_disable(); #endif - -#ifdef CONFIG_X86_64 - x86_platform.iommu_shutdown(); -#endif - crash_save_cpu(regs, safe_smp_processor_id()); } diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h index e39e77168a37..e1a93be4fd44 100644 --- a/arch/x86/kernel/dumpstack.h +++ b/arch/x86/kernel/dumpstack.h @@ -14,6 +14,8 @@ #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) #endif +#include <linux/uaccess.h> + extern void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, char *log_lvl); @@ -42,8 +44,10 @@ static inline unsigned long rewind_frame_pointer(int n) get_bp(frame); #ifdef CONFIG_FRAME_POINTER - while (n--) - frame = frame->next_frame; + while (n--) { + if (probe_kernel_address(&frame->next_frame, frame)) + break; + } #endif return (unsigned long)frame; diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 68cd24f9deae..0f7f130caa67 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -565,6 +565,9 @@ static void enable_gart_translations(void) enable_gart_translation(dev, __pa(agp_gatt_table)); } + + /* Flush the GART-TLB to remove stale entries */ + k8_flush_garts(); } /* diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 7e59dc1d3fc2..2bdf628066bd 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -115,7 +115,7 @@ static void async_hcall(unsigned long call, unsigned long arg1, local_irq_save(flags); if (lguest_data.hcall_status[next_call] != 0xFF) { /* Table full, so do normal hcall which will flush table. */ - kvm_hypercall4(call, arg1, arg2, arg3, arg4); + hcall(call, arg1, arg2, arg3, arg4); } else { lguest_data.hcalls[next_call].arg0 = call; lguest_data.hcalls[next_call].arg1 = arg1; @@ -145,46 +145,45 @@ static void async_hcall(unsigned long call, unsigned long arg1, * So, when we're in lazy mode, we call async_hcall() to store the call for * future processing: */ -static void lazy_hcall1(unsigned long call, - unsigned long arg1) +static void lazy_hcall1(unsigned long call, unsigned long arg1) { if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) - kvm_hypercall1(call, arg1); + hcall(call, arg1, 0, 0, 0); else async_hcall(call, arg1, 0, 0, 0); } /* You can imagine what lazy_hcall2, 3 and 4 look like. 
:*/ static void lazy_hcall2(unsigned long call, - unsigned long arg1, - unsigned long arg2) + unsigned long arg1, + unsigned long arg2) { if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) - kvm_hypercall2(call, arg1, arg2); + hcall(call, arg1, arg2, 0, 0); else async_hcall(call, arg1, arg2, 0, 0); } static void lazy_hcall3(unsigned long call, - unsigned long arg1, - unsigned long arg2, - unsigned long arg3) + unsigned long arg1, + unsigned long arg2, + unsigned long arg3) { if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) - kvm_hypercall3(call, arg1, arg2, arg3); + hcall(call, arg1, arg2, arg3, 0); else async_hcall(call, arg1, arg2, arg3, 0); } #ifdef CONFIG_X86_PAE static void lazy_hcall4(unsigned long call, - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, - unsigned long arg4) + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4) { if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) - kvm_hypercall4(call, arg1, arg2, arg3, arg4); + hcall(call, arg1, arg2, arg3, arg4); else async_hcall(call, arg1, arg2, arg3, arg4); } @@ -196,13 +195,13 @@ static void lazy_hcall4(unsigned long call, :*/ static void lguest_leave_lazy_mmu_mode(void) { - kvm_hypercall0(LHCALL_FLUSH_ASYNC); + hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0); paravirt_leave_lazy_mmu(); } static void lguest_end_context_switch(struct task_struct *next) { - kvm_hypercall0(LHCALL_FLUSH_ASYNC); + hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0); paravirt_end_context_switch(next); } @@ -286,7 +285,7 @@ static void lguest_write_idt_entry(gate_desc *dt, /* Keep the local copy up to date. */ native_write_idt_entry(dt, entrynum, g); /* Tell Host about this new entry. */ - kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]); + hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1], 0); } /* @@ -300,7 +299,7 @@ static void lguest_load_idt(const struct desc_ptr *desc) struct desc_struct *idt = (void *)desc->address; for (i = 0; i < (desc->size+1)/8; i++) - kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b); + hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b, 0); } /* @@ -321,7 +320,7 @@ static void lguest_load_gdt(const struct desc_ptr *desc) struct desc_struct *gdt = (void *)desc->address; for (i = 0; i < (desc->size+1)/8; i++) - kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b); + hcall(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b, 0); } /* @@ -334,8 +333,8 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, { native_write_gdt_entry(dt, entrynum, desc, type); /* Tell Host about this new entry. */ - kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, entrynum, - dt[entrynum].a, dt[entrynum].b); + hcall(LHCALL_LOAD_GDT_ENTRY, entrynum, + dt[entrynum].a, dt[entrynum].b, 0); } /* @@ -931,7 +930,7 @@ static int lguest_clockevent_set_next_event(unsigned long delta, } /* Please wake us this far in the future. */ - kvm_hypercall1(LHCALL_SET_CLOCKEVENT, delta); + hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0, 0); return 0; } @@ -942,7 +941,7 @@ static void lguest_clockevent_set_mode(enum clock_event_mode mode, case CLOCK_EVT_MODE_UNUSED: case CLOCK_EVT_MODE_SHUTDOWN: /* A 0 argument shuts the clock down. */ - kvm_hypercall0(LHCALL_SET_CLOCKEVENT); + hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0, 0); break; case CLOCK_EVT_MODE_ONESHOT: /* This is what we expect. */ @@ -1100,7 +1099,7 @@ static void set_lguest_basic_apic_ops(void) /* STOP! Until an interrupt comes in. 
*/ static void lguest_safe_halt(void) { - kvm_hypercall0(LHCALL_HALT); + hcall(LHCALL_HALT, 0, 0, 0, 0); } /* @@ -1112,8 +1111,8 @@ static void lguest_safe_halt(void) */ static void lguest_power_off(void) { - kvm_hypercall2(LHCALL_SHUTDOWN, __pa("Power down"), - LGUEST_SHUTDOWN_POWEROFF); + hcall(LHCALL_SHUTDOWN, __pa("Power down"), + LGUEST_SHUTDOWN_POWEROFF, 0, 0); } /* @@ -1123,7 +1122,7 @@ static void lguest_power_off(void) */ static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p) { - kvm_hypercall2(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF); + hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0, 0); /* The hcall won't return, but to keep gcc happy, we're "done". */ return NOTIFY_DONE; } @@ -1162,7 +1161,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count) len = sizeof(scratch) - 1; scratch[len] = '\0'; memcpy(scratch, buf, len); - kvm_hypercall1(LHCALL_NOTIFY, __pa(scratch)); + hcall(LHCALL_NOTIFY, __pa(scratch), 0, 0, 0); /* This routine returns the number of bytes actually written. */ return len; @@ -1174,7 +1173,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count) */ static void lguest_restart(char *reason) { - kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART); + hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0, 0); } /*G:050 diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index 27eac0faee48..4f420c2f2d55 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S @@ -32,7 +32,7 @@ ENTRY(lguest_entry) */ movl $LHCALL_LGUEST_INIT, %eax movl $lguest_data - __PAGE_OFFSET, %ebx - .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ + int $LGUEST_TRAP_ENTRY /* Set up the initial stack so we can run C code. */ movl $(init_thread_union+THREAD_SIZE),%esp
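For illustration, a hedged sketch of a guest-side caller of the new trap-based hcall() helper from the lguest_hcall.h hunk above. The wrapper name example_notify is hypothetical; hcall(), LHCALL_NOTIFY and __pa() are the names used by the patch (compare the converted early_put_chars() call site):

/* Hypothetical wrapper modelled on the early_put_chars() conversion above.
 * LHCALL_NOTIFY carries a guest-physical address in arg1; the unused
 * argument slots are passed as zero, and any return value comes back in
 * %eax via hcall()'s return value. */
static void example_notify(const char *buf)
{
	hcall(LHCALL_NOTIFY, __pa(buf), 0, 0, 0);
}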