From f8f98a9335db4a7d6285b785180fad720bf22864 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 8 Jun 2005 15:28:24 +0100 Subject: [PATCH] ARM: Fix Xscale copy_page implementation The ARM copypage changes in 2.6.12-rc4-git1 removed the preempt locking from the copypage functions which broke the XScale implementation. This patch fixes the locking on XScale and removes the now unneeded minicache code. Signed-off-by: Russell King Checked-by: Richard Purdie --- arch/arm/mm/copypage-xscale.c | 131 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 arch/arm/mm/copypage-xscale.c (limited to 'arch/arm/mm/copypage-xscale.c') diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c new file mode 100644 index 000000000000..42a6ee255ce0 --- /dev/null +++ b/arch/arm/mm/copypage-xscale.c @@ -0,0 +1,131 @@ +/* + * linux/arch/arm/lib/copypage-xscale.S + * + * Copyright (C) 1995-2005 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This handles the mini data cache, as found on SA11x0 and XScale + * processors. When we copy a user page page, we map it in such a way + * that accesses to this page will not touch the main data cache, but + * will be cached in the mini data cache. This prevents us thrashing + * the main data cache on page faults. + */ +#include +#include + +#include +#include +#include + +/* + * 0xffff8000 to 0xffffffff is reserved for any ARM architecture + * specific hacks for copying pages efficiently. + */ +#define COPYPAGE_MINICACHE 0xffff8000 + +#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ + L_PTE_CACHEABLE) + +#define TOP_PTE(x) pte_offset_kernel(top_pmd, x) + +static DEFINE_SPINLOCK(minicache_lock); + +/* + * XScale mini-dcache optimised copy_user_page + * + * We flush the destination cache lines just before we write the data into the + * corresponding address. Since the Dcache is read-allocate, this removes the + * Dcache aliasing issue. The writes will be forwarded to the write buffer, + * and merged as appropriate. + */ +static void __attribute__((naked)) +mc_copy_user_page(void *from, void *to) +{ + /* + * Strangely enough, best performance is achieved + * when prefetching destination as well. (NP) + */ + asm volatile( + "stmfd sp!, {r4, r5, lr} \n\ + mov lr, %2 \n\ + pld [r0, #0] \n\ + pld [r0, #32] \n\ + pld [r1, #0] \n\ + pld [r1, #32] \n\ +1: pld [r0, #64] \n\ + pld [r0, #96] \n\ + pld [r1, #64] \n\ + pld [r1, #96] \n\ +2: ldrd r2, [r0], #8 \n\ + ldrd r4, [r0], #8 \n\ + mov ip, r1 \n\ + strd r2, [r1], #8 \n\ + ldrd r2, [r0], #8 \n\ + strd r4, [r1], #8 \n\ + ldrd r4, [r0], #8 \n\ + strd r2, [r1], #8 \n\ + strd r4, [r1], #8 \n\ + mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ + ldrd r2, [r0], #8 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ + ldrd r4, [r0], #8 \n\ + mov ip, r1 \n\ + strd r2, [r1], #8 \n\ + ldrd r2, [r0], #8 \n\ + strd r4, [r1], #8 \n\ + ldrd r4, [r0], #8 \n\ + strd r2, [r1], #8 \n\ + strd r4, [r1], #8 \n\ + mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ + subs lr, lr, #1 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ + bgt 1b \n\ + beq 2b \n\ + ldmfd sp!, {r4, r5, pc} " + : + : "r" (from), "r" (to), "I" (PAGE_SIZE / 64 - 1)); +} + +void xscale_mc_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) +{ + spin_lock(&minicache_lock); + + set_pte(TOP_PTE(COPYPAGE_MINICACHE), pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot)); + flush_tlb_kernel_page(COPYPAGE_MINICACHE); + + mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto); + + spin_unlock(&minicache_lock); +} + +/* + * XScale optimised clear_user_page + */ +void __attribute__((naked)) +xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr) +{ + asm volatile( + "mov r1, %0 \n\ + mov r2, #0 \n\ + mov r3, #0 \n\ +1: mov ip, r0 \n\ + strd r2, [r0], #8 \n\ + strd r2, [r0], #8 \n\ + strd r2, [r0], #8 \n\ + strd r2, [r0], #8 \n\ + mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ + subs r1, r1, #1 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ + bne 1b \n\ + mov pc, lr" + : + : "I" (PAGE_SIZE / 32)); +} + +struct cpu_user_fns xscale_mc_user_fns __initdata = { + .cpu_clear_user_page = xscale_mc_clear_user_page, + .cpu_copy_user_page = xscale_mc_copy_user_page, +}; -- cgit v1.2.1