diff options
Diffstat (limited to 'arch/sh/lib')
-rw-r--r-- | arch/sh/lib/Makefile | 5 | ||||
-rw-r--r-- | arch/sh/lib/clear_page.S | 154 | ||||
-rw-r--r-- | arch/sh/lib/copy_page.S | 389 | ||||
-rw-r--r-- | arch/sh/lib/io.c | 82 |
4 files changed, 628 insertions, 2 deletions
diff --git a/arch/sh/lib/Makefile b/arch/sh/lib/Makefile index 9dc7b6985052..ebb55d1149f5 100644 --- a/arch/sh/lib/Makefile +++ b/arch/sh/lib/Makefile @@ -2,12 +2,13 @@ # Makefile for SuperH-specific library files.. # -lib-y = delay.o memset.o memmove.o memchr.o \ +lib-y = delay.o io.o memset.o memmove.o memchr.o \ checksum.o strlen.o div64.o div64-generic.o memcpy-y := memcpy.o memcpy-$(CONFIG_CPU_SH4) := memcpy-sh4.o -lib-y += $(memcpy-y) +lib-$(CONFIG_MMU) += copy_page.o clear_page.o +lib-y += $(memcpy-y) EXTRA_CFLAGS += -Werror diff --git a/arch/sh/lib/clear_page.S b/arch/sh/lib/clear_page.S new file mode 100644 index 000000000000..3539123fe517 --- /dev/null +++ b/arch/sh/lib/clear_page.S @@ -0,0 +1,154 @@ +/* + * __clear_user_page, __clear_user, clear_page implementation of SuperH + * + * Copyright (C) 2001 Kaz Kojima + * Copyright (C) 2001, 2002 Niibe Yutaka + * Copyright (C) 2006 Paul Mundt + */ +#include <linux/linkage.h> +#include <asm/page.h> + +/* + * clear_page + * @to: P1 address + * + * void clear_page(void *to) + */ + +/* + * r0 --- scratch + * r4 --- to + * r5 --- to + PAGE_SIZE + */ +ENTRY(clear_page) + mov r4,r5 + mov.l .Llimit,r0 + add r0,r5 + mov #0,r0 + ! +1: +#if defined(CONFIG_CPU_SH3) + mov.l r0,@r4 +#elif defined(CONFIG_CPU_SH4) + movca.l r0,@r4 + mov r4,r1 +#endif + add #32,r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 +#if defined(CONFIG_CPU_SH4) + ocbwb @r1 +#endif + cmp/eq r5,r4 + bf/s 1b + add #28,r4 + ! + rts + nop + + .balign 4 +.Llimit: .long (PAGE_SIZE-28) + +ENTRY(__clear_user) + ! + mov #0, r0 + mov #0xe0, r1 ! 0xffffffe0 + ! + ! r4..(r4+31)&~32 -------- not aligned [ Area 0 ] + ! (r4+31)&~32..(r4+r5)&~32 -------- aligned [ Area 1 ] + ! (r4+r5)&~32..r4+r5 -------- not aligned [ Area 2 ] + ! + ! Clear area 0 + mov r4, r2 + ! + tst r1, r5 ! length < 32 + bt .Larea2 ! skip to remainder + ! + add #31, r2 + and r1, r2 + cmp/eq r4, r2 + bt .Larea1 + mov r2, r3 + sub r4, r3 + mov r3, r7 + mov r4, r2 + ! +.L0: dt r3 +0: mov.b r0, @r2 + bf/s .L0 + add #1, r2 + ! + sub r7, r5 + mov r2, r4 +.Larea1: + mov r4, r3 + add r5, r3 + and r1, r3 + cmp/hi r2, r3 + bf .Larea2 + ! + ! Clear area 1 +#if defined(CONFIG_CPU_SH4) +1: movca.l r0, @r2 +#else +1: mov.l r0, @r2 +#endif + add #4, r2 +2: mov.l r0, @r2 + add #4, r2 +3: mov.l r0, @r2 + add #4, r2 +4: mov.l r0, @r2 + add #4, r2 +5: mov.l r0, @r2 + add #4, r2 +6: mov.l r0, @r2 + add #4, r2 +7: mov.l r0, @r2 + add #4, r2 +8: mov.l r0, @r2 + add #4, r2 + cmp/hi r2, r3 + bt/s 1b + nop + ! + ! Clear area 2 +.Larea2: + mov r4, r3 + add r5, r3 + cmp/hs r3, r2 + bt/s .Ldone + sub r2, r3 +.L2: dt r3 +9: mov.b r0, @r2 + bf/s .L2 + add #1, r2 + ! +.Ldone: rts + mov #0, r0 ! return 0 as normal return + + ! return the number of bytes remained +.Lbad_clear_user: + mov r4, r0 + add r5, r0 + rts + sub r2, r0 + +.section __ex_table,"a" + .align 2 + .long 0b, .Lbad_clear_user + .long 1b, .Lbad_clear_user + .long 2b, .Lbad_clear_user + .long 3b, .Lbad_clear_user + .long 4b, .Lbad_clear_user + .long 5b, .Lbad_clear_user + .long 6b, .Lbad_clear_user + .long 7b, .Lbad_clear_user + .long 8b, .Lbad_clear_user + .long 9b, .Lbad_clear_user +.previous diff --git a/arch/sh/lib/copy_page.S b/arch/sh/lib/copy_page.S new file mode 100644 index 000000000000..e002b91c8752 --- /dev/null +++ b/arch/sh/lib/copy_page.S @@ -0,0 +1,389 @@ +/* + * copy_page, __copy_user_page, __copy_user implementation of SuperH + * + * Copyright (C) 2001 Niibe Yutaka & Kaz Kojima + * Copyright (C) 2002 Toshinobu Sugioka + * Copyright (C) 2006 Paul Mundt + */ +#include <linux/linkage.h> +#include <asm/page.h> + +/* + * copy_page + * @to: P1 address + * @from: P1 address + * + * void copy_page(void *to, void *from) + */ + +/* + * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch + * r8 --- from + PAGE_SIZE + * r9 --- not used + * r10 --- to + * r11 --- from + */ +ENTRY(copy_page) + mov.l r8,@-r15 + mov.l r10,@-r15 + mov.l r11,@-r15 + mov r4,r10 + mov r5,r11 + mov r5,r8 + mov.l .Lpsz,r0 + add r0,r8 + ! +1: mov.l @r11+,r0 + mov.l @r11+,r1 + mov.l @r11+,r2 + mov.l @r11+,r3 + mov.l @r11+,r4 + mov.l @r11+,r5 + mov.l @r11+,r6 + mov.l @r11+,r7 +#if defined(CONFIG_CPU_SH3) + mov.l r0,@r10 +#elif defined(CONFIG_CPU_SH4) + movca.l r0,@r10 + mov r10,r0 +#endif + add #32,r10 + mov.l r7,@-r10 + mov.l r6,@-r10 + mov.l r5,@-r10 + mov.l r4,@-r10 + mov.l r3,@-r10 + mov.l r2,@-r10 + mov.l r1,@-r10 +#if defined(CONFIG_CPU_SH4) + ocbwb @r0 +#endif + cmp/eq r11,r8 + bf/s 1b + add #28,r10 + ! + mov.l @r15+,r11 + mov.l @r15+,r10 + mov.l @r15+,r8 + rts + nop + + .balign 4 +.Lpsz: .long PAGE_SIZE + +/* + * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n); + * Return the number of bytes NOT copied + */ +#define EX(...) \ + 9999: __VA_ARGS__ ; \ + .section __ex_table, "a"; \ + .long 9999b, 6000f ; \ + .previous +ENTRY(__copy_user) + ! Check if small number of bytes + mov #11,r0 + mov r4,r3 + cmp/gt r0,r6 ! r6 (len) > r0 (11) + bf/s .L_cleanup_loop_no_pop + add r6,r3 ! last destination address + + ! Calculate bytes needed to align to src + mov.l r11,@-r15 + neg r5,r0 + mov.l r10,@-r15 + add #4,r0 + mov.l r9,@-r15 + and #3,r0 + mov.l r8,@-r15 + tst r0,r0 + bt 2f + +1: + ! Copy bytes to long word align src +EX( mov.b @r5+,r1 ) + dt r0 + add #-1,r6 +EX( mov.b r1,@r4 ) + bf/s 1b + add #1,r4 + + ! Jump to appropriate routine depending on dest +2: mov #3,r1 + mov r6, r2 + and r4,r1 + shlr2 r2 + shll2 r1 + mova .L_jump_tbl,r0 + mov.l @(r0,r1),r1 + jmp @r1 + nop + + .align 2 +.L_jump_tbl: + .long .L_dest00 + .long .L_dest01 + .long .L_dest10 + .long .L_dest11 + +/* + * Come here if there are less than 12 bytes to copy + * + * Keep the branch target close, so the bf/s callee doesn't overflow + * and result in a more expensive branch being inserted. This is the + * fast-path for small copies, the jump via the jump table will hit the + * default slow-path cleanup. -PFM. + */ +.L_cleanup_loop_no_pop: + tst r6,r6 ! Check explicitly for zero + bt 1f + +2: +EX( mov.b @r5+,r0 ) + dt r6 +EX( mov.b r0,@r4 ) + bf/s 2b + add #1,r4 + +1: mov #0,r0 ! normal return +5000: + +# Exception handler: +.section .fixup, "ax" +6000: + mov.l 8000f,r1 + mov r3,r0 + jmp @r1 + sub r4,r0 + .align 2 +8000: .long 5000b + +.previous + rts + nop + +! Destination = 00 + +.L_dest00: + ! Skip the large copy for small transfers + mov #(32+32-4), r0 + cmp/gt r6, r0 ! r0 (60) > r6 (len) + bt 1f + + ! Align dest to a 32 byte boundary + neg r4,r0 + add #0x20, r0 + and #0x1f, r0 + tst r0, r0 + bt 2f + + sub r0, r6 + shlr2 r0 +3: +EX( mov.l @r5+,r1 ) + dt r0 +EX( mov.l r1,@r4 ) + bf/s 3b + add #4,r4 + +2: +EX( mov.l @r5+,r0 ) +EX( mov.l @r5+,r1 ) +EX( mov.l @r5+,r2 ) +EX( mov.l @r5+,r7 ) +EX( mov.l @r5+,r8 ) +EX( mov.l @r5+,r9 ) +EX( mov.l @r5+,r10 ) +EX( mov.l @r5+,r11 ) +#ifdef CONFIG_CPU_SH4 +EX( movca.l r0,@r4 ) +#else +EX( mov.l r0,@r4 ) +#endif + add #-32, r6 +EX( mov.l r1,@(4,r4) ) + mov #32, r0 +EX( mov.l r2,@(8,r4) ) + cmp/gt r6, r0 ! r0 (32) > r6 (len) +EX( mov.l r7,@(12,r4) ) +EX( mov.l r8,@(16,r4) ) +EX( mov.l r9,@(20,r4) ) +EX( mov.l r10,@(24,r4) ) +EX( mov.l r11,@(28,r4) ) + bf/s 2b + add #32,r4 + +1: mov r6, r0 + shlr2 r0 + tst r0, r0 + bt .L_cleanup +1: +EX( mov.l @r5+,r1 ) + dt r0 +EX( mov.l r1,@r4 ) + bf/s 1b + add #4,r4 + + bra .L_cleanup + nop + +! Destination = 10 + +.L_dest10: + mov r2,r7 + shlr2 r7 + shlr r7 + tst r7,r7 + mov #7,r0 + bt/s 1f + and r0,r2 +2: + dt r7 +#ifdef CONFIG_CPU_LITTLE_ENDIAN +EX( mov.l @r5+,r0 ) +EX( mov.l @r5+,r1 ) +EX( mov.l @r5+,r8 ) +EX( mov.l @r5+,r9 ) +EX( mov.l @r5+,r10 ) +EX( mov.w r0,@r4 ) + add #2,r4 + xtrct r1,r0 + xtrct r8,r1 + xtrct r9,r8 + xtrct r10,r9 + +EX( mov.l r0,@r4 ) +EX( mov.l r1,@(4,r4) ) +EX( mov.l r8,@(8,r4) ) +EX( mov.l r9,@(12,r4) ) + +EX( mov.l @r5+,r1 ) +EX( mov.l @r5+,r8 ) +EX( mov.l @r5+,r0 ) + xtrct r1,r10 + xtrct r8,r1 + xtrct r0,r8 + shlr16 r0 +EX( mov.l r10,@(16,r4) ) +EX( mov.l r1,@(20,r4) ) +EX( mov.l r8,@(24,r4) ) +EX( mov.w r0,@(28,r4) ) + bf/s 2b + add #30,r4 +#else +EX( mov.l @(28,r5),r0 ) +EX( mov.l @(24,r5),r8 ) +EX( mov.l @(20,r5),r9 ) +EX( mov.l @(16,r5),r10 ) +EX( mov.w r0,@(30,r4) ) + add #-2,r4 + xtrct r8,r0 + xtrct r9,r8 + xtrct r10,r9 +EX( mov.l r0,@(28,r4) ) +EX( mov.l r8,@(24,r4) ) +EX( mov.l r9,@(20,r4) ) + +EX( mov.l @(12,r5),r0 ) +EX( mov.l @(8,r5),r8 ) + xtrct r0,r10 +EX( mov.l @(4,r5),r9 ) + mov.l r10,@(16,r4) +EX( mov.l @r5,r10 ) + xtrct r8,r0 + xtrct r9,r8 + xtrct r10,r9 +EX( mov.l r0,@(12,r4) ) +EX( mov.l r8,@(8,r4) ) + swap.w r10,r0 +EX( mov.l r9,@(4,r4) ) +EX( mov.w r0,@(2,r4) ) + + add #32,r5 + bf/s 2b + add #34,r4 +#endif + tst r2,r2 + bt .L_cleanup + +1: ! Read longword, write two words per iteration +EX( mov.l @r5+,r0 ) + dt r2 +#ifdef CONFIG_CPU_LITTLE_ENDIAN +EX( mov.w r0,@r4 ) + shlr16 r0 +EX( mov.w r0,@(2,r4) ) +#else +EX( mov.w r0,@(2,r4) ) + shlr16 r0 +EX( mov.w r0,@r4 ) +#endif + bf/s 1b + add #4,r4 + + bra .L_cleanup + nop + +! Destination = 01 or 11 + +.L_dest01: +.L_dest11: + ! Read longword, write byte, word, byte per iteration +EX( mov.l @r5+,r0 ) + dt r2 +#ifdef CONFIG_CPU_LITTLE_ENDIAN +EX( mov.b r0,@r4 ) + shlr8 r0 + add #1,r4 +EX( mov.w r0,@r4 ) + shlr16 r0 +EX( mov.b r0,@(2,r4) ) + bf/s .L_dest01 + add #3,r4 +#else +EX( mov.b r0,@(3,r4) ) + shlr8 r0 + swap.w r0,r7 +EX( mov.b r7,@r4 ) + add #1,r4 +EX( mov.w r0,@r4 ) + bf/s .L_dest01 + add #3,r4 +#endif + +! Cleanup last few bytes +.L_cleanup: + mov r6,r0 + and #3,r0 + tst r0,r0 + bt .L_exit + mov r0,r6 + +.L_cleanup_loop: +EX( mov.b @r5+,r0 ) + dt r6 +EX( mov.b r0,@r4 ) + bf/s .L_cleanup_loop + add #1,r4 + +.L_exit: + mov #0,r0 ! normal return + +5000: + +# Exception handler: +.section .fixup, "ax" +6000: + mov.l 8000f,r1 + mov r3,r0 + jmp @r1 + sub r4,r0 + .align 2 +8000: .long 5000b + +.previous + mov.l @r15+,r8 + mov.l @r15+,r9 + mov.l @r15+,r10 + rts + mov.l @r15+,r11 diff --git a/arch/sh/lib/io.c b/arch/sh/lib/io.c new file mode 100644 index 000000000000..4f54ec43516f --- /dev/null +++ b/arch/sh/lib/io.c @@ -0,0 +1,82 @@ +/* + * arch/sh/lib/io.c - SH32 optimized I/O routines + * + * Copyright (C) 2000 Stuart Menefy + * Copyright (C) 2005 Paul Mundt + * + * Provide real functions which expand to whatever the header file defined. + * Also definitions of machine independent IO functions. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/module.h> +#include <linux/io.h> + +void __raw_readsl(unsigned long addr, void *datap, int len) +{ + u32 *data; + + for (data = datap; (len != 0) && (((u32)data & 0x1f) != 0); len--) + *data++ = ctrl_inl(addr); + + if (likely(len >= (0x20 >> 2))) { + int tmp2, tmp3, tmp4, tmp5, tmp6; + + __asm__ __volatile__( + "1: \n\t" + "mov.l @%7, r0 \n\t" + "mov.l @%7, %2 \n\t" +#ifdef CONFIG_CPU_SH4 + "movca.l r0, @%0 \n\t" +#else + "mov.l r0, @%0 \n\t" +#endif + "mov.l @%7, %3 \n\t" + "mov.l @%7, %4 \n\t" + "mov.l @%7, %5 \n\t" + "mov.l @%7, %6 \n\t" + "mov.l @%7, r7 \n\t" + "mov.l @%7, r0 \n\t" + "mov.l %2, @(0x04,%0) \n\t" + "mov #0x20>>2, %2 \n\t" + "mov.l %3, @(0x08,%0) \n\t" + "sub %2, %1 \n\t" + "mov.l %4, @(0x0c,%0) \n\t" + "cmp/hi %1, %2 ! T if 32 > len \n\t" + "mov.l %5, @(0x10,%0) \n\t" + "mov.l %6, @(0x14,%0) \n\t" + "mov.l r7, @(0x18,%0) \n\t" + "mov.l r0, @(0x1c,%0) \n\t" + "bf.s 1b \n\t" + " add #0x20, %0 \n\t" + : "=&r" (data), "=&r" (len), + "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4), + "=&r" (tmp5), "=&r" (tmp6) + : "r"(addr), "0" (data), "1" (len) + : "r0", "r7", "t", "memory"); + } + + for (; len != 0; len--) + *data++ = ctrl_inl(addr); +} +EXPORT_SYMBOL(__raw_readsl); + +void __raw_writesl(unsigned long addr, const void *data, int len) +{ + if (likely(len != 0)) { + int tmp1; + + __asm__ __volatile__ ( + "1: \n\t" + "mov.l @%0+, %1 \n\t" + "dt %3 \n\t" + "bf.s 1b \n\t" + " mov.l %1, @%4 \n\t" + : "=&r" (data), "=&r" (tmp1) + : "0" (data), "r" (len), "r"(addr) + : "t", "memory"); + } +} +EXPORT_SYMBOL(__raw_writesl); |