diff options
author | David S. Miller <davem@davemloft.net> | 2012-09-26 21:11:01 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-09-27 00:35:11 -0700 |
commit | ae2c6ca64118b934ef85f66adb03d5bbfdd57201 (patch) | |
tree | 5eb9a50cce32cadd527d5fc92095c76c00b72bae /arch/sparc/lib/NG4copy_page.S | |
parent | da201161662b8ee9c8d7bd8cc50ce3cb3366d400 (diff) | |
download | blackbird-op-linux-ae2c6ca64118b934ef85f66adb03d5bbfdd57201.tar.gz blackbird-op-linux-ae2c6ca64118b934ef85f66adb03d5bbfdd57201.zip |
sparc64: Add SPARC-T4 optimized memcpy.
Before After
-------------- --------------
bw_tcp: 1288.53 MB/sec 1637.77 MB/sec
bw_pipe: 1517.18 MB/sec 2107.61 MB/sec
bw_unix: 1838.38 MB/sec 2640.91 MB/sec
make -s -j128
allmodconfig 5min 49sec 5min 31sec
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc/lib/NG4copy_page.S')
-rw-r--r-- | arch/sparc/lib/NG4copy_page.S | 57 |
1 files changed, 57 insertions, 0 deletions
diff --git a/arch/sparc/lib/NG4copy_page.S b/arch/sparc/lib/NG4copy_page.S new file mode 100644 index 000000000000..f30ec10bbcac --- /dev/null +++ b/arch/sparc/lib/NG4copy_page.S @@ -0,0 +1,57 @@ +/* NG4copy_page.S: Niagara-4 optimized copy page. + * + * Copyright (C) 2012 (davem@davemloft.net) + */ + +#include <asm/asi.h> +#include <asm/page.h> + + .text + .align 32 + + .register %g2, #scratch + .register %g3, #scratch + + .globl NG4copy_user_page +NG4copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ + prefetch [%o1 + 0x000], #n_reads_strong + prefetch [%o1 + 0x040], #n_reads_strong + prefetch [%o1 + 0x080], #n_reads_strong + prefetch [%o1 + 0x0c0], #n_reads_strong + set PAGE_SIZE, %g7 + prefetch [%o1 + 0x100], #n_reads_strong + prefetch [%o1 + 0x140], #n_reads_strong + prefetch [%o1 + 0x180], #n_reads_strong + prefetch [%o1 + 0x1c0], #n_reads_strong +1: + ldx [%o1 + 0x00], %o2 + subcc %g7, 0x40, %g7 + ldx [%o1 + 0x08], %o3 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + ldx [%o1 + 0x20], %g1 + stxa %o2, [%o0] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x08, %o0 + ldx [%o1 + 0x28], %g2 + stxa %o3, [%o0] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x08, %o0 + ldx [%o1 + 0x30], %g3 + stxa %o4, [%o0] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x08, %o0 + ldx [%o1 + 0x38], %o2 + add %o1, 0x40, %o1 + stxa %o5, [%o0] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x08, %o0 + stxa %g1, [%o0] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x08, %o0 + stxa %g2, [%o0] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x08, %o0 + stxa %g3, [%o0] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x08, %o0 + stxa %o2, [%o0] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x08, %o0 + bne,pt %icc, 1b + prefetch [%o1 + 0x200], #n_reads_strong + retl + membar #StoreLoad | #StoreStore + .size NG4copy_user_page,.-NG4copy_user_page |