summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Atsushi Nemoto <anemo@mba.ocn.ne.jp> 2006-12-08 01:04:51 +0900
committer Ralf Baechle <ralf@linux-mips.org> 2006-12-09 01:03:59 +0000
commit ed99e2bc1dc5dc54eb5a019f4975562dbef20103 (patch)
tree c8ff52ab4a29fe842e34fd94d01e74082486391d
parent 773ff78838ca3c07245e45c06235e0baaa5f710a (diff)
download blackbird-op-linux-ed99e2bc1dc5dc54eb5a019f4975562dbef20103.tar.gz
blackbird-op-linux-ed99e2bc1dc5dc54eb5a019f4975562dbef20103.zip
[MIPS] Optimize csum_partial for 64bit kernel
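Make csum_partial 64-bit powered: when USE_DOUBLE is defined, it loads and accumulates 8 bytes at a time (ld/daddu) instead of 4 (lw/addu), then folds the 64-bit sum down to 32 bits before the existing 16-bit fold.

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>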
-rw-r--r-- arch/mips/lib/csum_partial.S 76
1 files changed, 54 insertions, 22 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index b04475d76f3c..9db357294be1 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -29,30 +29,49 @@
#define t5 $13
#define t6 $14
#define t7 $15
+
+#define USE_DOUBLE
#endif
+#ifdef USE_DOUBLE
+
+#define LOAD ld
+#define ADD daddu
+#define NBYTES 8
+
+#else
+
+#define LOAD lw
+#define ADD addu
+#define NBYTES 4
+
+#endif /* USE_DOUBLE */
+
+#define UNIT(unit) ((unit)*NBYTES)
+
#define ADDC(sum,reg) \
- addu sum, reg; \
+ ADD sum, reg; \
sltu v1, sum, reg; \
- addu sum, v1
+ ADD sum, v1
-#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
- lw _t0, (offset + 0x00)(src); \
- lw _t1, (offset + 0x04)(src); \
- lw _t2, (offset + 0x08)(src); \
- lw _t3, (offset + 0x0c)(src); \
- ADDC(sum, _t0); \
- ADDC(sum, _t1); \
- ADDC(sum, _t2); \
- ADDC(sum, _t3); \
- lw _t0, (offset + 0x10)(src); \
- lw _t1, (offset + 0x14)(src); \
- lw _t2, (offset + 0x18)(src); \
- lw _t3, (offset + 0x1c)(src); \
+#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \
+ LOAD _t0, (offset + UNIT(0))(src); \
+ LOAD _t1, (offset + UNIT(1))(src); \
+ LOAD _t2, (offset + UNIT(2))(src); \
+ LOAD _t3, (offset + UNIT(3))(src); \
ADDC(sum, _t0); \
ADDC(sum, _t1); \
ADDC(sum, _t2); \
- ADDC(sum, _t3); \
+ ADDC(sum, _t3)
+
+#ifdef USE_DOUBLE
+#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
+ CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
+#else
+#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
+ CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3); \
+ CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
+#endif
/*
* a0: source address
@@ -117,11 +136,17 @@ qword_align:
beqz t8, oword_align
andi t8, src, 0x10
+#ifdef USE_DOUBLE
+ ld t0, 0x00(src)
+ LONG_SUBU a1, a1, 0x8
+ ADDC(sum, t0)
+#else
lw t0, 0x00(src)
lw t1, 0x04(src)
LONG_SUBU a1, a1, 0x8
ADDC(sum, t0)
ADDC(sum, t1)
+#endif
PTR_ADDU src, src, 0x8
andi t8, src, 0x10
@@ -129,14 +154,14 @@ oword_align:
beqz t8, begin_movement
LONG_SRL t8, a1, 0x7
- lw t3, 0x08(src)
- lw t4, 0x0c(src)
- lw t0, 0x00(src)
- lw t1, 0x04(src)
- ADDC(sum, t3)
- ADDC(sum, t4)
+#ifdef USE_DOUBLE
+ ld t0, 0x00(src)
+ ld t1, 0x08(src)
ADDC(sum, t0)
ADDC(sum, t1)
+#else
+ CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
+#endif
LONG_SUBU a1, a1, 0x10
PTR_ADDU src, src, 0x10
LONG_SRL t8, a1, 0x7
@@ -219,6 +244,13 @@ small_csumcpy:
1: ADDC(sum, t1)
/* fold checksum */
+#ifdef USE_DOUBLE
+ dsll32 v1, sum, 0
+ daddu sum, v1
+ sltu v1, sum, v1
+ dsra32 sum, sum, 0
+ addu sum, v1
+#endif
sll v1, sum, 16
addu sum, v1
sltu v1, sum, v1
OpenPOWER on IntegriCloud