1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
|
/* Optimised simple memory checksum
*
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public Licence
* as published by the Free Software Foundation; either version
* 2 of the Licence, or (at your option) any later version.
*/
#include <asm/cache.h>
.section .text
.balign L1_CACHE_BYTES
###############################################################################
#
# unsigned int do_csum(const unsigned char *buff, int len)
#
###############################################################################
.globl do_csum
.type do_csum,@function
do_csum:
movm [d2,d3],(sp) # save callee-clobbered d2/d3 (restored by the ret at the end)
mov d1,d2 # count
mov d0,a0 # buff
mov a0,a1 # keep the original pointer for the odd-alignment test at exit
clr d1 # accumulator
cmp +0,d2
ble do_csum_done # check for zero length or negative
# 4-byte align the buffer pointer
btst +3,a0
beq do_csum_now_4b_aligned
btst +1,a0
beq do_csum_addr_not_odd
# buffer starts on an odd address: consume one byte.  It is shifted into
# bits 15:8 so that subsequent halfword/word sums stay in the right byte
# lanes; the swaph at the end undoes the overall byte swap this causes.
movbu (a0),d0
inc a0
asl +8,d0
add d0,d1
add -1,d2
do_csum_addr_not_odd:
cmp +2,d2
bcs do_csum_fewer_than_4 # fewer than 2 bytes left: handle as a tail
btst +2,a0
beq do_csum_now_4b_aligned
movhu (a0+),d0 # consume one halfword to reach 4-byte alignment
add d0,d1
add -2,d2
cmp +4,d2
bcs do_csum_fewer_than_4
do_csum_now_4b_aligned:
# we want to checksum as much as we can in chunks of 32 bytes
cmp +31,d2
bls do_csum_remainder # 4-byte aligned remainder
add -32,d2 # pre-bias the count so "borrow" ends the loop
mov +32,d3 # loop stride in bytes
do_csum_loop:
# sum eight words per iteration; the adds are chained through the carry
# flag (the intervening loads must not disturb carry), and the final
# addc +0 folds the last carry back into the accumulator
mov (a0+),d0
mov (a0+),e0
mov (a0+),e1
mov (a0+),e3
add d0,d1
addc e0,d1
addc e1,d1
addc e3,d1
mov (a0+),d0
mov (a0+),e0
mov (a0+),e1
mov (a0+),e3
addc d0,d1
addc e0,d1
addc e1,d1
addc e3,d1
addc +0,d1 # fold in the carry out of the last addc
sub d3,d2
bcc do_csum_loop # keep going while 32 or more bytes remained
add d3,d2 # undo the pre-bias: d2 = remaining byte count (0..31)
beq do_csum_done
do_csum_remainder:
# cut 16-31 bytes down to 0-15
cmp +16,d2
bcs do_csum_fewer_than_16
mov (a0+),d0 # one 16-byte (4-word) carry-chained chunk
mov (a0+),e0
mov (a0+),e1
mov (a0+),e3
add d0,d1
addc e0,d1
addc e1,d1
addc e3,d1
addc +0,d1 # fold the final carry
add -16,d2
beq do_csum_done
do_csum_fewer_than_16:
# copy the remaining whole words
# (fall-through ladder: 3 words falls through all of the adds below,
# 2 words enters at do_csum_two_words, 1 word at do_csum_one_word)
cmp +4,d2
bcs do_csum_fewer_than_4
cmp +8,d2
bcs do_csum_one_word
cmp +12,d2
bcs do_csum_two_words
mov (a0+),d0 # third-from-last word
add d0,d1
addc +0,d1
do_csum_two_words:
mov (a0+),d0
add d0,d1
addc +0,d1
do_csum_one_word:
mov (a0+),d0
add d0,d1
addc +0,d1
do_csum_fewer_than_4:
# handle the final 0-3 bytes
and +3,d2
beq do_csum_done
xor_cmp d0,d0,+2,d2 # combined op: clear d0 and compare +2 against d2
bcs do_csum_fewer_than_2
movhu (a0+),d0 # trailing halfword into the low bits of d0
and +1,d2
beq do_csum_add_last_bit
do_csum_fewer_than_2:
movbu (a0),d3 # last odd byte, added in the low byte lane
add d3,d0
do_csum_add_last_bit:
add d0,d1
addc +0,d1
do_csum_done:
# compress the checksum down to 16 bits
# both 16-bit halves are added in the top half of the register so that
# the fold carry appears as the carry flag; addc +0xffff then lsr +16
# completes the ones-complement fold into bits 15:0
mov +0xffff0000,d0
and d1,d0 # d0 = accumulator high half, in place
asl +16,d1 # d1 = accumulator low half, shifted to the top
add d1,d0
addc +0xffff,d0 # add 0xffff plus the fold carry
lsr +16,d0 # final 16-bit sum down to bits 15:0
# flip the halves of the word result if the buffer was oddly aligned
and +1,a1
beq do_csum_not_oddly_aligned
swaph d0,d0 # exchange bits 15:8 with 7:0
do_csum_not_oddly_aligned:
ret [d2,d3],8 # restore d2/d3, pop 8 bytes, return sum in d0
.size do_csum, .-do_csum
|