summaryrefslogtreecommitdiffstats
path: root/arch/arm64/crypto/aes-ce-ccm-core.S
blob: 88f5aef7934c77a5213fdc85a7f8fe435fc76e70 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
/*
 * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto

	/*
	 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
	 *			     u32 *macp, u8 const rk[], u32 rounds);
	 */
ENTRY(ce_aes_ccm_auth_data)
	frame_push	7

	mov	x19, x0
	mov	x20, x1
	mov	x21, x2
	mov	x22, x3
	mov	x23, x4
	mov	x24, x5

	ldr	w25, [x22]			/* leftover from prev round? */
	ld1	{v0.16b}, [x0]			/* load mac */
	cbz	w25, 1f
	sub	w25, w25, #16
	eor	v1.16b, v1.16b, v1.16b
0:	ldrb	w7, [x20], #1			/* get 1 byte of input */
	subs	w21, w21, #1
	add	w25, w25, #1
	ins	v1.b[0], w7
	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
	beq	8f				/* out of input? */
	cbnz	w25, 0b
	eor	v0.16b, v0.16b, v1.16b
1:	ld1	{v3.4s}, [x23]			/* load first round key */
	prfm	pldl1strm, [x20]
	cmp	w24, #12			/* which key size? */
	add	x6, x23, #16
	sub	w7, w24, #2			/* modified # of rounds */
	bmi	2f
	bne	5f
	mov	v5.16b, v3.16b
	b	4f
2:	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
3:	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
4:	ld1	{v3.4s}, [x6], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
5:	ld1	{v4.4s}, [x6], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	ld1	{v5.4s}, [x6], #16		/* load next round key */
	bpl	3b
	aese	v0.16b, v4.16b
	subs	w21, w21, #16			/* last data? */
	eor	v0.16b, v0.16b, v5.16b		/* final round */
	bmi	6f
	ld1	{v1.16b}, [x20], #16		/* load next input block */
	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
	beq	6f

	if_will_cond_yield_neon
	st1	{v0.16b}, [x19]			/* store mac */
	do_cond_yield_neon
	ld1	{v0.16b}, [x19]			/* reload mac */
	endif_yield_neon

	b	1b
6:	st1	{v0.16b}, [x19]			/* store mac */
	beq	10f
	adds	w21, w21, #16
	beq	10f
	mov	w25, w21
7:	ldrb	w7, [x20], #1
	umov	w6, v0.b[0]
	eor	w6, w6, w7
	strb	w6, [x19], #1
	subs	w21, w21, #1
	beq	10f
	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
	b	7b
8:	mov	w7, w25
	add	w25, w25, #16
9:	ext	v1.16b, v1.16b, v1.16b, #1
	adds	w7, w7, #1
	bne	9b
	eor	v0.16b, v0.16b, v1.16b
	st1	{v0.16b}, [x19]
10:	str	w25, [x22]

	frame_pop
	ret
ENDPROC(ce_aes_ccm_auth_data)

	/*
	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
	 * 			 u32 rounds);
	 */
ENTRY(ce_aes_ccm_final)
	ld1	{v3.4s}, [x2], #16		/* load first round key */
	ld1	{v0.16b}, [x0]			/* load mac */
	cmp	w3, #12				/* which key size? */
	sub	w3, w3, #2			/* modified # of rounds */
	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
	bmi	0f
	bne	3f
	mov	v5.16b, v3.16b
	b	2f
0:	mov	v4.16b, v3.16b
1:	ld1	{v5.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
2:	ld1	{v3.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v4.4s}, [x2], #16		/* load next round key */
	subs	w3, w3, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	bpl	1b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
	st1	{v0.16b}, [x0]			/* store result */
	ret
ENDPROC(ce_aes_ccm_final)

	.macro	aes_ccm_do_crypt,enc
	frame_push	8

	mov	x19, x0
	mov	x20, x1
	mov	x21, x2
	mov	x22, x3
	mov	x23, x4
	mov	x24, x5
	mov	x25, x6

	ldr	x26, [x25, #8]			/* load lower ctr */
	ld1	{v0.16b}, [x24]			/* load mac */
CPU_LE(	rev	x26, x26		)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x25]			/* load upper ctr */
	prfm	pldl1strm, [x20]
	add	x26, x26, #1
	rev	x9, x26
	cmp	w23, #12			/* which key size? */
	sub	w7, w23, #2			/* get modified # of rounds */
	ins	v1.d[1], x9			/* no carry in lower ctr */
	ld1	{v3.4s}, [x22]			/* load first round key */
	add	x10, x22, #16
	bmi	1f
	bne	4f
	mov	v5.16b, v3.16b
	b	3f
1:	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
2:	/* inner loop: 3 rounds, 2x interleaved */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v3.4s}, [x10], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
4:	ld1	{v4.4s}, [x10], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	ld1	{v5.4s}, [x10], #16		/* load next round key */
	bpl	2b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	subs	w21, w21, #16
	bmi	7f				/* partial block? */
	ld1	{v2.16b}, [x20], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v1.16b}, [x19], #16		/* write output block */
	beq	5f

	if_will_cond_yield_neon
	st1	{v0.16b}, [x24]			/* store mac */
	do_cond_yield_neon
	ld1	{v0.16b}, [x24]			/* reload mac */
	endif_yield_neon

	b	0b
5:
CPU_LE(	rev	x26, x26			)
	st1	{v0.16b}, [x24]			/* store mac */
	str	x26, [x25, #8]			/* store lsb end of ctr (BE) */

6:	frame_pop
	ret

7:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
	st1	{v0.16b}, [x24]			/* store mac */
	add	w21, w21, #16			/* process partial tail block */
8:	ldrb	w9, [x20], #1			/* get 1 byte of input */
	umov	w6, v1.b[0]			/* get top crypted ctr byte */
	umov	w7, v0.b[0]			/* get top mac byte */
	.if	\enc == 1
	eor	w7, w7, w9
	eor	w9, w9, w6
	.else
	eor	w9, w9, w6
	eor	w7, w7, w9
	.endif
	strb	w9, [x19], #1			/* store out byte */
	strb	w7, [x24], #1			/* store mac byte */
	subs	w21, w21, #1
	beq	6b
	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
	b	8b
	.endm

	/*
	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
	 * 			   u8 const rk[], u32 rounds, u8 mac[],
	 * 			   u8 ctr[]);
	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
	 * 			   u8 const rk[], u32 rounds, u8 mac[],
	 * 			   u8 ctr[]);
	 */
ENTRY(ce_aes_ccm_encrypt)
	aes_ccm_do_crypt	1
ENDPROC(ce_aes_ccm_encrypt)

ENTRY(ce_aes_ccm_decrypt)
	aes_ccm_do_crypt	0
ENDPROC(ce_aes_ccm_decrypt)
OpenPOWER on IntegriCloud