summaryrefslogtreecommitdiffstats
path: root/arch/x86/crypto/chacha20_glue.c
blob: f910d1d449f005ba26c3d4d65df6a05cc7f9b924 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
/*
 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <crypto/algapi.h>
#include <crypto/chacha20.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/simd.h>

#define CHACHA20_STATE_ALIGN 16

asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
#ifdef CONFIG_AS_AVX2
asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src);
static bool chacha20_use_avx2;
#endif

static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
			    unsigned int bytes)
{
	u8 buf[CHACHA20_BLOCK_SIZE];

#ifdef CONFIG_AS_AVX2
	if (chacha20_use_avx2) {
		while (bytes >= CHACHA20_BLOCK_SIZE * 8) {
			chacha20_8block_xor_avx2(state, dst, src);
			bytes -= CHACHA20_BLOCK_SIZE * 8;
			src += CHACHA20_BLOCK_SIZE * 8;
			dst += CHACHA20_BLOCK_SIZE * 8;
			state[12] += 8;
		}
	}
#endif
	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
		chacha20_4block_xor_ssse3(state, dst, src);
		bytes -= CHACHA20_BLOCK_SIZE * 4;
		src += CHACHA20_BLOCK_SIZE * 4;
		dst += CHACHA20_BLOCK_SIZE * 4;
		state[12] += 4;
	}
	while (bytes >= CHACHA20_BLOCK_SIZE) {
		chacha20_block_xor_ssse3(state, dst, src);
		bytes -= CHACHA20_BLOCK_SIZE;
		src += CHACHA20_BLOCK_SIZE;
		dst += CHACHA20_BLOCK_SIZE;
		state[12]++;
	}
	if (bytes) {
		memcpy(buf, src, bytes);
		chacha20_block_xor_ssse3(state, buf, buf);
		memcpy(dst, buf, bytes);
	}
}

static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
			 struct scatterlist *src, unsigned int nbytes)
{
	u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1];
	struct blkcipher_walk walk;
	int err;

	if (nbytes <= CHACHA20_BLOCK_SIZE || !may_use_simd())
		return crypto_chacha20_crypt(desc, dst, src, nbytes);

	state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN);

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);

	crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);

	kernel_fpu_begin();

	while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
				rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
		err = blkcipher_walk_done(desc, &walk,
					  walk.nbytes % CHACHA20_BLOCK_SIZE);
	}

	if (walk.nbytes) {
		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
				walk.nbytes);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	kernel_fpu_end();

	return err;
}

static struct crypto_alg alg = {
	.cra_name		= "chacha20",
	.cra_driver_name	= "chacha20-simd",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= 1,
	.cra_type		= &crypto_blkcipher_type,
	.cra_ctxsize		= sizeof(struct chacha20_ctx),
	.cra_alignmask		= sizeof(u32) - 1,
	.cra_module		= THIS_MODULE,
	.cra_u			= {
		.blkcipher = {
			.min_keysize	= CHACHA20_KEY_SIZE,
			.max_keysize	= CHACHA20_KEY_SIZE,
			.ivsize		= CHACHA20_IV_SIZE,
			.geniv		= "seqiv",
			.setkey		= crypto_chacha20_setkey,
			.encrypt	= chacha20_simd,
			.decrypt	= chacha20_simd,
		},
	},
};

static int __init chacha20_simd_mod_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_SSSE3))
		return -ENODEV;

#ifdef CONFIG_AS_AVX2
	chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
			    boot_cpu_has(X86_FEATURE_AVX2) &&
			    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#endif
	return crypto_register_alg(&alg);
}

static void __exit chacha20_simd_mod_fini(void)
{
	crypto_unregister_alg(&alg);
}

module_init(chacha20_simd_mod_init);
module_exit(chacha20_simd_mod_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated");
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-simd");
OpenPOWER on IntegriCloud