diff options
Diffstat (limited to 'drivers/staging/skein')
-rw-r--r-- | drivers/staging/skein/Kconfig | 24 | ||||
-rw-r--r-- | drivers/staging/skein/Makefile | 13 | ||||
-rw-r--r-- | drivers/staging/skein/skein_api.c | 2 | ||||
-rw-r--r-- | drivers/staging/skein/skein_api.h | 2 | ||||
-rw-r--r-- | drivers/staging/skein/skein_base.c (renamed from drivers/staging/skein/skein.c) | 23 | ||||
-rw-r--r-- | drivers/staging/skein/skein_base.h (renamed from drivers/staging/skein/skein.h) | 39 | ||||
-rw-r--r-- | drivers/staging/skein/skein_block.c | 932 | ||||
-rw-r--r-- | drivers/staging/skein/skein_block.h | 2 | ||||
-rw-r--r-- | drivers/staging/skein/skein_generic.c | 216 | ||||
-rw-r--r-- | drivers/staging/skein/skein_iv.h | 2 | ||||
-rw-r--r-- | drivers/staging/skein/threefish_api.h | 2 |
11 files changed, 723 insertions, 534 deletions
diff --git a/drivers/staging/skein/Kconfig b/drivers/staging/skein/Kconfig index b9172bfcdc1b..012a8233376e 100644 --- a/drivers/staging/skein/Kconfig +++ b/drivers/staging/skein/Kconfig @@ -1,8 +1,8 @@ config CRYPTO_SKEIN - bool "Skein digest algorithm" + tristate "Skein digest algorithm" depends on (X86 || UML_X86) && 64BIT && CRYPTO - select CRYPTO_THREEFISH select CRYPTO_HASH + select CRYPTO_ALGAPI help Skein secure hash algorithm is one of 5 finalists from the NIST SHA3 competition. @@ -12,21 +12,5 @@ config CRYPTO_SKEIN http://www.skein-hash.info/sites/default/files/skein1.3.pdf - for more information. This module depends on the threefish block - cipher module. - -config CRYPTO_THREEFISH - bool "Threefish tweakable block cipher" - depends on (X86 || UML_X86) && 64BIT && CRYPTO - select CRYPTO_ALGAPI - help - Threefish cipher algorithm is the tweakable block cipher underneath - the Skein family of secure hash algorithms. Skein is one of 5 - finalists from the NIST SHA3 competition. - - Skein is optimized for modern, 64bit processors and is highly - customizable. See: - - http://www.skein-hash.info/sites/default/files/skein1.3.pdf - - for more information. + for more information. This module also contains the threefish block + cipher algorithm. diff --git a/drivers/staging/skein/Makefile b/drivers/staging/skein/Makefile index a14aaddd829c..b7f947fb98f0 100644 --- a/drivers/staging/skein/Makefile +++ b/drivers/staging/skein/Makefile @@ -1,9 +1,10 @@ # # Makefile for the skein secure hash algorithm # -obj-$(CONFIG_CRYPTO_SKEIN) += skein.o \ - skein_api.o \ - skein_block.o - -obj-$(CONFIG_CRYPTO_THREEFISH) += threefish_block.o \ - threefish_api.o +obj-$(CONFIG_CRYPTO_SKEIN) += skein.o +skein-y := skein_base.o \ + skein_api.o \ + skein_block.o \ + threefish_block.o \ + threefish_api.o \ + skein_generic.o diff --git a/drivers/staging/skein/skein_api.c b/drivers/staging/skein/skein_api.c index 6e700eefc00c..5bfce076f7c8 100644 --- a/drivers/staging/skein/skein_api.c +++ b/drivers/staging/skein/skein_api.c @@ -31,7 +31,7 @@ int skein_ctx_prepare(struct skein_ctx *ctx, enum skein_size size) { skein_assert_ret(ctx && size, SKEIN_FAIL); - memset(ctx , 0, sizeof(struct skein_ctx)); + memset(ctx, 0, sizeof(struct skein_ctx)); ctx->skein_size = size; return SKEIN_SUCCESS; diff --git a/drivers/staging/skein/skein_api.h b/drivers/staging/skein/skein_api.h index e02fa19d9458..171b87549548 100644 --- a/drivers/staging/skein/skein_api.h +++ b/drivers/staging/skein/skein_api.h @@ -79,7 +79,7 @@ OTHER DEALINGS IN THE SOFTWARE. */ #include <linux/types.h> -#include "skein.h" +#include "skein_base.h" /** * Which Skein size to use diff --git a/drivers/staging/skein/skein.c b/drivers/staging/skein/skein_base.c index 8cc83587b1f1..7e700a6b5788 100644 --- a/drivers/staging/skein/skein.c +++ b/drivers/staging/skein/skein_base.c @@ -8,10 +8,9 @@ ** ************************************************************************/ -#define SKEIN_PORT_CODE /* instantiate any code in skein_port.h */ - #include <linux/string.h> /* get the memcpy/memset functions */ -#include "skein.h" /* get the Skein API definitions */ +#include <linux/export.h> +#include "skein_base.h" /* get the Skein API definitions */ #include "skein_iv.h" /* get precomputed IVs */ #include "skein_block.h" @@ -125,8 +124,6 @@ int skein_256_init_ext(struct skein_256_ctx *ctx, size_t hash_bit_len, /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ cfg.w[2] = skein_swap64(tree_info); - skein_show_key(256, &ctx->h, key, key_bytes); - /* compute the initial chaining values from config block */ skein_256_process_block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN); @@ -233,8 +230,6 @@ int skein_256_final(struct skein_256_ctx *ctx, u8 *hash_val) /* "output" the ctr mode bytes */ skein_put64_lsb_first(hash_val+i*SKEIN_256_BLOCK_BYTES, ctx->x, n); - skein_show_final(256, &ctx->h, n, - hash_val+i*SKEIN_256_BLOCK_BYTES); /* restore the counter mode key for next time */ memcpy(ctx->x, x, sizeof(x)); } @@ -354,8 +349,6 @@ int skein_512_init_ext(struct skein_512_ctx *ctx, size_t hash_bit_len, /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ cfg.w[2] = skein_swap64(tree_info); - skein_show_key(512, &ctx->h, key, key_bytes); - /* compute the initial chaining values from config block */ skein_512_process_block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN); @@ -462,8 +455,6 @@ int skein_512_final(struct skein_512_ctx *ctx, u8 *hash_val) /* "output" the ctr mode bytes */ skein_put64_lsb_first(hash_val+i*SKEIN_512_BLOCK_BYTES, ctx->x, n); - skein_show_final(512, &ctx->h, n, - hash_val+i*SKEIN_512_BLOCK_BYTES); /* restore the counter mode key for next time */ memcpy(ctx->x, x, sizeof(x)); } @@ -578,8 +569,6 @@ int skein_1024_init_ext(struct skein_1024_ctx *ctx, size_t hash_bit_len, /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ cfg.w[2] = skein_swap64(tree_info); - skein_show_key(1024, &ctx->h, key, key_bytes); - /* compute the initial chaining values from config block */ skein_1024_process_block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN); @@ -686,8 +675,6 @@ int skein_1024_final(struct skein_1024_ctx *ctx, u8 *hash_val) /* "output" the ctr mode bytes */ skein_put64_lsb_first(hash_val+i*SKEIN_1024_BLOCK_BYTES, ctx->x, n); - skein_show_final(1024, &ctx->h, n, - hash_val+i*SKEIN_1024_BLOCK_BYTES); /* restore the counter mode key for next time */ memcpy(ctx->x, x, sizeof(x)); } @@ -795,8 +782,6 @@ int skein_256_output(struct skein_256_ctx *ctx, u8 *hash_val) /* "output" the ctr mode bytes */ skein_put64_lsb_first(hash_val+i*SKEIN_256_BLOCK_BYTES, ctx->x, n); - skein_show_final(256, &ctx->h, n, - hash_val+i*SKEIN_256_BLOCK_BYTES); /* restore the counter mode key for next time */ memcpy(ctx->x, x, sizeof(x)); } @@ -834,8 +819,6 @@ int skein_512_output(struct skein_512_ctx *ctx, u8 *hash_val) /* "output" the ctr mode bytes */ skein_put64_lsb_first(hash_val+i*SKEIN_512_BLOCK_BYTES, ctx->x, n); - skein_show_final(256, &ctx->h, n, - hash_val+i*SKEIN_512_BLOCK_BYTES); /* restore the counter mode key for next time */ memcpy(ctx->x, x, sizeof(x)); } @@ -873,8 +856,6 @@ int skein_1024_output(struct skein_1024_ctx *ctx, u8 *hash_val) /* "output" the ctr mode bytes */ skein_put64_lsb_first(hash_val+i*SKEIN_1024_BLOCK_BYTES, ctx->x, n); - skein_show_final(256, &ctx->h, n, - hash_val+i*SKEIN_1024_BLOCK_BYTES); /* restore the counter mode key for next time */ memcpy(ctx->x, x, sizeof(x)); } diff --git a/drivers/staging/skein/skein.h b/drivers/staging/skein/skein_base.h index e6669f196e5d..3c7f8ad3627d 100644 --- a/drivers/staging/skein/skein.h +++ b/drivers/staging/skein/skein_base.h @@ -15,10 +15,6 @@ ** ** The "default" note explains what happens when the switch is not defined. ** -** SKEIN_DEBUG -- make callouts from inside Skein code -** to examine/display intermediate values. -** [default: no callouts (no overhead)] -** ** SKEIN_ERR_CHECK -- how error checking is handled inside Skein ** code. If not defined, most error checking ** is disabled (for performance). Otherwise, @@ -28,9 +24,10 @@ ** ***************************************************************************/ -#ifndef rotl_64 -#define rotl_64(x, N) (((x) << (N)) | ((x) >> (64-(N)))) -#endif +/*Skein digest sizes for crypto api*/ +#define SKEIN256_DIGEST_BIT_SIZE 256 +#define SKEIN512_DIGEST_BIT_SIZE 512 +#define SKEIN1024_DIGEST_BIT_SIZE 1024 /* below two prototype assume we are handed aligned data */ #define skein_put64_lsb_first(dst08, src64, b_cnt) memcpy(dst08, src64, b_cnt) @@ -44,12 +41,12 @@ enum { SKEIN_BAD_HASHLEN = 2 }; -#define SKEIN_MODIFIER_WORDS (2) /* number of modifier (tweak) words */ +#define SKEIN_MODIFIER_WORDS 2 /* number of modifier (tweak) words */ -#define SKEIN_256_STATE_WORDS (4) -#define SKEIN_512_STATE_WORDS (8) -#define SKEIN_1024_STATE_WORDS (16) -#define SKEIN_MAX_STATE_WORDS (16) +#define SKEIN_256_STATE_WORDS 4 +#define SKEIN_512_STATE_WORDS 8 +#define SKEIN_1024_STATE_WORDS 16 +#define SKEIN_MAX_STATE_WORDS 16 #define SKEIN_256_STATE_BYTES (8*SKEIN_256_STATE_WORDS) #define SKEIN_512_STATE_BYTES (8*SKEIN_512_STATE_WORDS) @@ -87,6 +84,11 @@ struct skein_1024_ctx { /* 1024-bit Skein hash context structure */ u8 b[SKEIN_1024_BLOCK_BYTES]; /* partial block buf (8-byte aligned) */ }; +static inline u64 rotl_64(u64 x, u8 N) +{ + return (x << N) | (x >> (64 - N)); +} + /* Skein APIs for (incremental) "straight hashing" */ int skein_256_init(struct skein_256_ctx *ctx, size_t hash_bit_len); int skein_512_init(struct skein_512_ctx *ctx, size_t hash_bit_len); @@ -273,19 +275,6 @@ int skein_1024_output(struct skein_1024_ctx *ctx, u8 *hash_val); (hdr).tweak[1] |= SKEIN_T1_TREE_LEVEL(height); \ } -/***************************************************************** -** "Internal" Skein definitions for debugging and error checking -******************************************************************/ -#ifdef SKEIN_DEBUG /* examine/display intermediate values? */ -#include "skein_debug.h" -#else /* default is no callouts */ -#define skein_show_block(bits, ctx, x, blk_ptr, w_ptr, ks_event_ptr, ks_odd_ptr) -#define skein_show_round(bits, ctx, r, x) -#define skein_show_r_ptr(bits, ctx, r, x_ptr) -#define skein_show_final(bits, ctx, cnt, out_ptr) -#define skein_show_key(bits, ctx, key, key_bytes) -#endif - /* ignore all asserts, for performance */ #define skein_assert_ret(x, ret_code) #define skein_assert(x) diff --git a/drivers/staging/skein/skein_block.c b/drivers/staging/skein/skein_block.c index 616364faf92e..66261ab25c88 100644 --- a/drivers/staging/skein/skein_block.c +++ b/drivers/staging/skein/skein_block.c @@ -15,7 +15,7 @@ ************************************************************************/ #include <linux/string.h> -#include "skein.h" +#include "skein_base.h" #include "skein_block.h" #ifndef SKEIN_USE_ASM @@ -26,32 +26,27 @@ #define SKEIN_LOOP 001 /* default: unroll 256 and 512, but not 1024 */ #endif -#define BLK_BITS (WCNT*64) /* some useful definitions for code here */ +#define BLK_BITS (WCNT * 64) /* some useful definitions for code here */ #define KW_TWK_BASE (0) #define KW_KEY_BASE (3) #define ks (kw + KW_KEY_BASE) #define ts (kw + KW_TWK_BASE) #ifdef SKEIN_DEBUG -#define debug_save_tweak(ctx) { \ - ctx->h.tweak[0] = ts[0]; ctx->h.tweak[1] = ts[1]; } +#define debug_save_tweak(ctx) \ +{ \ + ctx->h.tweak[0] = ts[0]; \ + ctx->h.tweak[1] = ts[1]; \ +} #else #define debug_save_tweak(ctx) #endif -/***************************** SKEIN_256 ******************************/ #if !(SKEIN_USE_ASM & 256) -void skein_256_process_block(struct skein_256_ctx *ctx, const u8 *blk_ptr, - size_t blk_cnt, size_t byte_cnt_add) - { /* do it in C */ - enum { - WCNT = SKEIN_256_STATE_WORDS - }; #undef RCNT -#define RCNT (SKEIN_256_ROUNDS_TOTAL/8) - +#define RCNT (SKEIN_256_ROUNDS_TOTAL / 8) #ifdef SKEIN_LOOP /* configure how much to unroll the loop */ -#define SKEIN_UNROLL_256 (((SKEIN_LOOP)/100)%10) +#define SKEIN_UNROLL_256 (((SKEIN_LOOP) / 100) % 10) #else #define SKEIN_UNROLL_256 (0) #endif @@ -60,17 +55,329 @@ void skein_256_process_block(struct skein_256_ctx *ctx, const u8 *blk_ptr, #if (RCNT % SKEIN_UNROLL_256) #error "Invalid SKEIN_UNROLL_256" /* sanity check on unroll count */ #endif - size_t r; - u64 kw[WCNT+4+RCNT*2]; /* key schedule: chaining vars + tweak + "rot"*/ +#endif +#define ROUND256(p0, p1, p2, p3, ROT, r_num) \ +do { \ + X##p0 += X##p1; \ + X##p1 = rotl_64(X##p1, ROT##_0); \ + X##p1 ^= X##p0; \ + X##p2 += X##p3; \ + X##p3 = rotl_64(X##p3, ROT##_1); \ + X##p3 ^= X##p2; \ +} while (0) + +#if SKEIN_UNROLL_256 == 0 +#define R256(p0, p1, p2, p3, ROT, r_num) /* fully unrolled */ \ +do { \ + ROUND256(p0, p1, p2, p3, ROT, r_num); \ +} while (0) + +#define I256(R) \ +do { \ + /* inject the key schedule value */ \ + X0 += ks[((R) + 1) % 5]; \ + X1 += ks[((R) + 2) % 5] + ts[((R) + 1) % 3]; \ + X2 += ks[((R) + 3) % 5] + ts[((R) + 2) % 3]; \ + X3 += ks[((R) + 4) % 5] + (R) + 1; \ +} while (0) #else - u64 kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ +/* looping version */ +#define R256(p0, p1, p2, p3, ROT, r_num) \ +do { \ + ROUND256(p0, p1, p2, p3, ROT, r_num); \ +} while (0) + +#define I256(R) \ +do { \ + /* inject the key schedule value */ \ + X0 += ks[r + (R) + 0]; \ + X1 += ks[r + (R) + 1] + ts[r + (R) + 0]; \ + X2 += ks[r + (R) + 2] + ts[r + (R) + 1]; \ + X3 += ks[r + (R) + 3] + r + (R); \ + /* rotate key schedule */ \ + ks[r + (R) + 4] = ks[r + (R) - 1]; \ + ts[r + (R) + 2] = ts[r + (R) - 1]; \ +} while (0) +#endif +#define R256_8_ROUNDS(R) \ +do { \ + R256(0, 1, 2, 3, R_256_0, 8 * (R) + 1); \ + R256(0, 3, 2, 1, R_256_1, 8 * (R) + 2); \ + R256(0, 1, 2, 3, R_256_2, 8 * (R) + 3); \ + R256(0, 3, 2, 1, R_256_3, 8 * (R) + 4); \ + I256(2 * (R)); \ + R256(0, 1, 2, 3, R_256_4, 8 * (R) + 5); \ + R256(0, 3, 2, 1, R_256_5, 8 * (R) + 6); \ + R256(0, 1, 2, 3, R_256_6, 8 * (R) + 7); \ + R256(0, 3, 2, 1, R_256_7, 8 * (R) + 8); \ + I256(2 * (R) + 1); \ +} while (0) + +#define R256_UNROLL_R(NN) \ + ((SKEIN_UNROLL_256 == 0 && \ + SKEIN_256_ROUNDS_TOTAL / 8 > (NN)) || \ + (SKEIN_UNROLL_256 > (NN))) + +#if (SKEIN_UNROLL_256 > 14) +#error "need more unrolling in skein_256_process_block" +#endif +#endif + +#if !(SKEIN_USE_ASM & 512) +#undef RCNT +#define RCNT (SKEIN_512_ROUNDS_TOTAL/8) + +#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ +#define SKEIN_UNROLL_512 (((SKEIN_LOOP)/10)%10) +#else +#define SKEIN_UNROLL_512 (0) +#endif + +#if SKEIN_UNROLL_512 +#if (RCNT % SKEIN_UNROLL_512) +#error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */ +#endif +#endif +#define ROUND512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num) \ +do { \ + X##p0 += X##p1; \ + X##p1 = rotl_64(X##p1, ROT##_0); \ + X##p1 ^= X##p0; \ + X##p2 += X##p3; \ + X##p3 = rotl_64(X##p3, ROT##_1); \ + X##p3 ^= X##p2; \ + X##p4 += X##p5; \ + X##p5 = rotl_64(X##p5, ROT##_2); \ + X##p5 ^= X##p4; \ + X##p6 += X##p7; X##p7 = rotl_64(X##p7, ROT##_3); \ + X##p7 ^= X##p6; \ +} while (0) + +#if SKEIN_UNROLL_512 == 0 +#define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num) /* unrolled */ \ +do { \ + ROUND512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num); \ +} while (0) + +#define I512(R) \ +do { \ + /* inject the key schedule value */ \ + X0 += ks[((R) + 1) % 9]; \ + X1 += ks[((R) + 2) % 9]; \ + X2 += ks[((R) + 3) % 9]; \ + X3 += ks[((R) + 4) % 9]; \ + X4 += ks[((R) + 5) % 9]; \ + X5 += ks[((R) + 6) % 9] + ts[((R) + 1) % 3]; \ + X6 += ks[((R) + 7) % 9] + ts[((R) + 2) % 3]; \ + X7 += ks[((R) + 8) % 9] + (R) + 1; \ +} while (0) + +#else /* looping version */ +#define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num) \ +do { \ + ROUND512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num); \ +} while (0) + +#define I512(R) \ +do { \ + /* inject the key schedule value */ \ + X0 += ks[r + (R) + 0]; \ + X1 += ks[r + (R) + 1]; \ + X2 += ks[r + (R) + 2]; \ + X3 += ks[r + (R) + 3]; \ + X4 += ks[r + (R) + 4]; \ + X5 += ks[r + (R) + 5] + ts[r + (R) + 0]; \ + X6 += ks[r + (R) + 6] + ts[r + (R) + 1]; \ + X7 += ks[r + (R) + 7] + r + (R); \ + /* rotate key schedule */ \ + ks[r + (R) + 8] = ks[r + (R) - 1]; \ + ts[r + (R) + 2] = ts[r + (R) - 1]; \ +} while (0) +#endif /* end of looped code definitions */ +#define R512_8_ROUNDS(R) /* do 8 full rounds */ \ +do { \ + R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_0, 8 * (R) + 1); \ + R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_1, 8 * (R) + 2); \ + R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_2, 8 * (R) + 3); \ + R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_3, 8 * (R) + 4); \ + I512(2 * (R)); \ + R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_4, 8 * (R) + 5); \ + R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_5, 8 * (R) + 6); \ + R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_6, 8 * (R) + 7); \ + R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_7, 8 * (R) + 8); \ + I512(2 * (R) + 1); /* and key injection */ \ +} while (0) +#define R512_UNROLL_R(NN) \ + ((SKEIN_UNROLL_512 == 0 && \ + SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || \ + (SKEIN_UNROLL_512 > (NN))) + +#if (SKEIN_UNROLL_512 > 14) +#error "need more unrolling in skein_512_process_block" +#endif +#endif + +#if !(SKEIN_USE_ASM & 1024) +#undef RCNT +#define RCNT (SKEIN_1024_ROUNDS_TOTAL/8) +#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ +#define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10) +#else +#define SKEIN_UNROLL_1024 (0) +#endif + +#if (SKEIN_UNROLL_1024 != 0) +#if (RCNT % SKEIN_UNROLL_1024) +#error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */ +#endif +#endif +#define ROUND1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, \ + pF, ROT, r_num) \ +do { \ + X##p0 += X##p1; \ + X##p1 = rotl_64(X##p1, ROT##_0); \ + X##p1 ^= X##p0; \ + X##p2 += X##p3; \ + X##p3 = rotl_64(X##p3, ROT##_1); \ + X##p3 ^= X##p2; \ + X##p4 += X##p5; \ + X##p5 = rotl_64(X##p5, ROT##_2); \ + X##p5 ^= X##p4; \ + X##p6 += X##p7; \ + X##p7 = rotl_64(X##p7, ROT##_3); \ + X##p7 ^= X##p6; \ + X##p8 += X##p9; \ + X##p9 = rotl_64(X##p9, ROT##_4); \ + X##p9 ^= X##p8; \ + X##pA += X##pB; \ + X##pB = rotl_64(X##pB, ROT##_5); \ + X##pB ^= X##pA; \ + X##pC += X##pD; \ + X##pD = rotl_64(X##pD, ROT##_6); \ + X##pD ^= X##pC; \ + X##pE += X##pF; \ + X##pF = rotl_64(X##pF, ROT##_7); \ + X##pF ^= X##pE; \ +} while (0) + +#if SKEIN_UNROLL_1024 == 0 +#define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, \ + ROT, rn) \ +do { \ + ROUND1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, \ + pF, ROT, rn); \ +} while (0) + +#define I1024(R) \ +do { \ + /* inject the key schedule value */ \ + X00 += ks[((R) + 1) % 17]; \ + X01 += ks[((R) + 2) % 17]; \ + X02 += ks[((R) + 3) % 17]; \ + X03 += ks[((R) + 4) % 17]; \ + X04 += ks[((R) + 5) % 17]; \ + X05 += ks[((R) + 6) % 17]; \ + X06 += ks[((R) + 7) % 17]; \ + X07 += ks[((R) + 8) % 17]; \ + X08 += ks[((R) + 9) % 17]; \ + X09 += ks[((R) + 10) % 17]; \ + X10 += ks[((R) + 11) % 17]; \ + X11 += ks[((R) + 12) % 17]; \ + X12 += ks[((R) + 13) % 17]; \ + X13 += ks[((R) + 14) % 17] + ts[((R) + 1) % 3]; \ + X14 += ks[((R) + 15) % 17] + ts[((R) + 2) % 3]; \ + X15 += ks[((R) + 16) % 17] + (R) + 1; \ +} while (0) +#else /* looping version */ +#define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, \ + ROT, rn) \ +do { \ + ROUND1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, \ + pF, ROT, rn); \ +} while (0) + +#define I1024(R) \ +do { \ + /* inject the key schedule value */ \ + X00 += ks[r + (R) + 0]; \ + X01 += ks[r + (R) + 1]; \ + X02 += ks[r + (R) + 2]; \ + X03 += ks[r + (R) + 3]; \ + X04 += ks[r + (R) + 4]; \ + X05 += ks[r + (R) + 5]; \ + X06 += ks[r + (R) + 6]; \ + X07 += ks[r + (R) + 7]; \ + X08 += ks[r + (R) + 8]; \ + X09 += ks[r + (R) + 9]; \ + X10 += ks[r + (R) + 10]; \ + X11 += ks[r + (R) + 11]; \ + X12 += ks[r + (R) + 12]; \ + X13 += ks[r + (R) + 13] + ts[r + (R) + 0]; \ + X14 += ks[r + (R) + 14] + ts[r + (R) + 1]; \ + X15 += ks[r + (R) + 15] + r + (R); \ + /* rotate key schedule */ \ + ks[r + (R) + 16] = ks[r + (R) - 1]; \ + ts[r + (R) + 2] = ts[r + (R) - 1]; \ +} while (0) + +#endif +#define R1024_8_ROUNDS(R) \ +do { \ + R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, \ + R1024_0, 8*(R) + 1); \ + R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, 08, 01, \ + R1024_1, 8*(R) + 2); \ + R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, 10, 09, \ + R1024_2, 8*(R) + 3); \ + R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, 12, 07, \ + R1024_3, 8*(R) + 4); \ + I1024(2*(R)); \ + R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, \ + R1024_4, 8*(R) + 5); \ + R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, 08, 01, \ + R1024_5, 8*(R) + 6); \ + R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, 10, 09, \ + R1024_6, 8*(R) + 7); \ + R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, 12, 07, \ + R1024_7, 8*(R) + 8); \ + I1024(2*(R)+1); \ +} while (0) + +#define R1024_UNROLL_R(NN) \ + ((SKEIN_UNROLL_1024 == 0 && \ + SKEIN_1024_ROUNDS_TOTAL/8 > (NN)) || \ + (SKEIN_UNROLL_1024 > (NN))) + +#if (SKEIN_UNROLL_1024 > 14) +#error "need more unrolling in Skein_1024_Process_Block" +#endif +#endif + +/***************************** SKEIN_256 ******************************/ +#if !(SKEIN_USE_ASM & 256) +void skein_256_process_block(struct skein_256_ctx *ctx, const u8 *blk_ptr, + size_t blk_cnt, size_t byte_cnt_add) +{ /* do it in C */ + enum { + WCNT = SKEIN_256_STATE_WORDS + }; + size_t r; +#if SKEIN_UNROLL_256 + /* key schedule: chaining vars + tweak + "rot"*/ + u64 kw[WCNT+4+RCNT*2]; +#else + /* key schedule words : chaining vars + tweak */ + u64 kw[WCNT+4]; #endif u64 X0, X1, X2, X3; /* local copy of context vars, for speed */ u64 w[WCNT]; /* local copy of input block */ #ifdef SKEIN_DEBUG const u64 *X_ptr[4]; /* use for debugging (help cc put Xn in regs) */ - X_ptr[0] = &X0; X_ptr[1] = &X1; X_ptr[2] = &X2; X_ptr[3] = &X3; + X_ptr[0] = &X0; + X_ptr[1] = &X1; + X_ptr[2] = &X2; + X_ptr[3] = &X3; #endif skein_assert(blk_cnt != 0); /* never call with blk_cnt == 0! */ ts[0] = ctx->h.tweak[0]; @@ -94,132 +401,62 @@ void skein_256_process_block(struct skein_256_ctx *ctx, const u8 *blk_ptr, /* get input block in little-endian format */ skein_get64_lsb_first(w, blk_ptr, WCNT); debug_save_tweak(ctx); - skein_show_block(BLK_BITS, &ctx->h, ctx->x, blk_ptr, w, ks, ts); - X0 = w[0] + ks[0]; /* do the first full key injection */ + /* do the first full key injection */ + X0 = w[0] + ks[0]; X1 = w[1] + ks[1] + ts[0]; X2 = w[2] + ks[2] + ts[1]; X3 = w[3] + ks[3]; - /* show starting state values */ - skein_show_r_ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, - x_ptr); - blk_ptr += SKEIN_256_BLOCK_BYTES; /* run the rounds */ - -#define ROUND256(p0, p1, p2, p3, ROT, r_num) \ -do { \ - X##p0 += X##p1; X##p1 = rotl_64(X##p1, ROT##_0); X##p1 ^= X##p0; \ - X##p2 += X##p3; X##p3 = rotl_64(X##p3, ROT##_1); X##p3 ^= X##p2; \ -} while (0) - -#if SKEIN_UNROLL_256 == 0 -#define R256(p0, p1, p2, p3, ROT, r_num) /* fully unrolled */ \ -do { \ - ROUND256(p0, p1, p2, p3, ROT, r_num); \ - skein_show_r_ptr(BLK_BITS, &ctx->h, r_num, X_ptr); \ -} while (0) - -#define I256(R) \ -do { \ - /* inject the key schedule value */ \ - X0 += ks[((R)+1) % 5]; \ - X1 += ks[((R)+2) % 5] + ts[((R)+1) % 3]; \ - X2 += ks[((R)+3) % 5] + ts[((R)+2) % 3]; \ - X3 += ks[((R)+4) % 5] + (R)+1; \ - skein_show_r_ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, X_ptr); \ -} while (0) -#else /* looping version */ -#define R256(p0, p1, p2, p3, ROT, r_num) \ -do { \ - ROUND256(p0, p1, p2, p3, ROT, r_num); \ - skein_show_r_ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + r_num, X_ptr); \ -} while (0) - -#define I256(R) \ -do { \ - /* inject the key schedule value */ \ - X0 += ks[r+(R)+0]; \ - X1 += ks[r+(R)+1] + ts[r+(R)+0]; \ - X2 += ks[r+(R)+2] + ts[r+(R)+1]; \ - X3 += ks[r+(R)+3] + r+(R); \ - /* rotate key schedule */ \ - ks[r + (R) + 4] = ks[r + (R) - 1]; \ - ts[r + (R) + 2] = ts[r + (R) - 1]; \ - skein_show_r_ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, X_ptr); \ -} while (0) - - for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_256) + for (r = 1; + r < (SKEIN_UNROLL_256 ? 2 * RCNT : 2); + r += (SKEIN_UNROLL_256 ? 2 * SKEIN_UNROLL_256 : 1)) { + R256_8_ROUNDS(0); +#if R256_UNROLL_R(1) + R256_8_ROUNDS(1); +#endif +#if R256_UNROLL_R(2) + R256_8_ROUNDS(2); +#endif +#if R256_UNROLL_R(3) + R256_8_ROUNDS(3); +#endif +#if R256_UNROLL_R(4) + R256_8_ROUNDS(4); +#endif +#if R256_UNROLL_R(5) + R256_8_ROUNDS(5); +#endif +#if R256_UNROLL_R(6) + R256_8_ROUNDS(6); +#endif +#if R256_UNROLL_R(7) + R256_8_ROUNDS(7); +#endif +#if R256_UNROLL_R(8) + R256_8_ROUNDS(8); +#endif +#if R256_UNROLL_R(9) + R256_8_ROUNDS(9); +#endif +#if R256_UNROLL_R(10) + R256_8_ROUNDS(10); +#endif +#if R256_UNROLL_R(11) + R256_8_ROUNDS(11); +#endif +#if R256_UNROLL_R(12) + R256_8_ROUNDS(12); +#endif +#if R256_UNROLL_R(13) + R256_8_ROUNDS(13); +#endif +#if R256_UNROLL_R(14) + R256_8_ROUNDS(14); #endif - { -#define R256_8_ROUNDS(R) \ -do { \ - R256(0, 1, 2, 3, R_256_0, 8 * (R) + 1); \ - R256(0, 3, 2, 1, R_256_1, 8 * (R) + 2); \ - R256(0, 1, 2, 3, R_256_2, 8 * (R) + 3); \ - R256(0, 3, 2, 1, R_256_3, 8 * (R) + 4); \ - I256(2 * (R)); \ - R256(0, 1, 2, 3, R_256_4, 8 * (R) + 5); \ - R256(0, 3, 2, 1, R_256_5, 8 * (R) + 6); \ - R256(0, 1, 2, 3, R_256_6, 8 * (R) + 7); \ - R256(0, 3, 2, 1, R_256_7, 8 * (R) + 8); \ - I256(2 * (R) + 1); \ -} while (0) - - R256_8_ROUNDS(0); - -#define R256_UNROLL_R(NN) \ - ((SKEIN_UNROLL_256 == 0 && \ - SKEIN_256_ROUNDS_TOTAL/8 > (NN)) || \ - (SKEIN_UNROLL_256 > (NN))) - - #if R256_UNROLL_R(1) - R256_8_ROUNDS(1); - #endif - #if R256_UNROLL_R(2) - R256_8_ROUNDS(2); - #endif - #if R256_UNROLL_R(3) - R256_8_ROUNDS(3); - #endif - #if R256_UNROLL_R(4) - R256_8_ROUNDS(4); - #endif - #if R256_UNROLL_R(5) - R256_8_ROUNDS(5); - #endif - #if R256_UNROLL_R(6) - R256_8_ROUNDS(6); - #endif - #if R256_UNROLL_R(7) - R256_8_ROUNDS(7); - #endif - #if R256_UNROLL_R(8) - R256_8_ROUNDS(8); - #endif - #if R256_UNROLL_R(9) - R256_8_ROUNDS(9); - #endif - #if R256_UNROLL_R(10) - R256_8_ROUNDS(10); - #endif - #if R256_UNROLL_R(11) - R256_8_ROUNDS(11); - #endif - #if R256_UNROLL_R(12) - R256_8_ROUNDS(12); - #endif - #if R256_UNROLL_R(13) - R256_8_ROUNDS(13); - #endif - #if R256_UNROLL_R(14) - R256_8_ROUNDS(14); - #endif - #if (SKEIN_UNROLL_256 > 14) -#error "need more unrolling in skein_256_process_block" - #endif } /* do the final "feedforward" xor, update context chaining */ ctx->x[0] = X0 ^ w[0]; @@ -227,8 +464,6 @@ do { \ ctx->x[2] = X2 ^ w[2]; ctx->x[3] = X3 ^ w[3]; - skein_show_round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->x); - ts[1] &= ~SKEIN_T1_FLAG_FIRST; } while (--blk_cnt); ctx->h.tweak[0] = ts[0]; @@ -256,20 +491,8 @@ void skein_512_process_block(struct skein_512_ctx *ctx, const u8 *blk_ptr, enum { WCNT = SKEIN_512_STATE_WORDS }; -#undef RCNT -#define RCNT (SKEIN_512_ROUNDS_TOTAL/8) - -#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ -#define SKEIN_UNROLL_512 (((SKEIN_LOOP)/10)%10) -#else -#define SKEIN_UNROLL_512 (0) -#endif - -#if SKEIN_UNROLL_512 -#if (RCNT % SKEIN_UNROLL_512) -#error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */ -#endif size_t r; +#if SKEIN_UNROLL_512 u64 kw[WCNT+4+RCNT*2]; /* key sched: chaining vars + tweak + "rot"*/ #else u64 kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ @@ -279,8 +502,14 @@ void skein_512_process_block(struct skein_512_ctx *ctx, const u8 *blk_ptr, #ifdef SKEIN_DEBUG const u64 *X_ptr[8]; /* use for debugging (help cc put Xn in regs) */ - X_ptr[0] = &X0; X_ptr[1] = &X1; X_ptr[2] = &X2; X_ptr[3] = &X3; - X_ptr[4] = &X4; X_ptr[5] = &X5; X_ptr[6] = &X6; X_ptr[7] = &X7; + X_ptr[0] = &X0; + X_ptr[1] = &X1; + X_ptr[2] = &X2; + X_ptr[3] = &X3; + X_ptr[4] = &X4; + X_ptr[5] = &X5; + X_ptr[6] = &X6; + X_ptr[7] = &X7; #endif skein_assert(blk_cnt != 0); /* never call with blk_cnt == 0! */ @@ -310,143 +539,68 @@ void skein_512_process_block(struct skein_512_ctx *ctx, const u8 *blk_ptr, /* get input block in little-endian format */ skein_get64_lsb_first(w, blk_ptr, WCNT); debug_save_tweak(ctx); - skein_show_block(BLK_BITS, &ctx->h, ctx->x, blk_ptr, w, ks, ts); - X0 = w[0] + ks[0]; /* do the first full key injection */ - X1 = w[1] + ks[1]; - X2 = w[2] + ks[2]; - X3 = w[3] + ks[3]; - X4 = w[4] + ks[4]; - X5 = w[5] + ks[5] + ts[0]; - X6 = w[6] + ks[6] + ts[1]; - X7 = w[7] + ks[7]; + /* do the first full key injection */ + X0 = w[0] + ks[0]; + X1 = w[1] + ks[1]; + X2 = w[2] + ks[2]; + X3 = w[3] + ks[3]; + X4 = w[4] + ks[4]; + X5 = w[5] + ks[5] + ts[0]; + X6 = w[6] + ks[6] + ts[1]; + X7 = w[7] + ks[7]; blk_ptr += SKEIN_512_BLOCK_BYTES; - skein_show_r_ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, - X_ptr); /* run the rounds */ -#define ROUND512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num) \ -do { \ - X##p0 += X##p1; X##p1 = rotl_64(X##p1, ROT##_0); X##p1 ^= X##p0; \ - X##p2 += X##p3; X##p3 = rotl_64(X##p3, ROT##_1); X##p3 ^= X##p2; \ - X##p4 += X##p5; X##p5 = rotl_64(X##p5, ROT##_2); X##p5 ^= X##p4; \ - X##p6 += X##p7; X##p7 = rotl_64(X##p7, ROT##_3); X##p7 ^= X##p6; \ -} while (0) - -#if SKEIN_UNROLL_512 == 0 -#define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num) /* unrolled */ \ -do { \ - ROUND512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num) \ - skein_show_r_ptr(BLK_BITS, &ctx->h, r_num, X_ptr); \ -} while (0) - -#define I512(R) \ -do { \ - /* inject the key schedule value */ \ - X0 += ks[((R) + 1) % 9]; \ - X1 += ks[((R) + 2) % 9]; \ - X2 += ks[((R) + 3) % 9]; \ - X3 += ks[((R) + 4) % 9]; \ - X4 += ks[((R) + 5) % 9]; \ - X5 += ks[((R) + 6) % 9] + ts[((R) + 1) % 3]; \ - X6 += ks[((R) + 7) % 9] + ts[((R) + 2) % 3]; \ - X7 += ks[((R) + 8) % 9] + (R) + 1; \ - skein_show_r_ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, X_ptr); \ -} while (0) -#else /* looping version */ -#define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num) \ -do { \ - ROUND512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, r_num); \ - skein_show_r_ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + r_num, X_ptr); \ -} while (0) - -#define I512(R) \ -do { \ - /* inject the key schedule value */ \ - X0 += ks[r + (R) + 0]; \ - X1 += ks[r + (R) + 1]; \ - X2 += ks[r + (R) + 2]; \ - X3 += ks[r + (R) + 3]; \ - X4 += ks[r + (R) + 4]; \ - X5 += ks[r + (R) + 5] + ts[r + (R) + 0]; \ - X6 += ks[r + (R) + 6] + ts[r + (R) + 1]; \ - X7 += ks[r + (R) + 7] + r + (R); \ - /* rotate key schedule */ \ - ks[r + (R) + 8] = ks[r + (R) - 1]; \ - ts[r + (R) + 2] = ts[r + (R) - 1]; \ - skein_show_r_ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, X_ptr); \ -} while (0) - - for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_512) -#endif /* end of looped code definitions */ - { -#define R512_8_ROUNDS(R) /* do 8 full rounds */ \ -do { \ - R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_0, 8 * (R) + 1); \ - R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_1, 8 * (R) + 2); \ - R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_2, 8 * (R) + 3); \ - R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_3, 8 * (R) + 4); \ - I512(2 * (R)); \ - R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_4, 8 * (R) + 5); \ - R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_5, 8 * (R) + 6); \ - R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_6, 8 * (R) + 7); \ - R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_7, 8 * (R) + 8); \ - I512(2 * (R) + 1); /* and key injection */ \ -} while (0) + for (r = 1; + r < (SKEIN_UNROLL_512 ? 2 * RCNT : 2); + r += (SKEIN_UNROLL_512 ? 2 * SKEIN_UNROLL_512 : 1)) { R512_8_ROUNDS(0); -#define R512_UNROLL_R(NN) \ - ((SKEIN_UNROLL_512 == 0 && \ - SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || \ - (SKEIN_UNROLL_512 > (NN))) - - #if R512_UNROLL_R(1) +#if R512_UNROLL_R(1) R512_8_ROUNDS(1); - #endif - #if R512_UNROLL_R(2) +#endif +#if R512_UNROLL_R(2) R512_8_ROUNDS(2); - #endif - #if R512_UNROLL_R(3) +#endif +#if R512_UNROLL_R(3) R512_8_ROUNDS(3); - #endif - #if R512_UNROLL_R(4) +#endif +#if R512_UNROLL_R(4) R512_8_ROUNDS(4); - #endif - #if R512_UNROLL_R(5) +#endif +#if R512_UNROLL_R(5) R512_8_ROUNDS(5); - #endif - #if R512_UNROLL_R(6) +#endif +#if R512_UNROLL_R(6) R512_8_ROUNDS(6); - #endif - #if R512_UNROLL_R(7) +#endif +#if R512_UNROLL_R(7) R512_8_ROUNDS(7); - #endif - #if R512_UNROLL_R(8) +#endif +#if R512_UNROLL_R(8) R512_8_ROUNDS(8); - #endif - #if R512_UNROLL_R(9) +#endif +#if R512_UNROLL_R(9) R512_8_ROUNDS(9); - #endif - #if R512_UNROLL_R(10) +#endif +#if R512_UNROLL_R(10) R512_8_ROUNDS(10); - #endif - #if R512_UNROLL_R(11) +#endif +#if R512_UNROLL_R(11) R512_8_ROUNDS(11); - #endif - #if R512_UNROLL_R(12) +#endif +#if R512_UNROLL_R(12) R512_8_ROUNDS(12); - #endif - #if R512_UNROLL_R(13) +#endif +#if R512_UNROLL_R(13) R512_8_ROUNDS(13); - #endif - #if R512_UNROLL_R(14) +#endif +#if R512_UNROLL_R(14) R512_8_ROUNDS(14); - #endif - #if (SKEIN_UNROLL_512 > 14) -#error "need more unrolling in skein_512_process_block" - #endif +#endif } /* do the final "feedforward" xor, update context chaining */ @@ -458,7 +612,6 @@ do { \ ctx->x[5] = X5 ^ w[5]; ctx->x[6] = X6 ^ w[6]; ctx->x[7] = X7 ^ w[7]; - skein_show_round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->x); ts[1] &= ~SKEIN_T1_FLAG_FIRST; } while (--blk_cnt); @@ -487,20 +640,8 @@ void skein_1024_process_block(struct skein_1024_ctx *ctx, const u8 *blk_ptr, enum { WCNT = SKEIN_1024_STATE_WORDS }; -#undef RCNT -#define RCNT (SKEIN_1024_ROUNDS_TOTAL/8) - -#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ -#define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10) -#else -#define SKEIN_UNROLL_1024 (0) -#endif - -#if (SKEIN_UNROLL_1024 != 0) -#if (RCNT % SKEIN_UNROLL_1024) -#error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */ -#endif size_t r; +#if (SKEIN_UNROLL_1024 != 0) u64 kw[WCNT+4+RCNT*2]; /* key sched: chaining vars + tweak + "rot" */ #else u64 kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ @@ -510,16 +651,6 @@ void skein_1024_process_block(struct skein_1024_ctx *ctx, const u8 *blk_ptr, u64 X00, X01, X02, X03, X04, X05, X06, X07, X08, X09, X10, X11, X12, X13, X14, X15; u64 w[WCNT]; /* local copy of input block */ -#ifdef SKEIN_DEBUG - const u64 *X_ptr[16]; /* use for debugging (help cc put Xn in regs) */ - - X_ptr[0] = &X00; X_ptr[1] = &X01; X_ptr[2] = &X02; - X_ptr[3] = &X03; X_ptr[4] = &X04; X_ptr[5] = &X05; - X_ptr[6] = &X06; X_ptr[7] = &X07; X_ptr[8] = &X08; - X_ptr[9] = &X09; X_ptr[10] = &X10; X_ptr[11] = &X11; - X_ptr[12] = &X12; X_ptr[13] = &X13; X_ptr[14] = &X14; - X_ptr[15] = &X15; -#endif skein_assert(blk_cnt != 0); /* never call with blk_cnt == 0! */ ts[0] = ctx->h.tweak[0]; @@ -548,192 +679,81 @@ void skein_1024_process_block(struct skein_1024_ctx *ctx, const u8 *blk_ptr, ks[13] = ctx->x[13]; ks[14] = ctx->x[14]; ks[15] = ctx->x[15]; - ks[16] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ - ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ - ks[8] ^ ks[9] ^ ks[10] ^ ks[11] ^ + ks[16] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ + ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ + ks[8] ^ ks[9] ^ ks[10] ^ ks[11] ^ ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY; - ts[2] = ts[0] ^ ts[1]; + ts[2] = ts[0] ^ ts[1]; /* get input block in little-endian format */ skein_get64_lsb_first(w, blk_ptr, WCNT); debug_save_tweak(ctx); - skein_show_block(BLK_BITS, &ctx->h, ctx->x, blk_ptr, w, ks, ts); - - X00 = w[0] + ks[0]; /* do the first full key injection */ - X01 = w[1] + ks[1]; - X02 = w[2] + ks[2]; - X03 = w[3] + ks[3]; - X04 = w[4] + ks[4]; - X05 = w[5] + ks[5]; - X06 = w[6] + ks[6]; - X07 = w[7] + ks[7]; - X08 = w[8] + ks[8]; - X09 = w[9] + ks[9]; - X10 = w[10] + ks[10]; - X11 = w[11] + ks[11]; - X12 = w[12] + ks[12]; - X13 = w[13] + ks[13] + ts[0]; - X14 = w[14] + ks[14] + ts[1]; - X15 = w[15] + ks[15]; - - skein_show_r_ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, - X_ptr); - -#define ROUND1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, \ - pF, ROT, r_num) \ -do { \ - X##p0 += X##p1; X##p1 = rotl_64(X##p1, ROT##_0); X##p1 ^= X##p0; \ - X##p2 += X##p3; X##p3 = rotl_64(X##p3, ROT##_1); X##p3 ^= X##p2; \ - X##p4 += X##p5; X##p5 = rotl_64(X##p5, ROT##_2); X##p5 ^= X##p4; \ - X##p6 += X##p7; X##p7 = rotl_64(X##p7, ROT##_3); X##p7 ^= X##p6; \ - X##p8 += X##p9; X##p9 = rotl_64(X##p9, ROT##_4); X##p9 ^= X##p8; \ - X##pA += X##pB; X##pB = rotl_64(X##pB, ROT##_5); X##pB ^= X##pA; \ - X##pC += X##pD; X##pD = rotl_64(X##pD, ROT##_6); X##pD ^= X##pC; \ - X##pE += X##pF; X##pF = rotl_64(X##pF, ROT##_7); X##pF ^= X##pE; \ -} while (0) - -#if SKEIN_UNROLL_1024 == 0 -#define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, \ - ROT, rn) \ -do { \ - ROUND1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, \ - pF, ROT, rn); \ - skein_show_r_ptr(BLK_BITS, &ctx->h, rn, X_ptr); \ -} while (0) - -#define I1024(R) \ -do { \ - /* inject the key schedule value */ \ - X00 += ks[((R) + 1) % 17]; \ - X01 += ks[((R) + 2) % 17]; \ - X02 += ks[((R) + 3) % 17]; \ - X03 += ks[((R) + 4) % 17]; \ - X04 += ks[((R) + 5) % 17]; \ - X05 += ks[((R) + 6) % 17]; \ - X06 += ks[((R) + 7) % 17]; \ - X07 += ks[((R) + 8) % 17]; \ - X08 += ks[((R) + 9) % 17]; \ - X09 += ks[((R) + 10) % 17]; \ - X10 += ks[((R) + 11) % 17]; \ - X11 += ks[((R) + 12) % 17]; \ - X12 += ks[((R) + 13) % 17]; \ - X13 += ks[((R) + 14) % 17] + ts[((R) + 1) % 3]; \ - X14 += ks[((R) + 15) % 17] + ts[((R) + 2) % 3]; \ - X15 += ks[((R) + 16) % 17] + (R) + 1; \ - skein_show_r_ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, X_ptr); \ -} while (0) -#else /* looping version */ -#define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, \ - ROT, rn) \ -do { \ - ROUND1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, \ - pF, ROT, rn); \ - skein_show_r_ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rn, X_ptr); \ -} while (0) - -#define I1024(R) \ -do { \ - /* inject the key schedule value */ \ - X00 += ks[r + (R) + 0]; \ - X01 += ks[r + (R) + 1]; \ - X02 += ks[r + (R) + 2]; \ - X03 += ks[r + (R) + 3]; \ - X04 += ks[r + (R) + 4]; \ - X05 += ks[r + (R) + 5]; \ - X06 += ks[r + (R) + 6]; \ - X07 += ks[r + (R) + 7]; \ - X08 += ks[r + (R) + 8]; \ - X09 += ks[r + (R) + 9]; \ - X10 += ks[r + (R) + 10]; \ - X11 += ks[r + (R) + 11]; \ - X12 += ks[r + (R) + 12]; \ - X13 += ks[r + (R) + 13] + ts[r + (R) + 0]; \ - X14 += ks[r + (R) + 14] + ts[r + (R) + 1]; \ - X15 += ks[r + (R) + 15] + r + (R); \ - /* rotate key schedule */ \ - ks[r + (R) + 16] = ks[r + (R) - 1]; \ - ts[r + (R) + 2] = ts[r + (R) - 1]; \ - skein_show_r_ptr(BLK_BITSi, &ctx->h, SKEIN_RND_KEY_INJECT, X_ptr); \ -} while (0) - - for (r = 1; r <= 2 * RCNT; r += 2 * SKEIN_UNROLL_1024) -#endif - { -#define R1024_8_ROUNDS(R) \ -do { \ - R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, \ - R1024_0, 8*(R) + 1); \ - R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, 08, 01, \ - R1024_1, 8*(R) + 2); \ - R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, 10, 09, \ - R1024_2, 8*(R) + 3); \ - R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, 12, 07, \ - R1024_3, 8*(R) + 4); \ - I1024(2*(R)); \ - R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, \ - R1024_4, 8*(R) + 5); \ - R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, 08, 01, \ - R1024_5, 8*(R) + 6); \ - R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, 10, 09, \ - R1024_6, 8*(R) + 7); \ - R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, 12, 07, \ - R1024_7, 8*(R) + 8); \ - I1024(2*(R)+1); \ -} while (0) + /* do the first full key injection */ + X00 = w[0] + ks[0]; + X01 = w[1] + ks[1]; + X02 = w[2] + ks[2]; + X03 = w[3] + ks[3]; + X04 = w[4] + ks[4]; + X05 = w[5] + ks[5]; + X06 = w[6] + ks[6]; + X07 = w[7] + ks[7]; + X08 = w[8] + ks[8]; + X09 = w[9] + ks[9]; + X10 = w[10] + ks[10]; + X11 = w[11] + ks[11]; + X12 = w[12] + ks[12]; + X13 = w[13] + ks[13] + ts[0]; + X14 = w[14] + ks[14] + ts[1]; + X15 = w[15] + ks[15]; + + for (r = 1; + r < (SKEIN_UNROLL_1024 ? 2 * RCNT : 2); + r += (SKEIN_UNROLL_1024 ? 2 * SKEIN_UNROLL_1024 : 1)) { R1024_8_ROUNDS(0); - -#define R1024_UNROLL_R(NN) \ - ((SKEIN_UNROLL_1024 == 0 && \ - SKEIN_1024_ROUNDS_TOTAL/8 > (NN)) || \ - (SKEIN_UNROLL_1024 > (NN))) - - #if R1024_UNROLL_R(1) +#if R1024_UNROLL_R(1) R1024_8_ROUNDS(1); - #endif - #if R1024_UNROLL_R(2) +#endif +#if R1024_UNROLL_R(2) R1024_8_ROUNDS(2); - #endif - #if R1024_UNROLL_R(3) +#endif +#if R1024_UNROLL_R(3) R1024_8_ROUNDS(3); - #endif - #if R1024_UNROLL_R(4) +#endif +#if R1024_UNROLL_R(4) R1024_8_ROUNDS(4); - #endif - #if R1024_UNROLL_R(5) +#endif +#if R1024_UNROLL_R(5) R1024_8_ROUNDS(5); - #endif - #if R1024_UNROLL_R(6) +#endif +#if R1024_UNROLL_R(6) R1024_8_ROUNDS(6); - #endif - #if R1024_UNROLL_R(7) +#endif +#if R1024_UNROLL_R(7) R1024_8_ROUNDS(7); - #endif - #if R1024_UNROLL_R(8) +#endif +#if R1024_UNROLL_R(8) R1024_8_ROUNDS(8); - #endif - #if R1024_UNROLL_R(9) +#endif +#if R1024_UNROLL_R(9) R1024_8_ROUNDS(9); - #endif - #if R1024_UNROLL_R(10) +#endif +#if R1024_UNROLL_R(10) R1024_8_ROUNDS(10); - #endif - #if R1024_UNROLL_R(11) +#endif +#if R1024_UNROLL_R(11) R1024_8_ROUNDS(11); - #endif - #if R1024_UNROLL_R(12) +#endif +#if R1024_UNROLL_R(12) R1024_8_ROUNDS(12); - #endif - #if R1024_UNROLL_R(13) +#endif +#if R1024_UNROLL_R(13) R1024_8_ROUNDS(13); - #endif - #if R1024_UNROLL_R(14) +#endif +#if R1024_UNROLL_R(14) R1024_8_ROUNDS(14); - #endif -#if (SKEIN_UNROLL_1024 > 14) -#error "need more unrolling in Skein_1024_Process_Block" - #endif +#endif } /* do the final "feedforward" xor, update context chaining */ @@ -754,8 +774,6 @@ do { \ ctx->x[14] = X14 ^ w[14]; ctx->x[15] = X15 ^ w[15]; - skein_show_round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->x); - ts[1] &= ~SKEIN_T1_FLAG_FIRST; blk_ptr += SKEIN_1024_BLOCK_BYTES; } while (--blk_cnt); diff --git a/drivers/staging/skein/skein_block.h b/drivers/staging/skein/skein_block.h index bd7bdc35df29..9d40f4a5267b 100644 --- a/drivers/staging/skein/skein_block.h +++ b/drivers/staging/skein/skein_block.h @@ -10,7 +10,7 @@ #ifndef _SKEIN_BLOCK_H_ #define _SKEIN_BLOCK_H_ -#include "skein.h" /* get the Skein API definitions */ +#include "skein_base.h" /* get the Skein API definitions */ void skein_256_process_block(struct skein_256_ctx *ctx, const u8 *blk_ptr, size_t blk_cnt, size_t byte_cnt_add); diff --git a/drivers/staging/skein/skein_generic.c b/drivers/staging/skein/skein_generic.c new file mode 100644 index 000000000000..85bd7d0168b0 --- /dev/null +++ b/drivers/staging/skein/skein_generic.c @@ -0,0 +1,216 @@ +/* + * Cryptographic API. + * + * Skein256 Hash Algorithm. + * + * Derived from cryptoapi implementation, adapted for in-place + * scatterlist interface. + * + * Copyright (c) Eric Rost <eric.rost@mybabylon.net> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#include <linux/types.h> +#include <linux/init.h> +#include <linux/module.h> +#include <crypto/internal/hash.h> +#include "skein_base.h" + + +static int skein256_init(struct shash_desc *desc) +{ + return skein_256_init((struct skein_256_ctx *) shash_desc_ctx(desc), + SKEIN256_DIGEST_BIT_SIZE); +} + +static int skein256_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return skein_256_update((struct skein_256_ctx *)shash_desc_ctx(desc), + data, len); +} + +static int skein256_final(struct shash_desc *desc, u8 *out) +{ + return skein_256_final((struct skein_256_ctx *)shash_desc_ctx(desc), + out); +} + +static int skein256_export(struct shash_desc *desc, void *out) +{ + struct skein_256_ctx *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + return 0; +} + +static int skein256_import(struct shash_desc *desc, const void *in) +{ + struct skein_256_ctx *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + return 0; +} + +static int skein512_init(struct shash_desc *desc) +{ + return skein_512_init((struct skein_512_ctx *)shash_desc_ctx(desc), + SKEIN512_DIGEST_BIT_SIZE); +} + +static int skein512_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return skein_512_update((struct skein_512_ctx *)shash_desc_ctx(desc), + data, len); +} + +static int skein512_final(struct shash_desc *desc, u8 *out) +{ + return skein_512_final((struct skein_512_ctx *)shash_desc_ctx(desc), + out); +} + +static int skein512_export(struct shash_desc *desc, void *out) +{ + struct skein_512_ctx *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + return 0; +} + +static int skein512_import(struct shash_desc *desc, const void *in) +{ + struct skein_512_ctx *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + return 0; +} + +static int skein1024_init(struct shash_desc *desc) +{ + return skein_1024_init((struct skein_1024_ctx *)shash_desc_ctx(desc), + SKEIN1024_DIGEST_BIT_SIZE); +} + +static int skein1024_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return skein_1024_update((struct skein_1024_ctx *)shash_desc_ctx(desc), + data, len); +} + +static int skein1024_final(struct shash_desc *desc, u8 *out) +{ + return skein_1024_final((struct skein_1024_ctx *)shash_desc_ctx(desc), + out); +} + +static int skein1024_export(struct shash_desc *desc, void *out) +{ + struct skein_1024_ctx *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + return 0; +} + +static int skein1024_import(struct shash_desc *desc, const void *in) +{ + struct skein_1024_ctx *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + return 0; +} + +static struct shash_alg alg256 = { + .digestsize = (SKEIN256_DIGEST_BIT_SIZE / 8), + .init = skein256_init, + .update = skein256_update, + .final = skein256_final, + .export = skein256_export, + .import = skein256_import, + .descsize = sizeof(struct skein_256_ctx), + .statesize = sizeof(struct skein_256_ctx), + .base = { + .cra_name = "skein256", + .cra_driver_name = "skein", + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SKEIN_256_BLOCK_BYTES, + .cra_module = THIS_MODULE, + } +}; + +static struct shash_alg alg512 = { + .digestsize = (SKEIN512_DIGEST_BIT_SIZE / 8), + .init = skein512_init, + .update = skein512_update, + .final = skein512_final, + .export = skein512_export, + .import = skein512_import, + .descsize = sizeof(struct skein_512_ctx), + .statesize = sizeof(struct skein_512_ctx), + .base = { + .cra_name = "skein512", + .cra_driver_name = "skein", + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SKEIN_512_BLOCK_BYTES, + .cra_module = THIS_MODULE, + } +}; + +static struct shash_alg alg1024 = { + .digestsize = (SKEIN1024_DIGEST_BIT_SIZE / 8), + .init = skein1024_init, + .update = skein1024_update, + .final = skein1024_final, + .export = skein1024_export, + .import = skein1024_import, + .descsize = sizeof(struct skein_1024_ctx), + .statesize = sizeof(struct skein_1024_ctx), + .base = { + .cra_name = "skein1024", + .cra_driver_name = "skein", + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SKEIN_1024_BLOCK_BYTES, + .cra_module = THIS_MODULE, + } +}; + +static int __init skein_generic_init(void) +{ + if (crypto_register_shash(&alg256)) + goto out; + if (crypto_register_shash(&alg512)) + goto unreg256; + if (crypto_register_shash(&alg1024)) + goto unreg512; + + return 0; + + +unreg512: + crypto_unregister_shash(&alg512); +unreg256: + crypto_unregister_shash(&alg256); +out: + return -1; +} + +static void __exit skein_generic_fini(void) +{ + crypto_unregister_shash(&alg256); + crypto_unregister_shash(&alg512); + crypto_unregister_shash(&alg1024); +} + +module_init(skein_generic_init); +module_exit(skein_generic_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Skein Hash Algorithm"); + +MODULE_ALIAS("skein"); diff --git a/drivers/staging/skein/skein_iv.h b/drivers/staging/skein/skein_iv.h index d9dc1d5ed551..8a06314d0ed4 100644 --- a/drivers/staging/skein/skein_iv.h +++ b/drivers/staging/skein/skein_iv.h @@ -1,7 +1,7 @@ #ifndef _SKEIN_IV_H_ #define _SKEIN_IV_H_ -#include "skein.h" /* get Skein macros and types */ +#include "skein_base.h" /* get Skein macros and types */ /* ***************** Pre-computed Skein IVs ******************* diff --git a/drivers/staging/skein/threefish_api.h b/drivers/staging/skein/threefish_api.h index 8d5ddf8b3a9b..8e0a0b77ecce 100644 --- a/drivers/staging/skein/threefish_api.h +++ b/drivers/staging/skein/threefish_api.h @@ -29,7 +29,7 @@ */ #include <linux/types.h> -#include "skein.h" +#include "skein_base.h" #define KEY_SCHEDULE_CONST 0x1BD11BDAA9FC1A22L |