diff options
Diffstat (limited to 'arch/x86')
76 files changed, 5028 insertions, 5692 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 27fede438959..d234cca296db 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1296,7 +1296,7 @@ config MICROCODE the Linux kernel. The preferred method to load microcode from a detached initrd is described - in Documentation/x86/early-microcode.txt. For that you need to enable + in Documentation/x86/microcode.txt. For that you need to enable CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the initrd for microcode blobs. @@ -2665,11 +2665,13 @@ config PCI_CNB20LE_QUIRK source "drivers/pci/Kconfig" config ISA_BUS - bool "ISA-style bus support on modern systems" if EXPERT - select ISA_BUS_API + bool "ISA bus support on modern systems" if EXPERT help - Enables ISA-style drivers on modern systems. This is necessary to - support PC/104 devices on X86_64 platforms. + Expose ISA bus device drivers and options available for selection and + configuration. Enable this option if your target machine has an ISA + bus. ISA is an older system, displaced by PCI and newer bus + architectures -- if your target machine is modern, it probably does + not have an ISA bus. If unsure, say N. diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 12e8484a8ee7..e762ef417562 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -94,23 +94,30 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff #define STACK_OFFSET 8*3 -#define HashKey 16*0 // store HashKey <<1 mod poly here -#define HashKey_2 16*1 // store HashKey^2 <<1 mod poly here -#define HashKey_3 16*2 // store HashKey^3 <<1 mod poly here -#define HashKey_4 16*3 // store HashKey^4 <<1 mod poly here -#define HashKey_k 16*4 // store XOR of High 64 bits and Low 64 + +#define AadHash 16*0 +#define AadLen 16*1 +#define InLen (16*1)+8 +#define PBlockEncKey 16*2 +#define OrigIV 16*3 +#define CurCount 16*4 +#define PBlockLen 16*5 +#define HashKey 16*6 // store HashKey <<1 mod poly here +#define HashKey_2 16*7 // store HashKey^2 <<1 mod poly here +#define HashKey_3 16*8 // store HashKey^3 <<1 mod poly here +#define HashKey_4 16*9 // store HashKey^4 <<1 mod poly here +#define HashKey_k 16*10 // store XOR of High 64 bits and Low 64 // bits of HashKey <<1 mod poly here //(for Karatsuba purposes) -#define HashKey_2_k 16*5 // store XOR of High 64 bits and Low 64 +#define HashKey_2_k 16*11 // store XOR of High 64 bits and Low 64 // bits of HashKey^2 <<1 mod poly here // (for Karatsuba purposes) -#define HashKey_3_k 16*6 // store XOR of High 64 bits and Low 64 +#define HashKey_3_k 16*12 // store XOR of High 64 bits and Low 64 // bits of HashKey^3 <<1 mod poly here // (for Karatsuba purposes) -#define HashKey_4_k 16*7 // store XOR of High 64 bits and Low 64 +#define HashKey_4_k 16*13 // store XOR of High 64 bits and Low 64 // bits of HashKey^4 <<1 mod poly here // (for Karatsuba purposes) -#define VARIABLE_OFFSET 16*8 #define arg1 rdi #define arg2 rsi @@ -118,10 +125,11 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff #define arg4 rcx #define arg5 r8 #define arg6 r9 -#define arg7 STACK_OFFSET+8(%r14) -#define arg8 STACK_OFFSET+16(%r14) -#define arg9 STACK_OFFSET+24(%r14) -#define arg10 STACK_OFFSET+32(%r14) +#define arg7 STACK_OFFSET+8(%rsp) +#define arg8 STACK_OFFSET+16(%rsp) +#define arg9 STACK_OFFSET+24(%rsp) +#define arg10 STACK_OFFSET+32(%rsp) +#define arg11 STACK_OFFSET+40(%rsp) #define keysize 2*15*16(%arg1) #endif @@ -171,6 +179,332 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff #define TKEYP T1 #endif +.macro FUNC_SAVE + push %r12 + push %r13 + push %r14 +# +# states of %xmm registers %xmm6:%xmm15 not saved +# all %xmm registers are clobbered +# +.endm + + +.macro FUNC_RESTORE + pop %r14 + pop %r13 + pop %r12 +.endm + +# Precompute hashkeys. +# Input: Hash subkey. +# Output: HashKeys stored in gcm_context_data. Only needs to be called +# once per key. +# clobbers r12, and tmp xmm registers. +.macro PRECOMPUTE SUBKEY TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7 + mov \SUBKEY, %r12 + movdqu (%r12), \TMP3 + movdqa SHUF_MASK(%rip), \TMP2 + PSHUFB_XMM \TMP2, \TMP3 + + # precompute HashKey<<1 mod poly from the HashKey (required for GHASH) + + movdqa \TMP3, \TMP2 + psllq $1, \TMP3 + psrlq $63, \TMP2 + movdqa \TMP2, \TMP1 + pslldq $8, \TMP2 + psrldq $8, \TMP1 + por \TMP2, \TMP3 + + # reduce HashKey<<1 + + pshufd $0x24, \TMP1, \TMP2 + pcmpeqd TWOONE(%rip), \TMP2 + pand POLY(%rip), \TMP2 + pxor \TMP2, \TMP3 + movdqa \TMP3, HashKey(%arg2) + + movdqa \TMP3, \TMP5 + pshufd $78, \TMP3, \TMP1 + pxor \TMP3, \TMP1 + movdqa \TMP1, HashKey_k(%arg2) + + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^2<<1 (mod poly) + movdqa \TMP5, HashKey_2(%arg2) +# HashKey_2 = HashKey^2<<1 (mod poly) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_2_k(%arg2) + + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^3<<1 (mod poly) + movdqa \TMP5, HashKey_3(%arg2) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_3_k(%arg2) + + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^3<<1 (mod poly) + movdqa \TMP5, HashKey_4(%arg2) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_4_k(%arg2) +.endm + +# GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding. +# Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13 +.macro GCM_INIT Iv SUBKEY AAD AADLEN + mov \AADLEN, %r11 + mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length + xor %r11, %r11 + mov %r11, InLen(%arg2) # ctx_data.in_length = 0 + mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0 + mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0 + mov \Iv, %rax + movdqu (%rax), %xmm0 + movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv + + movdqa SHUF_MASK(%rip), %xmm2 + PSHUFB_XMM %xmm2, %xmm0 + movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv + + PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + movdqa HashKey(%arg2), %xmm13 + + CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \ + %xmm4, %xmm5, %xmm6 +.endm + +# GCM_ENC_DEC Encodes/Decodes given data. Assumes that the passed gcm_context +# struct has been initialized by GCM_INIT. +# Requires the input data be at least 1 byte long because of READ_PARTIAL_BLOCK +# Clobbers rax, r10-r13, and xmm0-xmm15 +.macro GCM_ENC_DEC operation + movdqu AadHash(%arg2), %xmm8 + movdqu HashKey(%arg2), %xmm13 + add %arg5, InLen(%arg2) + + xor %r11, %r11 # initialise the data pointer offset as zero + PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation + + sub %r11, %arg5 # sub partial block data used + mov %arg5, %r13 # save the number of bytes + + and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) + mov %r13, %r12 + # Encrypt/Decrypt first few blocks + + and $(3<<4), %r12 + jz _initial_num_blocks_is_0_\@ + cmp $(2<<4), %r12 + jb _initial_num_blocks_is_1_\@ + je _initial_num_blocks_is_2_\@ +_initial_num_blocks_is_3_\@: + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation + sub $48, %r13 + jmp _initial_blocks_\@ +_initial_num_blocks_is_2_\@: + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation + sub $32, %r13 + jmp _initial_blocks_\@ +_initial_num_blocks_is_1_\@: + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation + sub $16, %r13 + jmp _initial_blocks_\@ +_initial_num_blocks_is_0_\@: + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation +_initial_blocks_\@: + + # Main loop - Encrypt/Decrypt remaining blocks + + cmp $0, %r13 + je _zero_cipher_left_\@ + sub $64, %r13 + je _four_cipher_left_\@ +_crypt_by_4_\@: + GHASH_4_ENCRYPT_4_PARALLEL_\operation %xmm9, %xmm10, %xmm11, %xmm12, \ + %xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \ + %xmm7, %xmm8, enc + add $64, %r11 + sub $64, %r13 + jne _crypt_by_4_\@ +_four_cipher_left_\@: + GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ +%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 +_zero_cipher_left_\@: + movdqu %xmm8, AadHash(%arg2) + movdqu %xmm0, CurCount(%arg2) + + mov %arg5, %r13 + and $15, %r13 # %r13 = arg5 (mod 16) + je _multiple_of_16_bytes_\@ + + mov %r13, PBlockLen(%arg2) + + # Handle the last <16 Byte block separately + paddd ONE(%rip), %xmm0 # INCR CNT to get Yn + movdqu %xmm0, CurCount(%arg2) + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm0 + + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) + movdqu %xmm0, PBlockEncKey(%arg2) + + cmp $16, %arg5 + jge _large_enough_update_\@ + + lea (%arg4,%r11,1), %r10 + mov %r13, %r12 + READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 + jmp _data_read_\@ + +_large_enough_update_\@: + sub $16, %r11 + add %r13, %r11 + + # receive the last <16 Byte block + movdqu (%arg4, %r11, 1), %xmm1 + + sub %r13, %r11 + add $16, %r11 + + lea SHIFT_MASK+16(%rip), %r12 + # adjust the shuffle mask pointer to be able to shift 16-r13 bytes + # (r13 is the number of bytes in plaintext mod 16) + sub %r13, %r12 + # get the appropriate shuffle mask + movdqu (%r12), %xmm2 + # shift right 16-r13 bytes + PSHUFB_XMM %xmm2, %xmm1 + +_data_read_\@: + lea ALL_F+16(%rip), %r12 + sub %r13, %r12 + +.ifc \operation, dec + movdqa %xmm1, %xmm2 +.endif + pxor %xmm1, %xmm0 # XOR Encrypt(K, Yn) + movdqu (%r12), %xmm1 + # get the appropriate mask to mask out top 16-r13 bytes of xmm0 + pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 +.ifc \operation, dec + pand %xmm1, %xmm2 + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10 ,%xmm2 + + pxor %xmm2, %xmm8 +.else + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10,%xmm0 + + pxor %xmm0, %xmm8 +.endif + + movdqu %xmm8, AadHash(%arg2) +.ifc \operation, enc + # GHASH computation for the last <16 byte block + movdqa SHUF_MASK(%rip), %xmm10 + # shuffle xmm0 back to output as ciphertext + PSHUFB_XMM %xmm10, %xmm0 +.endif + + # Output %r13 bytes + MOVQ_R64_XMM %xmm0, %rax + cmp $8, %r13 + jle _less_than_8_bytes_left_\@ + mov %rax, (%arg3 , %r11, 1) + add $8, %r11 + psrldq $8, %xmm0 + MOVQ_R64_XMM %xmm0, %rax + sub $8, %r13 +_less_than_8_bytes_left_\@: + mov %al, (%arg3, %r11, 1) + add $1, %r11 + shr $8, %rax + sub $1, %r13 + jne _less_than_8_bytes_left_\@ +_multiple_of_16_bytes_\@: +.endm + +# GCM_COMPLETE Finishes update of tag of last partial block +# Output: Authorization Tag (AUTH_TAG) +# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15 +.macro GCM_COMPLETE AUTHTAG AUTHTAGLEN + movdqu AadHash(%arg2), %xmm8 + movdqu HashKey(%arg2), %xmm13 + + mov PBlockLen(%arg2), %r12 + + cmp $0, %r12 + je _partial_done\@ + + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + +_partial_done\@: + mov AadLen(%arg2), %r12 # %r13 = aadLen (number of bytes) + shl $3, %r12 # convert into number of bits + movd %r12d, %xmm15 # len(A) in %xmm15 + mov InLen(%arg2), %r12 + shl $3, %r12 # len(C) in bits (*128) + MOVQ_R64_XMM %r12, %xmm1 + + pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 + pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) + pxor %xmm15, %xmm8 + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + # final GHASH computation + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm8 + + movdqu OrigIV(%arg2), %xmm0 # %xmm0 = Y0 + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) + pxor %xmm8, %xmm0 +_return_T_\@: + mov \AUTHTAG, %r10 # %r10 = authTag + mov \AUTHTAGLEN, %r11 # %r11 = auth_tag_len + cmp $16, %r11 + je _T_16_\@ + cmp $8, %r11 + jl _T_4_\@ +_T_8_\@: + MOVQ_R64_XMM %xmm0, %rax + mov %rax, (%r10) + add $8, %r10 + sub $8, %r11 + psrldq $8, %xmm0 + cmp $0, %r11 + je _return_T_done_\@ +_T_4_\@: + movd %xmm0, %eax + mov %eax, (%r10) + add $4, %r10 + sub $4, %r11 + psrldq $4, %xmm0 + cmp $0, %r11 + je _return_T_done_\@ +_T_123_\@: + movd %xmm0, %eax + cmp $2, %r11 + jl _T_1_\@ + mov %ax, (%r10) + cmp $2, %r11 + je _return_T_done_\@ + add $2, %r10 + sar $16, %eax +_T_1_\@: + mov %al, (%r10) + jmp _return_T_done_\@ +_T_16_\@: + movdqu %xmm0, (%r10) +_return_T_done_\@: +.endm #ifdef __x86_64__ /* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) @@ -264,232 +598,188 @@ _read_next_byte_lt8_\@: _done_read_partial_block_\@: .endm -/* -* if a = number of total plaintext bytes -* b = floor(a/16) -* num_initial_blocks = b mod 4 -* encrypt the initial num_initial_blocks blocks and apply ghash on -* the ciphertext -* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers -* are clobbered -* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified -*/ - - -.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ -XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation - MOVADQ SHUF_MASK(%rip), %xmm14 - mov arg7, %r10 # %r10 = AAD - mov arg8, %r11 # %r11 = aadLen - pxor %xmm\i, %xmm\i - pxor \XMM2, \XMM2 +# CALC_AAD_HASH: Calculates the hash of the data which will not be encrypted. +# clobbers r10-11, xmm14 +.macro CALC_AAD_HASH HASHKEY AAD AADLEN TMP1 TMP2 TMP3 TMP4 TMP5 \ + TMP6 TMP7 + MOVADQ SHUF_MASK(%rip), %xmm14 + mov \AAD, %r10 # %r10 = AAD + mov \AADLEN, %r11 # %r11 = aadLen + pxor \TMP7, \TMP7 + pxor \TMP6, \TMP6 cmp $16, %r11 - jl _get_AAD_rest\num_initial_blocks\operation -_get_AAD_blocks\num_initial_blocks\operation: - movdqu (%r10), %xmm\i - PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data - pxor %xmm\i, \XMM2 - GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + jl _get_AAD_rest\@ +_get_AAD_blocks\@: + movdqu (%r10), \TMP7 + PSHUFB_XMM %xmm14, \TMP7 # byte-reflect the AAD data + pxor \TMP7, \TMP6 + GHASH_MUL \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5 add $16, %r10 sub $16, %r11 cmp $16, %r11 - jge _get_AAD_blocks\num_initial_blocks\operation + jge _get_AAD_blocks\@ - movdqu \XMM2, %xmm\i + movdqu \TMP6, \TMP7 /* read the last <16B of AAD */ -_get_AAD_rest\num_initial_blocks\operation: +_get_AAD_rest\@: cmp $0, %r11 - je _get_AAD_done\num_initial_blocks\operation - - READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i - PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data - pxor \XMM2, %xmm\i - GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + je _get_AAD_done\@ -_get_AAD_done\num_initial_blocks\operation: - xor %r11, %r11 # initialise the data pointer offset as zero - # start AES for num_initial_blocks blocks - - mov %arg5, %rax # %rax = *Y0 - movdqu (%rax), \XMM0 # XMM0 = Y0 - PSHUFB_XMM %xmm14, \XMM0 + READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7 + PSHUFB_XMM %xmm14, \TMP7 # byte-reflect the AAD data + pxor \TMP6, \TMP7 + GHASH_MUL \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5 + movdqu \TMP7, \TMP6 -.if (\i == 5) || (\i == 6) || (\i == 7) - MOVADQ ONE(%RIP),\TMP1 - MOVADQ (%arg1),\TMP2 -.irpc index, \i_seq - paddd \TMP1, \XMM0 # INCR Y0 - movdqa \XMM0, %xmm\index - PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap - pxor \TMP2, %xmm\index -.endr - lea 0x10(%arg1),%r10 - mov keysize,%eax - shr $2,%eax # 128->4, 192->6, 256->8 - add $5,%eax # 128->9, 192->11, 256->13 - -aes_loop_initial_dec\num_initial_blocks: - MOVADQ (%r10),\TMP1 -.irpc index, \i_seq - AESENC \TMP1, %xmm\index -.endr - add $16,%r10 - sub $1,%eax - jnz aes_loop_initial_dec\num_initial_blocks - - MOVADQ (%r10), \TMP1 -.irpc index, \i_seq - AESENCLAST \TMP1, %xmm\index # Last Round -.endr -.irpc index, \i_seq - movdqu (%arg3 , %r11, 1), \TMP1 - pxor \TMP1, %xmm\index - movdqu %xmm\index, (%arg2 , %r11, 1) - # write back plaintext/ciphertext for num_initial_blocks - add $16, %r11 - - movdqa \TMP1, %xmm\index - PSHUFB_XMM %xmm14, %xmm\index - # prepare plaintext/ciphertext for GHASH computation -.endr -.endif - - # apply GHASH on num_initial_blocks blocks +_get_AAD_done\@: + movdqu \TMP6, AadHash(%arg2) +.endm -.if \i == 5 - pxor %xmm5, %xmm6 - GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - pxor %xmm6, %xmm7 - GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - pxor %xmm7, %xmm8 - GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 -.elseif \i == 6 - pxor %xmm6, %xmm7 - GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - pxor %xmm7, %xmm8 - GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 -.elseif \i == 7 - pxor %xmm7, %xmm8 - GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks +# between update calls. +# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK +# Outputs encrypted bytes, and updates hash and partial info in gcm_data_context +# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13 +.macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ + AAD_HASH operation + mov PBlockLen(%arg2), %r13 + cmp $0, %r13 + je _partial_block_done_\@ # Leave Macro if no partial blocks + # Read in input data without over reading + cmp $16, \PLAIN_CYPH_LEN + jl _fewer_than_16_bytes_\@ + movups (\PLAIN_CYPH_IN), %xmm1 # If more than 16 bytes, just fill xmm + jmp _data_read_\@ + +_fewer_than_16_bytes_\@: + lea (\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10 + mov \PLAIN_CYPH_LEN, %r12 + READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1 + + mov PBlockLen(%arg2), %r13 + +_data_read_\@: # Finished reading in data + + movdqu PBlockEncKey(%arg2), %xmm9 + movdqu HashKey(%arg2), %xmm13 + + lea SHIFT_MASK(%rip), %r12 + + # adjust the shuffle mask pointer to be able to shift r13 bytes + # r16-r13 is the number of bytes in plaintext mod 16) + add %r13, %r12 + movdqu (%r12), %xmm2 # get the appropriate shuffle mask + PSHUFB_XMM %xmm2, %xmm9 # shift right r13 bytes + +.ifc \operation, dec + movdqa %xmm1, %xmm3 + pxor %xmm1, %xmm9 # Cyphertext XOR E(K, Yn) + + mov \PLAIN_CYPH_LEN, %r10 + add %r13, %r10 + # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling + sub $16, %r10 + # Determine if if partial block is not being filled and + # shift mask accordingly + jge _no_extra_mask_1_\@ + sub %r10, %r12 +_no_extra_mask_1_\@: + + movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + # get the appropriate mask to mask out bottom r13 bytes of xmm9 + pand %xmm1, %xmm9 # mask out bottom r13 bytes of xmm9 + + pand %xmm1, %xmm3 + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm3 + PSHUFB_XMM %xmm2, %xmm3 + pxor %xmm3, \AAD_HASH + + cmp $0, %r10 + jl _partial_incomplete_1_\@ + + # GHASH computation for the last <16 Byte block + GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 + xor %rax,%rax + + mov %rax, PBlockLen(%arg2) + jmp _dec_done_\@ +_partial_incomplete_1_\@: + add \PLAIN_CYPH_LEN, PBlockLen(%arg2) +_dec_done_\@: + movdqu \AAD_HASH, AadHash(%arg2) +.else + pxor %xmm1, %xmm9 # Plaintext XOR E(K, Yn) + + mov \PLAIN_CYPH_LEN, %r10 + add %r13, %r10 + # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling + sub $16, %r10 + # Determine if if partial block is not being filled and + # shift mask accordingly + jge _no_extra_mask_2_\@ + sub %r10, %r12 +_no_extra_mask_2_\@: + + movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + # get the appropriate mask to mask out bottom r13 bytes of xmm9 + pand %xmm1, %xmm9 + + movdqa SHUF_MASK(%rip), %xmm1 + PSHUFB_XMM %xmm1, %xmm9 + PSHUFB_XMM %xmm2, %xmm9 + pxor %xmm9, \AAD_HASH + + cmp $0, %r10 + jl _partial_incomplete_2_\@ + + # GHASH computation for the last <16 Byte block + GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 + xor %rax,%rax + + mov %rax, PBlockLen(%arg2) + jmp _encode_done_\@ +_partial_incomplete_2_\@: + add \PLAIN_CYPH_LEN, PBlockLen(%arg2) +_encode_done_\@: + movdqu \AAD_HASH, AadHash(%arg2) + + movdqa SHUF_MASK(%rip), %xmm10 + # shuffle xmm9 back to output as ciphertext + PSHUFB_XMM %xmm10, %xmm9 + PSHUFB_XMM %xmm2, %xmm9 .endif - cmp $64, %r13 - jl _initial_blocks_done\num_initial_blocks\operation - # no need for precomputed values -/* -* -* Precomputations for HashKey parallel with encryption of first 4 blocks. -* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i -*/ - MOVADQ ONE(%rip), \TMP1 - paddd \TMP1, \XMM0 # INCR Y0 - MOVADQ \XMM0, \XMM1 - PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap - - paddd \TMP1, \XMM0 # INCR Y0 - MOVADQ \XMM0, \XMM2 - PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap - - paddd \TMP1, \XMM0 # INCR Y0 - MOVADQ \XMM0, \XMM3 - PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap - - paddd \TMP1, \XMM0 # INCR Y0 - MOVADQ \XMM0, \XMM4 - PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap - - MOVADQ 0(%arg1),\TMP1 - pxor \TMP1, \XMM1 - pxor \TMP1, \XMM2 - pxor \TMP1, \XMM3 - pxor \TMP1, \XMM4 - movdqa \TMP3, \TMP5 - pshufd $78, \TMP3, \TMP1 - pxor \TMP3, \TMP1 - movdqa \TMP1, HashKey_k(%rsp) - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^2<<1 (mod poly) - movdqa \TMP5, HashKey_2(%rsp) -# HashKey_2 = HashKey^2<<1 (mod poly) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_2_k(%rsp) -.irpc index, 1234 # do 4 rounds - movaps 0x10*\index(%arg1), \TMP1 - AESENC \TMP1, \XMM1 - AESENC \TMP1, \XMM2 - AESENC \TMP1, \XMM3 - AESENC \TMP1, \XMM4 -.endr - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_3(%rsp) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_3_k(%rsp) -.irpc index, 56789 # do next 5 rounds - movaps 0x10*\index(%arg1), \TMP1 - AESENC \TMP1, \XMM1 - AESENC \TMP1, \XMM2 - AESENC \TMP1, \XMM3 - AESENC \TMP1, \XMM4 -.endr - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_4(%rsp) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_4_k(%rsp) - lea 0xa0(%arg1),%r10 - mov keysize,%eax - shr $2,%eax # 128->4, 192->6, 256->8 - sub $4,%eax # 128->0, 192->2, 256->4 - jz aes_loop_pre_dec_done\num_initial_blocks - -aes_loop_pre_dec\num_initial_blocks: - MOVADQ (%r10),\TMP2 -.irpc index, 1234 - AESENC \TMP2, %xmm\index -.endr - add $16,%r10 - sub $1,%eax - jnz aes_loop_pre_dec\num_initial_blocks - -aes_loop_pre_dec_done\num_initial_blocks: - MOVADQ (%r10), \TMP2 - AESENCLAST \TMP2, \XMM1 - AESENCLAST \TMP2, \XMM2 - AESENCLAST \TMP2, \XMM3 - AESENCLAST \TMP2, \XMM4 - movdqu 16*0(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM1 - movdqu \XMM1, 16*0(%arg2 , %r11 , 1) - movdqa \TMP1, \XMM1 - movdqu 16*1(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM2 - movdqu \XMM2, 16*1(%arg2 , %r11 , 1) - movdqa \TMP1, \XMM2 - movdqu 16*2(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM3 - movdqu \XMM3, 16*2(%arg2 , %r11 , 1) - movdqa \TMP1, \XMM3 - movdqu 16*3(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM4 - movdqu \XMM4, 16*3(%arg2 , %r11 , 1) - movdqa \TMP1, \XMM4 - add $64, %r11 - PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap - pxor \XMMDst, \XMM1 -# combine GHASHed value with the corresponding ciphertext - PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap - PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap - PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap - -_initial_blocks_done\num_initial_blocks\operation: - -.endm + # output encrypted Bytes + cmp $0, %r10 + jl _partial_fill_\@ + mov %r13, %r12 + mov $16, %r13 + # Set r13 to be the number of bytes to write out + sub %r12, %r13 + jmp _count_set_\@ +_partial_fill_\@: + mov \PLAIN_CYPH_LEN, %r13 +_count_set_\@: + movdqa %xmm9, %xmm0 + MOVQ_R64_XMM %xmm0, %rax + cmp $8, %r13 + jle _less_than_8_bytes_left_\@ + mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) + add $8, \DATA_OFFSET + psrldq $8, %xmm0 + MOVQ_R64_XMM %xmm0, %rax + sub $8, %r13 +_less_than_8_bytes_left_\@: + movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) + add $1, \DATA_OFFSET + shr $8, %rax + sub $1, %r13 + jne _less_than_8_bytes_left_\@ +_partial_block_done_\@: +.endm # PARTIAL_BLOCK /* * if a = number of total plaintext bytes @@ -499,49 +789,19 @@ _initial_blocks_done\num_initial_blocks\operation: * the ciphertext * %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers * are clobbered -* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified +* arg1, %arg2, %arg3 are used as a pointer only, not modified */ -.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ -XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation - MOVADQ SHUF_MASK(%rip), %xmm14 - mov arg7, %r10 # %r10 = AAD - mov arg8, %r11 # %r11 = aadLen - pxor %xmm\i, %xmm\i - pxor \XMM2, \XMM2 +.macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ + XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation + MOVADQ SHUF_MASK(%rip), %xmm14 - cmp $16, %r11 - jl _get_AAD_rest\num_initial_blocks\operation -_get_AAD_blocks\num_initial_blocks\operation: - movdqu (%r10), %xmm\i - PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data - pxor %xmm\i, \XMM2 - GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - add $16, %r10 - sub $16, %r11 - cmp $16, %r11 - jge _get_AAD_blocks\num_initial_blocks\operation + movdqu AadHash(%arg2), %xmm\i # XMM0 = Y0 - movdqu \XMM2, %xmm\i - - /* read the last <16B of AAD */ -_get_AAD_rest\num_initial_blocks\operation: - cmp $0, %r11 - je _get_AAD_done\num_initial_blocks\operation - - READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i - PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data - pxor \XMM2, %xmm\i - GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - -_get_AAD_done\num_initial_blocks\operation: - xor %r11, %r11 # initialise the data pointer offset as zero # start AES for num_initial_blocks blocks - mov %arg5, %rax # %rax = *Y0 - movdqu (%rax), \XMM0 # XMM0 = Y0 - PSHUFB_XMM %xmm14, \XMM0 + movdqu CurCount(%arg2), \XMM0 # XMM0 = Y0 .if (\i == 5) || (\i == 6) || (\i == 7) @@ -549,7 +809,11 @@ _get_AAD_done\num_initial_blocks\operation: MOVADQ 0(%arg1),\TMP2 .irpc index, \i_seq paddd \TMP1, \XMM0 # INCR Y0 +.ifc \operation, dec + movdqa \XMM0, %xmm\index +.else MOVADQ \XMM0, %xmm\index +.endif PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap pxor \TMP2, %xmm\index .endr @@ -558,25 +822,29 @@ _get_AAD_done\num_initial_blocks\operation: shr $2,%eax # 128->4, 192->6, 256->8 add $5,%eax # 128->9, 192->11, 256->13 -aes_loop_initial_enc\num_initial_blocks: +aes_loop_initial_\@: MOVADQ (%r10),\TMP1 .irpc index, \i_seq AESENC \TMP1, %xmm\index .endr add $16,%r10 sub $1,%eax - jnz aes_loop_initial_enc\num_initial_blocks + jnz aes_loop_initial_\@ MOVADQ (%r10), \TMP1 .irpc index, \i_seq AESENCLAST \TMP1, %xmm\index # Last Round .endr .irpc index, \i_seq - movdqu (%arg3 , %r11, 1), \TMP1 + movdqu (%arg4 , %r11, 1), \TMP1 pxor \TMP1, %xmm\index - movdqu %xmm\index, (%arg2 , %r11, 1) + movdqu %xmm\index, (%arg3 , %r11, 1) # write back plaintext/ciphertext for num_initial_blocks add $16, %r11 + +.ifc \operation, dec + movdqa \TMP1, %xmm\index +.endif PSHUFB_XMM %xmm14, %xmm\index # prepare plaintext/ciphertext for GHASH computation @@ -602,7 +870,7 @@ aes_loop_initial_enc\num_initial_blocks: GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 .endif cmp $64, %r13 - jl _initial_blocks_done\num_initial_blocks\operation + jl _initial_blocks_done\@ # no need for precomputed values /* * @@ -631,17 +899,6 @@ aes_loop_initial_enc\num_initial_blocks: pxor \TMP1, \XMM2 pxor \TMP1, \XMM3 pxor \TMP1, \XMM4 - movdqa \TMP3, \TMP5 - pshufd $78, \TMP3, \TMP1 - pxor \TMP3, \TMP1 - movdqa \TMP1, HashKey_k(%rsp) - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^2<<1 (mod poly) - movdqa \TMP5, HashKey_2(%rsp) -# HashKey_2 = HashKey^2<<1 (mod poly) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_2_k(%rsp) .irpc index, 1234 # do 4 rounds movaps 0x10*\index(%arg1), \TMP1 AESENC \TMP1, \XMM1 @@ -649,12 +906,6 @@ aes_loop_initial_enc\num_initial_blocks: AESENC \TMP1, \XMM3 AESENC \TMP1, \XMM4 .endr - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_3(%rsp) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_3_k(%rsp) .irpc index, 56789 # do next 5 rounds movaps 0x10*\index(%arg1), \TMP1 AESENC \TMP1, \XMM1 @@ -662,45 +913,56 @@ aes_loop_initial_enc\num_initial_blocks: AESENC \TMP1, \XMM3 AESENC \TMP1, \XMM4 .endr - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_4(%rsp) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_4_k(%rsp) lea 0xa0(%arg1),%r10 mov keysize,%eax shr $2,%eax # 128->4, 192->6, 256->8 sub $4,%eax # 128->0, 192->2, 256->4 - jz aes_loop_pre_enc_done\num_initial_blocks + jz aes_loop_pre_done\@ -aes_loop_pre_enc\num_initial_blocks: +aes_loop_pre_\@: MOVADQ (%r10),\TMP2 .irpc index, 1234 AESENC \TMP2, %xmm\index .endr add $16,%r10 sub $1,%eax - jnz aes_loop_pre_enc\num_initial_blocks + jnz aes_loop_pre_\@ -aes_loop_pre_enc_done\num_initial_blocks: +aes_loop_pre_done\@: MOVADQ (%r10), \TMP2 AESENCLAST \TMP2, \XMM1 AESENCLAST \TMP2, \XMM2 AESENCLAST \TMP2, \XMM3 AESENCLAST \TMP2, \XMM4 - movdqu 16*0(%arg3 , %r11 , 1), \TMP1 + movdqu 16*0(%arg4 , %r11 , 1), \TMP1 pxor \TMP1, \XMM1 - movdqu 16*1(%arg3 , %r11 , 1), \TMP1 +.ifc \operation, dec + movdqu \XMM1, 16*0(%arg3 , %r11 , 1) + movdqa \TMP1, \XMM1 +.endif + movdqu 16*1(%arg4 , %r11 , 1), \TMP1 pxor \TMP1, \XMM2 - movdqu 16*2(%arg3 , %r11 , 1), \TMP1 +.ifc \operation, dec + movdqu \XMM2, 16*1(%arg3 , %r11 , 1) + movdqa \TMP1, \XMM2 +.endif + movdqu 16*2(%arg4 , %r11 , 1), \TMP1 pxor \TMP1, \XMM3 - movdqu 16*3(%arg3 , %r11 , 1), \TMP1 +.ifc \operation, dec + movdqu \XMM3, 16*2(%arg3 , %r11 , 1) + movdqa \TMP1, \XMM3 +.endif + movdqu 16*3(%arg4 , %r11 , 1), \TMP1 pxor \TMP1, \XMM4 - movdqu \XMM1, 16*0(%arg2 , %r11 , 1) - movdqu \XMM2, 16*1(%arg2 , %r11 , 1) - movdqu \XMM3, 16*2(%arg2 , %r11 , 1) - movdqu \XMM4, 16*3(%arg2 , %r11 , 1) +.ifc \operation, dec + movdqu \XMM4, 16*3(%arg3 , %r11 , 1) + movdqa \TMP1, \XMM4 +.else + movdqu \XMM1, 16*0(%arg3 , %r11 , 1) + movdqu \XMM2, 16*1(%arg3 , %r11 , 1) + movdqu \XMM3, 16*2(%arg3 , %r11 , 1) + movdqu \XMM4, 16*3(%arg3 , %r11 , 1) +.endif add $64, %r11 PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap @@ -710,14 +972,14 @@ aes_loop_pre_enc_done\num_initial_blocks: PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap -_initial_blocks_done\num_initial_blocks\operation: +_initial_blocks_done\@: .endm /* * encrypt 4 blocks at a time * ghash the 4 previously encrypted ciphertext blocks -* arg1, %arg2, %arg3 are used as pointers only, not modified +* arg1, %arg3, %arg4 are used as pointers only, not modified * %r11 is the data offset value */ .macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \ @@ -735,7 +997,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pshufd $78, \XMM5, \TMP6 pxor \XMM5, \TMP6 paddd ONE(%rip), \XMM0 # INCR CNT - movdqa HashKey_4(%rsp), \TMP5 + movdqa HashKey_4(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 movdqa \XMM0, \XMM1 paddd ONE(%rip), \XMM0 # INCR CNT @@ -754,7 +1016,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pxor (%arg1), \XMM2 pxor (%arg1), \XMM3 pxor (%arg1), \XMM4 - movdqa HashKey_4_k(%rsp), \TMP5 + movdqa HashKey_4_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) movaps 0x10(%arg1), \TMP1 AESENC \TMP1, \XMM1 # Round 1 @@ -769,7 +1031,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM6, \TMP1 pshufd $78, \XMM6, \TMP2 pxor \XMM6, \TMP2 - movdqa HashKey_3(%rsp), \TMP5 + movdqa HashKey_3(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 movaps 0x30(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 3 @@ -782,7 +1044,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_3_k(%rsp), \TMP5 + movdqa HashKey_3_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x50(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 5 @@ -796,7 +1058,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM7, \TMP1 pshufd $78, \XMM7, \TMP2 pxor \XMM7, \TMP2 - movdqa HashKey_2(%rsp ), \TMP5 + movdqa HashKey_2(%arg2), \TMP5 # Multiply TMP5 * HashKey using karatsuba @@ -812,7 +1074,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_2_k(%rsp), \TMP5 + movdqa HashKey_2_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x80(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 8 @@ -830,7 +1092,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM8, \TMP1 pshufd $78, \XMM8, \TMP2 pxor \XMM8, \TMP2 - movdqa HashKey(%rsp), \TMP5 + movdqa HashKey(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 movaps 0x90(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 9 @@ -842,37 +1104,37 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation mov keysize,%eax shr $2,%eax # 128->4, 192->6, 256->8 sub $4,%eax # 128->0, 192->2, 256->4 - jz aes_loop_par_enc_done + jz aes_loop_par_enc_done\@ -aes_loop_par_enc: +aes_loop_par_enc\@: MOVADQ (%r10),\TMP3 .irpc index, 1234 AESENC \TMP3, %xmm\index .endr add $16,%r10 sub $1,%eax - jnz aes_loop_par_enc + jnz aes_loop_par_enc\@ -aes_loop_par_enc_done: +aes_loop_par_enc_done\@: MOVADQ (%r10), \TMP3 AESENCLAST \TMP3, \XMM1 # Round 10 AESENCLAST \TMP3, \XMM2 AESENCLAST \TMP3, \XMM3 AESENCLAST \TMP3, \XMM4 - movdqa HashKey_k(%rsp), \TMP5 + movdqa HashKey_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) - movdqu (%arg3,%r11,1), \TMP3 + movdqu (%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK - movdqu 16(%arg3,%r11,1), \TMP3 + movdqu 16(%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK - movdqu 32(%arg3,%r11,1), \TMP3 + movdqu 32(%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK - movdqu 48(%arg3,%r11,1), \TMP3 + movdqu 48(%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK - movdqu \XMM1, (%arg2,%r11,1) # Write to the ciphertext buffer - movdqu \XMM2, 16(%arg2,%r11,1) # Write to the ciphertext buffer - movdqu \XMM3, 32(%arg2,%r11,1) # Write to the ciphertext buffer - movdqu \XMM4, 48(%arg2,%r11,1) # Write to the ciphertext buffer + movdqu \XMM1, (%arg3,%r11,1) # Write to the ciphertext buffer + movdqu \XMM2, 16(%arg3,%r11,1) # Write to the ciphertext buffer + movdqu \XMM3, 32(%arg3,%r11,1) # Write to the ciphertext buffer + movdqu \XMM4, 48(%arg3,%r11,1) # Write to the ciphertext buffer PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap @@ -925,7 +1187,7 @@ aes_loop_par_enc_done: /* * decrypt 4 blocks at a time * ghash the 4 previously decrypted ciphertext blocks -* arg1, %arg2, %arg3 are used as pointers only, not modified +* arg1, %arg3, %arg4 are used as pointers only, not modified * %r11 is the data offset value */ .macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \ @@ -943,7 +1205,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pshufd $78, \XMM5, \TMP6 pxor \XMM5, \TMP6 paddd ONE(%rip), \XMM0 # INCR CNT - movdqa HashKey_4(%rsp), \TMP5 + movdqa HashKey_4(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 movdqa \XMM0, \XMM1 paddd ONE(%rip), \XMM0 # INCR CNT @@ -962,7 +1224,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pxor (%arg1), \XMM2 pxor (%arg1), \XMM3 pxor (%arg1), \XMM4 - movdqa HashKey_4_k(%rsp), \TMP5 + movdqa HashKey_4_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) movaps 0x10(%arg1), \TMP1 AESENC \TMP1, \XMM1 # Round 1 @@ -977,7 +1239,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM6, \TMP1 pshufd $78, \XMM6, \TMP2 pxor \XMM6, \TMP2 - movdqa HashKey_3(%rsp), \TMP5 + movdqa HashKey_3(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 movaps 0x30(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 3 @@ -990,7 +1252,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_3_k(%rsp), \TMP5 + movdqa HashKey_3_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x50(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 5 @@ -1004,7 +1266,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM7, \TMP1 pshufd $78, \XMM7, \TMP2 pxor \XMM7, \TMP2 - movdqa HashKey_2(%rsp ), \TMP5 + movdqa HashKey_2(%arg2), \TMP5 # Multiply TMP5 * HashKey using karatsuba @@ -1020,7 +1282,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_2_k(%rsp), \TMP5 + movdqa HashKey_2_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x80(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 8 @@ -1038,7 +1300,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM8, \TMP1 pshufd $78, \XMM8, \TMP2 pxor \XMM8, \TMP2 - movdqa HashKey(%rsp), \TMP5 + movdqa HashKey(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 movaps 0x90(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 9 @@ -1050,40 +1312,40 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation mov keysize,%eax shr $2,%eax # 128->4, 192->6, 256->8 sub $4,%eax # 128->0, 192->2, 256->4 - jz aes_loop_par_dec_done + jz aes_loop_par_dec_done\@ -aes_loop_par_dec: +aes_loop_par_dec\@: MOVADQ (%r10),\TMP3 .irpc index, 1234 AESENC \TMP3, %xmm\index .endr add $16,%r10 sub $1,%eax - jnz aes_loop_par_dec + jnz aes_loop_par_dec\@ -aes_loop_par_dec_done: +aes_loop_par_dec_done\@: MOVADQ (%r10), \TMP3 AESENCLAST \TMP3, \XMM1 # last round AESENCLAST \TMP3, \XMM2 AESENCLAST \TMP3, \XMM3 AESENCLAST \TMP3, \XMM4 - movdqa HashKey_k(%rsp), \TMP5 + movdqa HashKey_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) - movdqu (%arg3,%r11,1), \TMP3 + movdqu (%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK - movdqu \XMM1, (%arg2,%r11,1) # Write to plaintext buffer + movdqu \XMM1, (%arg3,%r11,1) # Write to plaintext buffer movdqa \TMP3, \XMM1 - movdqu 16(%arg3,%r11,1), \TMP3 + movdqu 16(%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK - movdqu \XMM2, 16(%arg2,%r11,1) # Write to plaintext buffer + movdqu \XMM2, 16(%arg3,%r11,1) # Write to plaintext buffer movdqa \TMP3, \XMM2 - movdqu 32(%arg3,%r11,1), \TMP3 + movdqu 32(%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK - movdqu \XMM3, 32(%arg2,%r11,1) # Write to plaintext buffer + movdqu \XMM3, 32(%arg3,%r11,1) # Write to plaintext buffer movdqa \TMP3, \XMM3 - movdqu 48(%arg3,%r11,1), \TMP3 + movdqu 48(%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK - movdqu \XMM4, 48(%arg2,%r11,1) # Write to plaintext buffer + movdqu \XMM4, 48(%arg3,%r11,1) # Write to plaintext buffer movdqa \TMP3, \XMM4 PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap @@ -1143,10 +1405,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM1, \TMP6 pshufd $78, \XMM1, \TMP2 pxor \XMM1, \TMP2 - movdqa HashKey_4(%rsp), \TMP5 + movdqa HashKey_4(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0 - movdqa HashKey_4_k(%rsp), \TMP4 + movdqa HashKey_4_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movdqa \XMM1, \XMMDst movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1 @@ -1156,10 +1418,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM2, \TMP1 pshufd $78, \XMM2, \TMP2 pxor \XMM2, \TMP2 - movdqa HashKey_3(%rsp), \TMP5 + movdqa HashKey_3(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0 - movdqa HashKey_3_k(%rsp), \TMP4 + movdqa HashKey_3_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pxor \TMP1, \TMP6 pxor \XMM2, \XMMDst @@ -1171,10 +1433,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM3, \TMP1 pshufd $78, \XMM3, \TMP2 pxor \XMM3, \TMP2 - movdqa HashKey_2(%rsp), \TMP5 + movdqa HashKey_2(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0 - movdqa HashKey_2_k(%rsp), \TMP4 + movdqa HashKey_2_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pxor \TMP1, \TMP6 pxor \XMM3, \XMMDst @@ -1184,10 +1446,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM4, \TMP1 pshufd $78, \XMM4, \TMP2 pxor \XMM4, \TMP2 - movdqa HashKey(%rsp), \TMP5 + movdqa HashKey(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0 - movdqa HashKey_k(%rsp), \TMP4 + movdqa HashKey_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pxor \TMP1, \TMP6 pxor \XMM4, \XMMDst @@ -1256,6 +1518,8 @@ _esb_loop_\@: .endm /***************************************************************************** * void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data +* // Context data * u8 *out, // Plaintext output. Encrypt in-place is allowed. * const u8 *in, // Ciphertext input * u64 plaintext_len, // Length of data in bytes for decryption. @@ -1333,195 +1597,20 @@ _esb_loop_\@: * *****************************************************************************/ ENTRY(aesni_gcm_dec) - push %r12 - push %r13 - push %r14 - mov %rsp, %r14 -/* -* states of %xmm registers %xmm6:%xmm15 not saved -* all %xmm registers are clobbered -*/ - sub $VARIABLE_OFFSET, %rsp - and $~63, %rsp # align rsp to 64 bytes - mov %arg6, %r12 - movdqu (%r12), %xmm13 # %xmm13 = HashKey - movdqa SHUF_MASK(%rip), %xmm2 - PSHUFB_XMM %xmm2, %xmm13 - - -# Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH) - - movdqa %xmm13, %xmm2 - psllq $1, %xmm13 - psrlq $63, %xmm2 - movdqa %xmm2, %xmm1 - pslldq $8, %xmm2 - psrldq $8, %xmm1 - por %xmm2, %xmm13 - - # Reduction - - pshufd $0x24, %xmm1, %xmm2 - pcmpeqd TWOONE(%rip), %xmm2 - pand POLY(%rip), %xmm2 - pxor %xmm2, %xmm13 # %xmm13 holds the HashKey<<1 (mod poly) - - - # Decrypt first few blocks - - movdqa %xmm13, HashKey(%rsp) # store HashKey<<1 (mod poly) - mov %arg4, %r13 # save the number of bytes of plaintext/ciphertext - and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) - mov %r13, %r12 - and $(3<<4), %r12 - jz _initial_num_blocks_is_0_decrypt - cmp $(2<<4), %r12 - jb _initial_num_blocks_is_1_decrypt - je _initial_num_blocks_is_2_decrypt -_initial_num_blocks_is_3_decrypt: - INITIAL_BLOCKS_DEC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec - sub $48, %r13 - jmp _initial_blocks_decrypted -_initial_num_blocks_is_2_decrypt: - INITIAL_BLOCKS_DEC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec - sub $32, %r13 - jmp _initial_blocks_decrypted -_initial_num_blocks_is_1_decrypt: - INITIAL_BLOCKS_DEC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec - sub $16, %r13 - jmp _initial_blocks_decrypted -_initial_num_blocks_is_0_decrypt: - INITIAL_BLOCKS_DEC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec -_initial_blocks_decrypted: - cmp $0, %r13 - je _zero_cipher_left_decrypt - sub $64, %r13 - je _four_cipher_left_decrypt -_decrypt_by_4: - GHASH_4_ENCRYPT_4_PARALLEL_DEC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ -%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec - add $64, %r11 - sub $64, %r13 - jne _decrypt_by_4 -_four_cipher_left_decrypt: - GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ -%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 -_zero_cipher_left_decrypt: - mov %arg4, %r13 - and $15, %r13 # %r13 = arg4 (mod 16) - je _multiple_of_16_bytes_decrypt - - # Handle the last <16 byte block separately - - paddd ONE(%rip), %xmm0 # increment CNT to get Yn - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm0 - - ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn) - - lea (%arg3,%r11,1), %r10 - mov %r13, %r12 - READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 - - lea ALL_F+16(%rip), %r12 - sub %r13, %r12 - movdqa %xmm1, %xmm2 - pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn) - movdqu (%r12), %xmm1 - # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0 - pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0 - pand %xmm1, %xmm2 - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10 ,%xmm2 - - pxor %xmm2, %xmm8 - GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 - - # output %r13 bytes - MOVQ_R64_XMM %xmm0, %rax - cmp $8, %r13 - jle _less_than_8_bytes_left_decrypt - mov %rax, (%arg2 , %r11, 1) - add $8, %r11 - psrldq $8, %xmm0 - MOVQ_R64_XMM %xmm0, %rax - sub $8, %r13 -_less_than_8_bytes_left_decrypt: - mov %al, (%arg2, %r11, 1) - add $1, %r11 - shr $8, %rax - sub $1, %r13 - jne _less_than_8_bytes_left_decrypt -_multiple_of_16_bytes_decrypt: - mov arg8, %r12 # %r13 = aadLen (number of bytes) - shl $3, %r12 # convert into number of bits - movd %r12d, %xmm15 # len(A) in %xmm15 - shl $3, %arg4 # len(C) in bits (*128) - MOVQ_R64_XMM %arg4, %xmm1 - pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 - pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) - pxor %xmm15, %xmm8 - GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 - # final GHASH computation - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm8 + FUNC_SAVE - mov %arg5, %rax # %rax = *Y0 - movdqu (%rax), %xmm0 # %xmm0 = Y0 - ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) - pxor %xmm8, %xmm0 -_return_T_decrypt: - mov arg9, %r10 # %r10 = authTag - mov arg10, %r11 # %r11 = auth_tag_len - cmp $16, %r11 - je _T_16_decrypt - cmp $8, %r11 - jl _T_4_decrypt -_T_8_decrypt: - MOVQ_R64_XMM %xmm0, %rax - mov %rax, (%r10) - add $8, %r10 - sub $8, %r11 - psrldq $8, %xmm0 - cmp $0, %r11 - je _return_T_done_decrypt -_T_4_decrypt: - movd %xmm0, %eax - mov %eax, (%r10) - add $4, %r10 - sub $4, %r11 - psrldq $4, %xmm0 - cmp $0, %r11 - je _return_T_done_decrypt -_T_123_decrypt: - movd %xmm0, %eax - cmp $2, %r11 - jl _T_1_decrypt - mov %ax, (%r10) - cmp $2, %r11 - je _return_T_done_decrypt - add $2, %r10 - sar $16, %eax -_T_1_decrypt: - mov %al, (%r10) - jmp _return_T_done_decrypt -_T_16_decrypt: - movdqu %xmm0, (%r10) -_return_T_done_decrypt: - mov %r14, %rsp - pop %r14 - pop %r13 - pop %r12 + GCM_INIT %arg6, arg7, arg8, arg9 + GCM_ENC_DEC dec + GCM_COMPLETE arg10, arg11 + FUNC_RESTORE ret ENDPROC(aesni_gcm_dec) /***************************************************************************** * void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data +* // Context data * u8 *out, // Ciphertext output. Encrypt in-place is allowed. * const u8 *in, // Plaintext input * u64 plaintext_len, // Length of data in bytes for encryption. @@ -1596,195 +1685,78 @@ ENDPROC(aesni_gcm_dec) * poly = x^128 + x^127 + x^126 + x^121 + 1 ***************************************************************************/ ENTRY(aesni_gcm_enc) - push %r12 - push %r13 - push %r14 - mov %rsp, %r14 -# -# states of %xmm registers %xmm6:%xmm15 not saved -# all %xmm registers are clobbered -# - sub $VARIABLE_OFFSET, %rsp - and $~63, %rsp - mov %arg6, %r12 - movdqu (%r12), %xmm13 - movdqa SHUF_MASK(%rip), %xmm2 - PSHUFB_XMM %xmm2, %xmm13 - - -# precompute HashKey<<1 mod poly from the HashKey (required for GHASH) - - movdqa %xmm13, %xmm2 - psllq $1, %xmm13 - psrlq $63, %xmm2 - movdqa %xmm2, %xmm1 - pslldq $8, %xmm2 - psrldq $8, %xmm1 - por %xmm2, %xmm13 - - # reduce HashKey<<1 - - pshufd $0x24, %xmm1, %xmm2 - pcmpeqd TWOONE(%rip), %xmm2 - pand POLY(%rip), %xmm2 - pxor %xmm2, %xmm13 - movdqa %xmm13, HashKey(%rsp) - mov %arg4, %r13 # %xmm13 holds HashKey<<1 (mod poly) - and $-16, %r13 - mov %r13, %r12 + FUNC_SAVE - # Encrypt first few blocks + GCM_INIT %arg6, arg7, arg8, arg9 + GCM_ENC_DEC enc - and $(3<<4), %r12 - jz _initial_num_blocks_is_0_encrypt - cmp $(2<<4), %r12 - jb _initial_num_blocks_is_1_encrypt - je _initial_num_blocks_is_2_encrypt -_initial_num_blocks_is_3_encrypt: - INITIAL_BLOCKS_ENC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc - sub $48, %r13 - jmp _initial_blocks_encrypted -_initial_num_blocks_is_2_encrypt: - INITIAL_BLOCKS_ENC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc - sub $32, %r13 - jmp _initial_blocks_encrypted -_initial_num_blocks_is_1_encrypt: - INITIAL_BLOCKS_ENC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc - sub $16, %r13 - jmp _initial_blocks_encrypted -_initial_num_blocks_is_0_encrypt: - INITIAL_BLOCKS_ENC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc -_initial_blocks_encrypted: - - # Main loop - Encrypt remaining blocks - - cmp $0, %r13 - je _zero_cipher_left_encrypt - sub $64, %r13 - je _four_cipher_left_encrypt -_encrypt_by_4_encrypt: - GHASH_4_ENCRYPT_4_PARALLEL_ENC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ -%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc - add $64, %r11 - sub $64, %r13 - jne _encrypt_by_4_encrypt -_four_cipher_left_encrypt: - GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ -%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 -_zero_cipher_left_encrypt: - mov %arg4, %r13 - and $15, %r13 # %r13 = arg4 (mod 16) - je _multiple_of_16_bytes_encrypt - - # Handle the last <16 Byte block separately - paddd ONE(%rip), %xmm0 # INCR CNT to get Yn - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm0 - - ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) - - lea (%arg3,%r11,1), %r10 - mov %r13, %r12 - READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 - - lea ALL_F+16(%rip), %r12 - sub %r13, %r12 - pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn) - movdqu (%r12), %xmm1 - # get the appropriate mask to mask out top 16-r13 bytes of xmm0 - pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10,%xmm0 + GCM_COMPLETE arg10, arg11 + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_enc) - pxor %xmm0, %xmm8 - GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 - # GHASH computation for the last <16 byte block - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm0 +/***************************************************************************** +* void aesni_gcm_init(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data, +* // context data +* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) +* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) +* // concatenated with 0x00000001. 16-byte aligned pointer. +* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. +* const u8 *aad, // Additional Authentication Data (AAD) +* u64 aad_len) // Length of AAD in bytes. +*/ +ENTRY(aesni_gcm_init) + FUNC_SAVE + GCM_INIT %arg3, %arg4,%arg5, %arg6 + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_init) - # shuffle xmm0 back to output as ciphertext +/***************************************************************************** +* void aesni_gcm_enc_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data, +* // context data +* u8 *out, // Ciphertext output. Encrypt in-place is allowed. +* const u8 *in, // Plaintext input +* u64 plaintext_len, // Length of data in bytes for encryption. +*/ +ENTRY(aesni_gcm_enc_update) + FUNC_SAVE + GCM_ENC_DEC enc + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_enc_update) - # Output %r13 bytes - MOVQ_R64_XMM %xmm0, %rax - cmp $8, %r13 - jle _less_than_8_bytes_left_encrypt - mov %rax, (%arg2 , %r11, 1) - add $8, %r11 - psrldq $8, %xmm0 - MOVQ_R64_XMM %xmm0, %rax - sub $8, %r13 -_less_than_8_bytes_left_encrypt: - mov %al, (%arg2, %r11, 1) - add $1, %r11 - shr $8, %rax - sub $1, %r13 - jne _less_than_8_bytes_left_encrypt -_multiple_of_16_bytes_encrypt: - mov arg8, %r12 # %r12 = addLen (number of bytes) - shl $3, %r12 - movd %r12d, %xmm15 # len(A) in %xmm15 - shl $3, %arg4 # len(C) in bits (*128) - MOVQ_R64_XMM %arg4, %xmm1 - pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 - pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) - pxor %xmm15, %xmm8 - GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 - # final GHASH computation - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm8 # perform a 16 byte swap +/***************************************************************************** +* void aesni_gcm_dec_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data, +* // context data +* u8 *out, // Ciphertext output. Encrypt in-place is allowed. +* const u8 *in, // Plaintext input +* u64 plaintext_len, // Length of data in bytes for encryption. +*/ +ENTRY(aesni_gcm_dec_update) + FUNC_SAVE + GCM_ENC_DEC dec + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_dec_update) - mov %arg5, %rax # %rax = *Y0 - movdqu (%rax), %xmm0 # %xmm0 = Y0 - ENCRYPT_SINGLE_BLOCK %xmm0, %xmm15 # Encrypt(K, Y0) - pxor %xmm8, %xmm0 -_return_T_encrypt: - mov arg9, %r10 # %r10 = authTag - mov arg10, %r11 # %r11 = auth_tag_len - cmp $16, %r11 - je _T_16_encrypt - cmp $8, %r11 - jl _T_4_encrypt -_T_8_encrypt: - MOVQ_R64_XMM %xmm0, %rax - mov %rax, (%r10) - add $8, %r10 - sub $8, %r11 - psrldq $8, %xmm0 - cmp $0, %r11 - je _return_T_done_encrypt -_T_4_encrypt: - movd %xmm0, %eax - mov %eax, (%r10) - add $4, %r10 - sub $4, %r11 - psrldq $4, %xmm0 - cmp $0, %r11 - je _return_T_done_encrypt -_T_123_encrypt: - movd %xmm0, %eax - cmp $2, %r11 - jl _T_1_encrypt - mov %ax, (%r10) - cmp $2, %r11 - je _return_T_done_encrypt - add $2, %r10 - sar $16, %eax -_T_1_encrypt: - mov %al, (%r10) - jmp _return_T_done_encrypt -_T_16_encrypt: - movdqu %xmm0, (%r10) -_return_T_done_encrypt: - mov %r14, %rsp - pop %r14 - pop %r13 - pop %r12 +/***************************************************************************** +* void aesni_gcm_finalize(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data, +* // context data +* u8 *auth_tag, // Authenticated Tag output. +* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), +* // 12 or 8. +*/ +ENTRY(aesni_gcm_finalize) + FUNC_SAVE + GCM_COMPLETE %arg3 %arg4 + FUNC_RESTORE ret -ENDPROC(aesni_gcm_enc) +ENDPROC(aesni_gcm_finalize) #endif diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 34cf1c1f8c98..acbe7e8336d8 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -72,6 +72,21 @@ struct aesni_xts_ctx { u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR; }; +#define GCM_BLOCK_LEN 16 + +struct gcm_context_data { + /* init, update and finalize context data */ + u8 aad_hash[GCM_BLOCK_LEN]; + u64 aad_length; + u64 in_length; + u8 partial_block_enc_key[GCM_BLOCK_LEN]; + u8 orig_IV[GCM_BLOCK_LEN]; + u8 current_counter[GCM_BLOCK_LEN]; + u64 partial_block_len; + u64 unused; + u8 hash_keys[GCM_BLOCK_LEN * 8]; +}; + asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, unsigned int key_len); asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, @@ -105,6 +120,7 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out, /* asmlinkage void aesni_gcm_enc() * void *ctx, AES Key schedule. Starts on a 16 byte boundary. + * struct gcm_context_data. May be uninitialized. * u8 *out, Ciphertext output. Encrypt in-place is allowed. * const u8 *in, Plaintext input * unsigned long plaintext_len, Length of data in bytes for encryption. @@ -117,13 +133,15 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out, * unsigned long auth_tag_len), Authenticated Tag Length in bytes. * Valid values are 16 (most likely), 12 or 8. */ -asmlinkage void aesni_gcm_enc(void *ctx, u8 *out, +asmlinkage void aesni_gcm_enc(void *ctx, + struct gcm_context_data *gdata, u8 *out, const u8 *in, unsigned long plaintext_len, u8 *iv, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len); /* asmlinkage void aesni_gcm_dec() * void *ctx, AES Key schedule. Starts on a 16 byte boundary. + * struct gcm_context_data. May be uninitialized. * u8 *out, Plaintext output. Decrypt in-place is allowed. * const u8 *in, Ciphertext input * unsigned long ciphertext_len, Length of data in bytes for decryption. @@ -137,11 +155,28 @@ asmlinkage void aesni_gcm_enc(void *ctx, u8 *out, * unsigned long auth_tag_len) Authenticated Tag Length in bytes. * Valid values are 16 (most likely), 12 or 8. */ -asmlinkage void aesni_gcm_dec(void *ctx, u8 *out, +asmlinkage void aesni_gcm_dec(void *ctx, + struct gcm_context_data *gdata, u8 *out, const u8 *in, unsigned long ciphertext_len, u8 *iv, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len); +/* Scatter / Gather routines, with args similar to above */ +asmlinkage void aesni_gcm_init(void *ctx, + struct gcm_context_data *gdata, + u8 *iv, + u8 *hash_subkey, const u8 *aad, + unsigned long aad_len); +asmlinkage void aesni_gcm_enc_update(void *ctx, + struct gcm_context_data *gdata, u8 *out, + const u8 *in, unsigned long plaintext_len); +asmlinkage void aesni_gcm_dec_update(void *ctx, + struct gcm_context_data *gdata, u8 *out, + const u8 *in, + unsigned long ciphertext_len); +asmlinkage void aesni_gcm_finalize(void *ctx, + struct gcm_context_data *gdata, + u8 *auth_tag, unsigned long auth_tag_len); #ifdef CONFIG_AS_AVX asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv, @@ -167,15 +202,17 @@ asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, u8 *out, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len); -static void aesni_gcm_enc_avx(void *ctx, u8 *out, +static void aesni_gcm_enc_avx(void *ctx, + struct gcm_context_data *data, u8 *out, const u8 *in, unsigned long plaintext_len, u8 *iv, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len) { struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)){ - aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, - aad_len, auth_tag, auth_tag_len); + aesni_gcm_enc(ctx, data, out, in, + plaintext_len, iv, hash_subkey, aad, + aad_len, auth_tag, auth_tag_len); } else { aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad, @@ -183,15 +220,17 @@ static void aesni_gcm_enc_avx(void *ctx, u8 *out, } } -static void aesni_gcm_dec_avx(void *ctx, u8 *out, +static void aesni_gcm_dec_avx(void *ctx, + struct gcm_context_data *data, u8 *out, const u8 *in, unsigned long ciphertext_len, u8 *iv, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len) { struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { - aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad, - aad_len, auth_tag, auth_tag_len); + aesni_gcm_dec(ctx, data, out, in, + ciphertext_len, iv, hash_subkey, aad, + aad_len, auth_tag, auth_tag_len); } else { aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad, @@ -218,15 +257,17 @@ asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, u8 *out, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len); -static void aesni_gcm_enc_avx2(void *ctx, u8 *out, +static void aesni_gcm_enc_avx2(void *ctx, + struct gcm_context_data *data, u8 *out, const u8 *in, unsigned long plaintext_len, u8 *iv, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len) { struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { - aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, - aad_len, auth_tag, auth_tag_len); + aesni_gcm_enc(ctx, data, out, in, + plaintext_len, iv, hash_subkey, aad, + aad_len, auth_tag, auth_tag_len); } else if (plaintext_len < AVX_GEN4_OPTSIZE) { aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad, @@ -238,15 +279,17 @@ static void aesni_gcm_enc_avx2(void *ctx, u8 *out, } } -static void aesni_gcm_dec_avx2(void *ctx, u8 *out, +static void aesni_gcm_dec_avx2(void *ctx, + struct gcm_context_data *data, u8 *out, const u8 *in, unsigned long ciphertext_len, u8 *iv, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len) { struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { - aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, - aad, aad_len, auth_tag, auth_tag_len); + aesni_gcm_dec(ctx, data, out, in, + ciphertext_len, iv, hash_subkey, + aad, aad_len, auth_tag, auth_tag_len); } else if (ciphertext_len < AVX_GEN4_OPTSIZE) { aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad, @@ -259,15 +302,19 @@ static void aesni_gcm_dec_avx2(void *ctx, u8 *out, } #endif -static void (*aesni_gcm_enc_tfm)(void *ctx, u8 *out, - const u8 *in, unsigned long plaintext_len, u8 *iv, - u8 *hash_subkey, const u8 *aad, unsigned long aad_len, - u8 *auth_tag, unsigned long auth_tag_len); +static void (*aesni_gcm_enc_tfm)(void *ctx, + struct gcm_context_data *data, u8 *out, + const u8 *in, unsigned long plaintext_len, + u8 *iv, u8 *hash_subkey, const u8 *aad, + unsigned long aad_len, u8 *auth_tag, + unsigned long auth_tag_len); -static void (*aesni_gcm_dec_tfm)(void *ctx, u8 *out, - const u8 *in, unsigned long ciphertext_len, u8 *iv, - u8 *hash_subkey, const u8 *aad, unsigned long aad_len, - u8 *auth_tag, unsigned long auth_tag_len); +static void (*aesni_gcm_dec_tfm)(void *ctx, + struct gcm_context_data *data, u8 *out, + const u8 *in, unsigned long ciphertext_len, + u8 *iv, u8 *hash_subkey, const u8 *aad, + unsigned long aad_len, u8 *auth_tag, + unsigned long auth_tag_len); static inline struct aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) @@ -744,6 +791,127 @@ static int generic_gcmaes_set_authsize(struct crypto_aead *tfm, return 0; } +static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, + unsigned int assoclen, u8 *hash_subkey, + u8 *iv, void *aes_ctx) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + unsigned long auth_tag_len = crypto_aead_authsize(tfm); + struct gcm_context_data data AESNI_ALIGN_ATTR; + struct scatter_walk dst_sg_walk = {}; + unsigned long left = req->cryptlen; + unsigned long len, srclen, dstlen; + struct scatter_walk assoc_sg_walk; + struct scatter_walk src_sg_walk; + struct scatterlist src_start[2]; + struct scatterlist dst_start[2]; + struct scatterlist *src_sg; + struct scatterlist *dst_sg; + u8 *src, *dst, *assoc; + u8 *assocmem = NULL; + u8 authTag[16]; + + if (!enc) + left -= auth_tag_len; + + /* Linearize assoc, if not already linear */ + if (req->src->length >= assoclen && req->src->length && + (!PageHighMem(sg_page(req->src)) || + req->src->offset + req->src->length < PAGE_SIZE)) { + scatterwalk_start(&assoc_sg_walk, req->src); + assoc = scatterwalk_map(&assoc_sg_walk); + } else { + /* assoc can be any length, so must be on heap */ + assocmem = kmalloc(assoclen, GFP_ATOMIC); + if (unlikely(!assocmem)) + return -ENOMEM; + assoc = assocmem; + + scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0); + } + + src_sg = scatterwalk_ffwd(src_start, req->src, req->assoclen); + scatterwalk_start(&src_sg_walk, src_sg); + if (req->src != req->dst) { + dst_sg = scatterwalk_ffwd(dst_start, req->dst, req->assoclen); + scatterwalk_start(&dst_sg_walk, dst_sg); + } + + kernel_fpu_begin(); + aesni_gcm_init(aes_ctx, &data, iv, + hash_subkey, assoc, assoclen); + if (req->src != req->dst) { + while (left) { + src = scatterwalk_map(&src_sg_walk); + dst = scatterwalk_map(&dst_sg_walk); + srclen = scatterwalk_clamp(&src_sg_walk, left); + dstlen = scatterwalk_clamp(&dst_sg_walk, left); + len = min(srclen, dstlen); + if (len) { + if (enc) + aesni_gcm_enc_update(aes_ctx, &data, + dst, src, len); + else + aesni_gcm_dec_update(aes_ctx, &data, + dst, src, len); + } + left -= len; + + scatterwalk_unmap(src); + scatterwalk_unmap(dst); + scatterwalk_advance(&src_sg_walk, len); + scatterwalk_advance(&dst_sg_walk, len); + scatterwalk_done(&src_sg_walk, 0, left); + scatterwalk_done(&dst_sg_walk, 1, left); + } + } else { + while (left) { + dst = src = scatterwalk_map(&src_sg_walk); + len = scatterwalk_clamp(&src_sg_walk, left); + if (len) { + if (enc) + aesni_gcm_enc_update(aes_ctx, &data, + src, src, len); + else + aesni_gcm_dec_update(aes_ctx, &data, + src, src, len); + } + left -= len; + scatterwalk_unmap(src); + scatterwalk_advance(&src_sg_walk, len); + scatterwalk_done(&src_sg_walk, 1, left); + } + } + aesni_gcm_finalize(aes_ctx, &data, authTag, auth_tag_len); + kernel_fpu_end(); + + if (!assocmem) + scatterwalk_unmap(assoc); + else + kfree(assocmem); + + if (!enc) { + u8 authTagMsg[16]; + + /* Copy out original authTag */ + scatterwalk_map_and_copy(authTagMsg, req->src, + req->assoclen + req->cryptlen - + auth_tag_len, + auth_tag_len, 0); + + /* Compare generated tag with passed in tag. */ + return crypto_memneq(authTagMsg, authTag, auth_tag_len) ? + -EBADMSG : 0; + } + + /* Copy in the authTag */ + scatterwalk_map_and_copy(authTag, req->dst, + req->assoclen + req->cryptlen, + auth_tag_len, 1); + + return 0; +} + static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen, u8 *hash_subkey, u8 *iv, void *aes_ctx) { @@ -753,7 +921,14 @@ static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen, unsigned long auth_tag_len = crypto_aead_authsize(tfm); struct scatter_walk src_sg_walk; struct scatter_walk dst_sg_walk = {}; + struct gcm_context_data data AESNI_ALIGN_ATTR; + if (((struct crypto_aes_ctx *)aes_ctx)->key_length != AES_KEYSIZE_128 || + aesni_gcm_enc_tfm == aesni_gcm_enc || + req->cryptlen < AVX_GEN2_OPTSIZE) { + return gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv, + aes_ctx); + } if (sg_is_last(req->src) && (!PageHighMem(sg_page(req->src)) || req->src->offset + req->src->length <= PAGE_SIZE) && @@ -782,7 +957,7 @@ static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen, } kernel_fpu_begin(); - aesni_gcm_enc_tfm(aes_ctx, dst, src, req->cryptlen, iv, + aesni_gcm_enc_tfm(aes_ctx, &data, dst, src, req->cryptlen, iv, hash_subkey, assoc, assoclen, dst + req->cryptlen, auth_tag_len); kernel_fpu_end(); @@ -817,8 +992,15 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen, u8 authTag[16]; struct scatter_walk src_sg_walk; struct scatter_walk dst_sg_walk = {}; + struct gcm_context_data data AESNI_ALIGN_ATTR; int retval = 0; + if (((struct crypto_aes_ctx *)aes_ctx)->key_length != AES_KEYSIZE_128 || + aesni_gcm_enc_tfm == aesni_gcm_enc || + req->cryptlen < AVX_GEN2_OPTSIZE) { + return gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv, + aes_ctx); + } tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len); if (sg_is_last(req->src) && @@ -849,7 +1031,7 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen, kernel_fpu_begin(); - aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv, + aesni_gcm_dec_tfm(aes_ctx, &data, dst, src, tempCipherLen, iv, hash_subkey, assoc, assoclen, authTag, auth_tag_len); kernel_fpu_end(); diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index f9eca34301e2..3e0c07cc9124 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -25,13 +25,13 @@ * */ -#include <asm/processor.h> +#include <crypto/algapi.h> #include <crypto/blowfish.h> +#include <crypto/internal/skcipher.h> #include <linux/crypto.h> #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> -#include <crypto/algapi.h> /* regular block cipher functions */ asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, @@ -77,20 +77,28 @@ static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src); } -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, +static int blowfish_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return blowfish_setkey(&tfm->base, key, keylen); +} + +static int ecb_crypt(struct skcipher_request *req, void (*fn)(struct bf_ctx *, u8 *, const u8 *), void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *)) { - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = BF_BLOCK_SIZE; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int nbytes; int err; - err = blkcipher_walk_virt(desc, walk); + err = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; + while ((nbytes = walk.nbytes)) { + u8 *wsrc = walk.src.virt.addr; + u8 *wdst = walk.dst.virt.addr; /* Process four block batch */ if (nbytes >= bsize * 4) { @@ -116,34 +124,25 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, } while (nbytes >= bsize); done: - err = blkcipher_walk_done(desc, walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way); + return ecb_crypt(req, blowfish_enc_blk, blowfish_enc_blk_4way); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way); + return ecb_crypt(req, blowfish_dec_blk, blowfish_dec_blk_4way); } -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __cbc_encrypt(struct bf_ctx *ctx, + struct skcipher_walk *walk) { - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = BF_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -164,27 +163,27 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, return nbytes; } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + nbytes = __cbc_encrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __cbc_decrypt(struct bf_ctx *ctx, + struct skcipher_walk *walk) { - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = BF_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -245,24 +244,25 @@ done: return nbytes; } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + nbytes = __cbc_decrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk) +static void ctr_crypt_final(struct bf_ctx *ctx, struct skcipher_walk *walk) { u8 *ctrblk = walk->iv; u8 keystream[BF_BLOCK_SIZE]; @@ -276,10 +276,8 @@ static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk) crypto_inc(ctrblk, BF_BLOCK_SIZE); } -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __ctr_crypt(struct bf_ctx *ctx, struct skcipher_walk *walk) { - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = BF_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -332,29 +330,30 @@ done: return nbytes; } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) { - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + nbytes = __ctr_crypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } - if (walk.nbytes) { - ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk); - err = blkcipher_walk_done(desc, &walk, 0); + if (nbytes) { + ctr_crypt_final(ctx, &walk); + err = skcipher_walk_done(&walk, 0); } return err; } -static struct crypto_alg bf_algs[4] = { { +static struct crypto_alg bf_cipher_alg = { .cra_name = "blowfish", .cra_driver_name = "blowfish-asm", .cra_priority = 200, @@ -372,66 +371,50 @@ static struct crypto_alg bf_algs[4] = { { .cia_decrypt = blowfish_decrypt, } } -}, { - .cra_name = "ecb(blowfish)", - .cra_driver_name = "ecb-blowfish-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .setkey = blowfish_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, +}; + +static struct skcipher_alg bf_skcipher_algs[] = { + { + .base.cra_name = "ecb(blowfish)", + .base.cra_driver_name = "ecb-blowfish-asm", + .base.cra_priority = 300, + .base.cra_blocksize = BF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct bf_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .setkey = blowfish_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(blowfish)", + .base.cra_driver_name = "cbc-blowfish-asm", + .base.cra_priority = 300, + .base.cra_blocksize = BF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct bf_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .ivsize = BF_BLOCK_SIZE, + .setkey = blowfish_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "ctr(blowfish)", + .base.cra_driver_name = "ctr-blowfish-asm", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct bf_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .ivsize = BF_BLOCK_SIZE, + .chunksize = BF_BLOCK_SIZE, + .setkey = blowfish_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, }, -}, { - .cra_name = "cbc(blowfish)", - .cra_driver_name = "cbc-blowfish-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .ivsize = BF_BLOCK_SIZE, - .setkey = blowfish_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ctr(blowfish)", - .cra_driver_name = "ctr-blowfish-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .ivsize = BF_BLOCK_SIZE, - .setkey = blowfish_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -} }; +}; static bool is_blacklisted_cpu(void) { @@ -456,6 +439,8 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); static int __init init(void) { + int err; + if (!force && is_blacklisted_cpu()) { printk(KERN_INFO "blowfish-x86_64: performance on this CPU " @@ -464,12 +449,23 @@ static int __init init(void) return -ENODEV; } - return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs)); + err = crypto_register_alg(&bf_cipher_alg); + if (err) + return err; + + err = crypto_register_skciphers(bf_skcipher_algs, + ARRAY_SIZE(bf_skcipher_algs)); + if (err) + crypto_unregister_alg(&bf_cipher_alg); + + return err; } static void __exit fini(void) { - crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs)); + crypto_unregister_alg(&bf_cipher_alg); + crypto_unregister_skciphers(bf_skcipher_algs, + ARRAY_SIZE(bf_skcipher_algs)); } module_init(init); diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index 60907c139c4e..d4992e458f92 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -10,18 +10,15 @@ * */ -#include <linux/module.h> -#include <linux/types.h> -#include <linux/crypto.h> -#include <linux/err.h> -#include <crypto/ablk_helper.h> -#include <crypto/algapi.h> -#include <crypto/ctr.h> -#include <crypto/lrw.h> -#include <crypto/xts.h> -#include <asm/fpu/api.h> #include <asm/crypto/camellia.h> #include <asm/crypto/glue_helper.h> +#include <crypto/algapi.h> +#include <crypto/internal/simd.h> +#include <crypto/xts.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/types.h> #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 #define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32 @@ -150,413 +147,120 @@ static const struct common_glue_ctx camellia_dec_xts = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) { - return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes); + return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen, + &tfm->base.crt_flags); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&camellia_enc, req); } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc, - dst, src, nbytes); -} - -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src, - nbytes); -} - -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes); -} - -static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes) -{ - return glue_fpu_begin(CAMELLIA_BLOCK_SIZE, - CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled, - nbytes); -} - -static inline void camellia_fpu_end(bool fpu_enabled) -{ - glue_fpu_end(fpu_enabled); -} - -static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len) -{ - return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len, - &tfm->crt_flags); -} - -struct crypt_priv { - struct camellia_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) { - camellia_ecb_enc_32way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS; - } - - if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) { - camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - } - - while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { - camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_enc_blk(ctx->ctx, srcdst, srcdst); + return glue_ecb_req_128bit(&camellia_dec, req); } -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) { - camellia_ecb_dec_32way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS; - } - - if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) { - camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - } - - while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { - camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_dec_blk(ctx->ctx, srcdst, srcdst); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk), + req); } -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->camellia_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - camellia_fpu_end(crypt_ctx.fpu_enabled); - - return ret; + return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); } -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->camellia_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - camellia_fpu_end(crypt_ctx.fpu_enabled); - - return ret; + return glue_ctr_req_128bit(&camellia_ctr, req); } -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&camellia_enc_xts, req, + XTS_TWEAK_CAST(camellia_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&camellia_dec_xts, req, + XTS_TWEAK_CAST(camellia_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static struct crypto_alg cmll_algs[10] = { { - .cra_name = "__ecb-camellia-aesni-avx2", - .cra_driver_name = "__driver-ecb-camellia-aesni-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = camellia_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-camellia-aesni-avx2", - .cra_driver_name = "__driver-cbc-camellia-aesni-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = camellia_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-camellia-aesni-avx2", - .cra_driver_name = "__driver-ctr-camellia-aesni-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__lrw-camellia-aesni-avx2", - .cra_driver_name = "__driver-lrw-camellia-aesni-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_camellia_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = lrw_camellia_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-camellia-aesni-avx2", - .cra_driver_name = "__driver-xts-camellia-aesni-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, - .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = xts_camellia_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(camellia)", - .cra_driver_name = "ecb-camellia-aesni-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(camellia)", - .cra_driver_name = "cbc-camellia-aesni-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(camellia)", - .cra_driver_name = "ctr-camellia-aesni-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "lrw(camellia)", - .cra_driver_name = "lrw-camellia-aesni-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(camellia)", - .cra_driver_name = "xts-camellia-aesni-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, - .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, +static struct skcipher_alg camellia_algs[] = { + { + .base.cra_name = "__ecb(camellia)", + .base.cra_driver_name = "__ecb-camellia-aesni-avx2", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(camellia)", + .base.cra_driver_name = "__cbc-camellia-aesni-avx2", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(camellia)", + .base.cra_driver_name = "__ctr-camellia-aesni-avx2", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .chunksize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base.cra_name = "__xts(camellia)", + .base.cra_driver_name = "__xts-camellia-aesni-avx2", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * CAMELLIA_MIN_KEY_SIZE, + .max_keysize = 2 * CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = xts_camellia_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, }, -} }; +}; + +static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)]; static int __init camellia_aesni_init(void) { @@ -576,12 +280,15 @@ static int __init camellia_aesni_init(void) return -ENODEV; } - return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs)); + return simd_register_skciphers_compat(camellia_algs, + ARRAY_SIZE(camellia_algs), + camellia_simd_algs); } static void __exit camellia_aesni_fini(void) { - crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs)); + simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs), + camellia_simd_algs); } module_init(camellia_aesni_init); diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index d96429da88eb..d09f6521466a 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -10,18 +10,15 @@ * */ -#include <linux/module.h> -#include <linux/types.h> -#include <linux/crypto.h> -#include <linux/err.h> -#include <crypto/ablk_helper.h> -#include <crypto/algapi.h> -#include <crypto/ctr.h> -#include <crypto/lrw.h> -#include <crypto/xts.h> -#include <asm/fpu/api.h> #include <asm/crypto/camellia.h> #include <asm/crypto/glue_helper.h> +#include <crypto/algapi.h> +#include <crypto/internal/simd.h> +#include <crypto/xts.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/types.h> #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 @@ -154,401 +151,142 @@ static const struct common_glue_ctx camellia_dec_xts = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) { - return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes); + return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen, + &tfm->base.crt_flags); } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc, - dst, src, nbytes); + return glue_ecb_req_128bit(&camellia_enc, req); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src, - nbytes); + return glue_ecb_req_128bit(&camellia_dec, req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk), + req); } -static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_fpu_begin(CAMELLIA_BLOCK_SIZE, - CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled, - nbytes); + return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); } -static inline void camellia_fpu_end(bool fpu_enabled) +static int ctr_crypt(struct skcipher_request *req) { - glue_fpu_end(fpu_enabled); + return glue_ctr_req_128bit(&camellia_ctr, req); } -static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len) +int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) { - return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len, - &tfm->crt_flags); + struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + u32 *flags = &tfm->base.crt_flags; + int err; + + err = xts_verify_key(tfm, key, keylen); + if (err) + return err; + + /* first half of xts-key is for crypt */ + err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, + flags); } +EXPORT_SYMBOL_GPL(xts_camellia_setkey); -struct crypt_priv { - struct camellia_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) { - camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - } - - while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { - camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; - } + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_enc_blk(ctx->ctx, srcdst, srcdst); + return glue_xts_req_128bit(&camellia_enc_xts, req, + XTS_TWEAK_CAST(camellia_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) { - camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - } + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { - camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_dec_blk(ctx->ctx, srcdst, srcdst); + return glue_xts_req_128bit(&camellia_dec_xts, req, + XTS_TWEAK_CAST(camellia_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->camellia_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - camellia_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->camellia_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - camellia_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static struct crypto_alg cmll_algs[10] = { { - .cra_name = "__ecb-camellia-aesni", - .cra_driver_name = "__driver-ecb-camellia-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = camellia_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-camellia-aesni", - .cra_driver_name = "__driver-cbc-camellia-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = camellia_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-camellia-aesni", - .cra_driver_name = "__driver-ctr-camellia-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__lrw-camellia-aesni", - .cra_driver_name = "__driver-lrw-camellia-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_camellia_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = lrw_camellia_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-camellia-aesni", - .cra_driver_name = "__driver-xts-camellia-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, - .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = xts_camellia_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(camellia)", - .cra_driver_name = "ecb-camellia-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(camellia)", - .cra_driver_name = "cbc-camellia-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(camellia)", - .cra_driver_name = "ctr-camellia-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "lrw(camellia)", - .cra_driver_name = "lrw-camellia-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(camellia)", - .cra_driver_name = "xts-camellia-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, - .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, +static struct skcipher_alg camellia_algs[] = { + { + .base.cra_name = "__ecb(camellia)", + .base.cra_driver_name = "__ecb-camellia-aesni", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(camellia)", + .base.cra_driver_name = "__cbc-camellia-aesni", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(camellia)", + .base.cra_driver_name = "__ctr-camellia-aesni", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .chunksize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base.cra_name = "__xts(camellia)", + .base.cra_driver_name = "__xts-camellia-aesni", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * CAMELLIA_MIN_KEY_SIZE, + .max_keysize = 2 * CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = xts_camellia_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, }, -} }; +}; + +static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)]; static int __init camellia_aesni_init(void) { @@ -567,12 +305,15 @@ static int __init camellia_aesni_init(void) return -ENODEV; } - return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs)); + return simd_register_skciphers_compat(camellia_algs, + ARRAY_SIZE(camellia_algs), + camellia_simd_algs); } static void __exit camellia_aesni_fini(void) { - crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs)); + simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs), + camellia_simd_algs); } module_init(camellia_aesni_init); diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index af4840ab2a3d..dcd5e0f71b00 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -23,15 +23,12 @@ * */ -#include <asm/processor.h> #include <asm/unaligned.h> #include <linux/crypto.h> #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> #include <crypto/algapi.h> -#include <crypto/lrw.h> -#include <crypto/xts.h> #include <asm/crypto/camellia.h> #include <asm/crypto/glue_helper.h> @@ -1272,13 +1269,19 @@ int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key, } EXPORT_SYMBOL_GPL(__camellia_setkey); -static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, +static int camellia_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len) { - return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len, + return __camellia_setkey(crypto_tfm_ctx(tfm), key, key_len, &tfm->crt_flags); } +static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + return camellia_setkey(&tfm->base, key, key_len); +} + void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src) { u128 iv = *src; @@ -1373,188 +1376,33 @@ static const struct common_glue_ctx camellia_dec_cbc = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes); -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc, - dst, src, nbytes); -} - -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src, - nbytes); -} - -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes); -} - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct camellia_ctx *ctx = priv; - int i; - - while (nbytes >= 2 * bsize) { - camellia_enc_blk_2way(ctx, srcdst, srcdst); - srcdst += bsize * 2; - nbytes -= bsize * 2; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_enc_blk(ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct camellia_ctx *ctx = priv; - int i; - - while (nbytes >= 2 * bsize) { - camellia_dec_blk_2way(ctx, srcdst, srcdst); - srcdst += bsize * 2; - nbytes -= bsize * 2; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_dec_blk(ctx, srcdst, srcdst); -} - -int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __camellia_setkey(&ctx->camellia_ctx, key, - keylen - CAMELLIA_BLOCK_SIZE, - &tfm->crt_flags); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, - key + keylen - CAMELLIA_BLOCK_SIZE); -} -EXPORT_SYMBOL_GPL(lrw_camellia_setkey); - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[2 * 4]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &ctx->camellia_ctx, - .crypt_fn = encrypt_callback, - }; - - return lrw_crypt(desc, dst, src, nbytes, &req); -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[2 * 4]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &ctx->camellia_ctx, - .crypt_fn = decrypt_callback, - }; - - return lrw_crypt(desc, dst, src, nbytes, &req); + return glue_ecb_req_128bit(&camellia_enc, req); } -void lrw_camellia_exit_tfm(struct crypto_tfm *tfm) +static int ecb_decrypt(struct skcipher_request *req) { - struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); + return glue_ecb_req_128bit(&camellia_dec, req); } -EXPORT_SYMBOL_GPL(lrw_camellia_exit_tfm); -int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) +static int cbc_encrypt(struct skcipher_request *req) { - struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; - int err; - - err = xts_check_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, - flags); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk), + req); } -EXPORT_SYMBOL_GPL(xts_camellia_setkey); -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - le128 buf[2 * 4]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), - .crypt_ctx = &ctx->crypt_ctx, - .crypt_fn = encrypt_callback, - }; - - return xts_crypt(desc, dst, src, nbytes, &req); + return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - le128 buf[2 * 4]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), - .crypt_ctx = &ctx->crypt_ctx, - .crypt_fn = decrypt_callback, - }; - - return xts_crypt(desc, dst, src, nbytes, &req); + return glue_ctr_req_128bit(&camellia_ctr, req); } -static struct crypto_alg camellia_algs[6] = { { +static struct crypto_alg camellia_cipher_alg = { .cra_name = "camellia", .cra_driver_name = "camellia-asm", .cra_priority = 200, @@ -1572,109 +1420,50 @@ static struct crypto_alg camellia_algs[6] = { { .cia_decrypt = camellia_decrypt } } -}, { - .cra_name = "ecb(camellia)", - .cra_driver_name = "ecb-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = camellia_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "cbc(camellia)", - .cra_driver_name = "cbc-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ctr(camellia)", - .cra_driver_name = "ctr-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "lrw(camellia)", - .cra_driver_name = "lrw-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_camellia_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = lrw_camellia_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "xts(camellia)", - .cra_driver_name = "xts-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, - .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = xts_camellia_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -} }; +}; + +static struct skcipher_alg camellia_skcipher_algs[] = { + { + .base.cra_name = "ecb(camellia)", + .base.cra_driver_name = "ecb-camellia-asm", + .base.cra_priority = 300, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(camellia)", + .base.cra_driver_name = "cbc-camellia-asm", + .base.cra_priority = 300, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "ctr(camellia)", + .base.cra_driver_name = "ctr-camellia-asm", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .chunksize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; static bool is_blacklisted_cpu(void) { @@ -1700,6 +1489,8 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); static int __init init(void) { + int err; + if (!force && is_blacklisted_cpu()) { printk(KERN_INFO "camellia-x86_64: performance on this CPU " @@ -1708,12 +1499,23 @@ static int __init init(void) return -ENODEV; } - return crypto_register_algs(camellia_algs, ARRAY_SIZE(camellia_algs)); + err = crypto_register_alg(&camellia_cipher_alg); + if (err) + return err; + + err = crypto_register_skciphers(camellia_skcipher_algs, + ARRAY_SIZE(camellia_skcipher_algs)); + if (err) + crypto_unregister_alg(&camellia_cipher_alg); + + return err; } static void __exit fini(void) { - crypto_unregister_algs(camellia_algs, ARRAY_SIZE(camellia_algs)); + crypto_unregister_alg(&camellia_cipher_alg); + crypto_unregister_skciphers(camellia_skcipher_algs, + ARRAY_SIZE(camellia_skcipher_algs)); } module_init(init); diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index dbea6020ffe7..41034745d6a2 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -21,18 +21,14 @@ * */ -#include <linux/module.h> -#include <linux/hardirq.h> -#include <linux/types.h> -#include <linux/crypto.h> -#include <linux/err.h> -#include <crypto/ablk_helper.h> +#include <asm/crypto/glue_helper.h> #include <crypto/algapi.h> #include <crypto/cast5.h> -#include <crypto/cryptd.h> -#include <crypto/ctr.h> -#include <asm/fpu/api.h> -#include <asm/crypto/glue_helper.h> +#include <crypto/internal/simd.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/types.h> #define CAST5_PARALLEL_BLOCKS 16 @@ -45,10 +41,17 @@ asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst, asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src, __be64 *iv); -static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes) +static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + return cast5_setkey(&tfm->base, key, keylen); +} + +static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk, + unsigned int nbytes) { return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS, - NULL, fpu_enabled, nbytes); + walk, fpu_enabled, nbytes); } static inline void cast5_fpu_end(bool fpu_enabled) @@ -56,29 +59,28 @@ static inline void cast5_fpu_end(bool fpu_enabled) return glue_fpu_end(fpu_enabled); } -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, - bool enc) +static int ecb_crypt(struct skcipher_request *req, bool enc) { bool fpu_enabled = false; - struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; const unsigned int bsize = CAST5_BLOCK_SIZE; unsigned int nbytes; void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src); int err; - fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way; - - err = blkcipher_walk_virt(desc, walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; + while ((nbytes = walk.nbytes)) { + u8 *wsrc = walk.src.virt.addr; + u8 *wdst = walk.dst.virt.addr; - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); /* Process multi-block batch */ if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { + fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way; do { fn(ctx, wdst, wsrc); @@ -103,76 +105,58 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, } while (nbytes >= bsize); done: - err = blkcipher_walk_done(desc, walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } cast5_fpu_end(fpu_enabled); return err; } -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, true); + return ecb_crypt(req, true); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, false); + return ecb_crypt(req, false); } -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static int cbc_encrypt(struct skcipher_request *req) { - struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = CAST5_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - u64 *iv = (u64 *)walk->iv; - - do { - *dst = *src ^ *iv; - __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - - *(u64 *)walk->iv = *iv; - return nbytes; -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + u64 *src = (u64 *)walk.src.virt.addr; + u64 *dst = (u64 *)walk.dst.virt.addr; + u64 *iv = (u64 *)walk.iv; + + do { + *dst = *src ^ *iv; + __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + src++; + dst++; + nbytes -= bsize; + } while (nbytes >= bsize); + + *(u64 *)walk.iv = *iv; + err = skcipher_walk_done(&walk, nbytes); } return err; } -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __cbc_decrypt(struct cast5_ctx *ctx, + struct skcipher_walk *walk) { - struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = CAST5_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -224,31 +208,29 @@ done: return nbytes; } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); bool fpu_enabled = false; - struct blkcipher_walk walk; + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); + nbytes = __cbc_decrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } cast5_fpu_end(fpu_enabled); return err; } -static void ctr_crypt_final(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx) { - struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); u8 *ctrblk = walk->iv; u8 keystream[CAST5_BLOCK_SIZE]; u8 *src = walk->src.virt.addr; @@ -261,10 +243,9 @@ static void ctr_crypt_final(struct blkcipher_desc *desc, crypto_inc(ctrblk, CAST5_BLOCK_SIZE); } -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __ctr_crypt(struct skcipher_walk *walk, + struct cast5_ctx *ctx) { - struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = CAST5_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -307,162 +288,80 @@ done: return nbytes; } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); bool fpu_enabled = false; - struct blkcipher_walk walk; + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); + nbytes = __ctr_crypt(&walk, ctx); + err = skcipher_walk_done(&walk, nbytes); } cast5_fpu_end(fpu_enabled); if (walk.nbytes) { - ctr_crypt_final(desc, &walk); - err = blkcipher_walk_done(desc, &walk, 0); + ctr_crypt_final(&walk, ctx); + err = skcipher_walk_done(&walk, 0); } return err; } +static struct skcipher_alg cast5_algs[] = { + { + .base.cra_name = "__ecb(cast5)", + .base.cra_driver_name = "__ecb-cast5-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST5_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct cast5_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .setkey = cast5_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(cast5)", + .base.cra_driver_name = "__cbc-cast5-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST5_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct cast5_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .ivsize = CAST5_BLOCK_SIZE, + .setkey = cast5_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(cast5)", + .base.cra_driver_name = "__ctr-cast5-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct cast5_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .ivsize = CAST5_BLOCK_SIZE, + .chunksize = CAST5_BLOCK_SIZE, + .setkey = cast5_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; -static struct crypto_alg cast5_algs[6] = { { - .cra_name = "__ecb-cast5-avx", - .cra_driver_name = "__driver-ecb-cast5-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAST5_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct cast5_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .setkey = cast5_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-cast5-avx", - .cra_driver_name = "__driver-cbc-cast5-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAST5_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct cast5_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .setkey = cast5_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-cast5-avx", - .cra_driver_name = "__driver-ctr-cast5-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cast5_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .ivsize = CAST5_BLOCK_SIZE, - .setkey = cast5_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "ecb(cast5)", - .cra_driver_name = "ecb-cast5-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAST5_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(cast5)", - .cra_driver_name = "cbc-cast5-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAST5_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .ivsize = CAST5_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(cast5)", - .cra_driver_name = "ctr-cast5-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .ivsize = CAST5_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -} }; +static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)]; static int __init cast5_init(void) { @@ -474,12 +373,15 @@ static int __init cast5_init(void) return -ENODEV; } - return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs)); + return simd_register_skciphers_compat(cast5_algs, + ARRAY_SIZE(cast5_algs), + cast5_simd_algs); } static void __exit cast5_exit(void) { - crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs)); + simd_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs), + cast5_simd_algs); } module_init(cast5_init); diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 50e684768c55..9fb66b5e94b2 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -24,19 +24,13 @@ */ #include <linux/module.h> -#include <linux/hardirq.h> #include <linux/types.h> #include <linux/crypto.h> #include <linux/err.h> -#include <crypto/ablk_helper.h> #include <crypto/algapi.h> #include <crypto/cast6.h> -#include <crypto/cryptd.h> -#include <crypto/b128ops.h> -#include <crypto/ctr.h> -#include <crypto/lrw.h> +#include <crypto/internal/simd.h> #include <crypto/xts.h> -#include <asm/fpu/api.h> #include <asm/crypto/glue_helper.h> #define CAST6_PARALLEL_BLOCKS 8 @@ -56,6 +50,12 @@ asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst, asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src, le128 *iv); +static int cast6_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return cast6_setkey(&tfm->base, key, keylen); +} + static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) { glue_xts_crypt_128bit_one(ctx, dst, src, iv, @@ -157,164 +157,30 @@ static const struct common_glue_ctx cast6_dec_xts = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&cast6_enc, desc, dst, src, nbytes); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&cast6_dec, desc, dst, src, nbytes); -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__cast6_encrypt), desc, - dst, src, nbytes); -} - -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_decrypt_128bit(&cast6_dec_cbc, desc, dst, src, - nbytes); -} - -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&cast6_ctr, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&cast6_enc, req); } -static inline bool cast6_fpu_begin(bool fpu_enabled, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_fpu_begin(CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS, - NULL, fpu_enabled, nbytes); + return glue_ecb_req_128bit(&cast6_dec, req); } -static inline void cast6_fpu_end(bool fpu_enabled) +static int cbc_encrypt(struct skcipher_request *req) { - glue_fpu_end(fpu_enabled); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__cast6_encrypt), + req); } -struct crypt_priv { - struct cast6_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - const unsigned int bsize = CAST6_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { - cast6_ecb_enc_8way(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __cast6_encrypt(ctx->ctx, srcdst, srcdst); + return glue_cbc_decrypt_req_128bit(&cast6_dec_cbc, req); } -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - const unsigned int bsize = CAST6_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { - cast6_ecb_dec_8way(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __cast6_decrypt(ctx->ctx, srcdst, srcdst); -} - -struct cast6_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct cast6_ctx cast6_ctx; -}; - -static int lrw_cast6_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __cast6_setkey(&ctx->cast6_ctx, key, keylen - CAST6_BLOCK_SIZE, - &tfm->crt_flags); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, key + keylen - CAST6_BLOCK_SIZE); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[CAST6_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->cast6_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - cast6_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[CAST6_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->cast6_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - cast6_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static void lrw_exit_tfm(struct crypto_tfm *tfm) -{ - struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); + return glue_ctr_req_128bit(&cast6_ctr, req); } struct cast6_xts_ctx { @@ -322,14 +188,14 @@ struct cast6_xts_ctx { struct cast6_ctx crypt_ctx; }; -static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) +static int xts_cast6_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) { - struct cast6_xts_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; + struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + u32 *flags = &tfm->base.crt_flags; int err; - err = xts_check_key(tfm, key, keylen); + err = xts_verify_key(tfm, key, keylen); if (err) return err; @@ -343,245 +209,87 @@ static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key, flags); } -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&cast6_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(__cast6_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&cast6_enc_xts, req, + XTS_TWEAK_CAST(__cast6_encrypt), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&cast6_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(__cast6_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&cast6_dec_xts, req, + XTS_TWEAK_CAST(__cast6_encrypt), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static struct crypto_alg cast6_algs[10] = { { - .cra_name = "__ecb-cast6-avx", - .cra_driver_name = "__driver-ecb-cast6-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct cast6_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .setkey = cast6_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-cast6-avx", - .cra_driver_name = "__driver-cbc-cast6-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct cast6_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .setkey = cast6_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-cast6-avx", - .cra_driver_name = "__driver-ctr-cast6-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cast6_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = cast6_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__lrw-cast6-avx", - .cra_driver_name = "__driver-lrw-cast6-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct cast6_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE + - CAST6_BLOCK_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE + - CAST6_BLOCK_SIZE, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = lrw_cast6_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-cast6-avx", - .cra_driver_name = "__driver-xts-cast6-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct cast6_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE * 2, - .max_keysize = CAST6_MAX_KEY_SIZE * 2, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = xts_cast6_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(cast6)", - .cra_driver_name = "ecb-cast6-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(cast6)", - .cra_driver_name = "cbc-cast6-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(cast6)", - .cra_driver_name = "ctr-cast6-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "lrw(cast6)", - .cra_driver_name = "lrw-cast6-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE + - CAST6_BLOCK_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE + - CAST6_BLOCK_SIZE, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(cast6)", - .cra_driver_name = "xts-cast6-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE * 2, - .max_keysize = CAST6_MAX_KEY_SIZE * 2, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, +static struct skcipher_alg cast6_algs[] = { + { + .base.cra_name = "__ecb(cast6)", + .base.cra_driver_name = "__ecb-cast6-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST6_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct cast6_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .setkey = cast6_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(cast6)", + .base.cra_driver_name = "__cbc-cast6-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST6_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct cast6_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = cast6_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(cast6)", + .base.cra_driver_name = "__ctr-cast6-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct cast6_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .chunksize = CAST6_BLOCK_SIZE, + .setkey = cast6_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base.cra_name = "__xts(cast6)", + .base.cra_driver_name = "__xts-cast6-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST6_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct cast6_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * CAST6_MIN_KEY_SIZE, + .max_keysize = 2 * CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = xts_cast6_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, }, -} }; +}; + +static struct simd_skcipher_alg *cast6_simd_algs[ARRAY_SIZE(cast6_algs)]; static int __init cast6_init(void) { @@ -593,12 +301,15 @@ static int __init cast6_init(void) return -ENODEV; } - return crypto_register_algs(cast6_algs, ARRAY_SIZE(cast6_algs)); + return simd_register_skciphers_compat(cast6_algs, + ARRAY_SIZE(cast6_algs), + cast6_simd_algs); } static void __exit cast6_exit(void) { - crypto_unregister_algs(cast6_algs, ARRAY_SIZE(cast6_algs)); + simd_unregister_skciphers(cast6_algs, ARRAY_SIZE(cast6_algs), + cast6_simd_algs); } module_init(cast6_init); diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c index 30c0a37f4882..5c610d4ef9fc 100644 --- a/arch/x86/crypto/des3_ede_glue.c +++ b/arch/x86/crypto/des3_ede_glue.c @@ -20,13 +20,13 @@ * */ -#include <asm/processor.h> +#include <crypto/algapi.h> #include <crypto/des.h> +#include <crypto/internal/skcipher.h> #include <linux/crypto.h> #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> -#include <crypto/algapi.h> struct des3_ede_x86_ctx { u32 enc_expkey[DES3_EDE_EXPKEY_WORDS]; @@ -83,18 +83,18 @@ static void des3_ede_x86_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) des3_ede_dec_blk(crypto_tfm_ctx(tfm), dst, src); } -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, - const u32 *expkey) +static int ecb_crypt(struct skcipher_request *req, const u32 *expkey) { - unsigned int bsize = DES3_EDE_BLOCK_SIZE; + const unsigned int bsize = DES3_EDE_BLOCK_SIZE; + struct skcipher_walk walk; unsigned int nbytes; int err; - err = blkcipher_walk_virt(desc, walk); + err = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; + while ((nbytes = walk.nbytes)) { + u8 *wsrc = walk.src.virt.addr; + u8 *wdst = walk.dst.virt.addr; /* Process four block batch */ if (nbytes >= bsize * 3) { @@ -121,36 +121,31 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, } while (nbytes >= bsize); done: - err = blkcipher_walk_done(desc, walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, ctx->enc_expkey); + return ecb_crypt(req, ctx->enc_expkey); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, ctx->dec_expkey); + return ecb_crypt(req, ctx->dec_expkey); } -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __cbc_encrypt(struct des3_ede_x86_ctx *ctx, + struct skcipher_walk *walk) { - struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = DES3_EDE_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -171,27 +166,27 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, return nbytes; } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + nbytes = __cbc_encrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __cbc_decrypt(struct des3_ede_x86_ctx *ctx, + struct skcipher_walk *walk) { - struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = DES3_EDE_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -250,25 +245,26 @@ done: return nbytes; } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + nbytes = __cbc_decrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } return err; } static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx, - struct blkcipher_walk *walk) + struct skcipher_walk *walk) { u8 *ctrblk = walk->iv; u8 keystream[DES3_EDE_BLOCK_SIZE]; @@ -282,10 +278,9 @@ static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx, crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE); } -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __ctr_crypt(struct des3_ede_x86_ctx *ctx, + struct skcipher_walk *walk) { - struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = DES3_EDE_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; __be64 *src = (__be64 *)walk->src.virt.addr; @@ -333,23 +328,24 @@ done: return nbytes; } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, DES3_EDE_BLOCK_SIZE); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) { - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + nbytes = __ctr_crypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } - if (walk.nbytes) { - ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk); - err = blkcipher_walk_done(desc, &walk, 0); + if (nbytes) { + ctr_crypt_final(ctx, &walk); + err = skcipher_walk_done(&walk, 0); } return err; @@ -381,7 +377,14 @@ static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key, return 0; } -static struct crypto_alg des3_ede_algs[4] = { { +static int des3_ede_x86_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, + unsigned int keylen) +{ + return des3_ede_x86_setkey(&tfm->base, key, keylen); +} + +static struct crypto_alg des3_ede_cipher = { .cra_name = "des3_ede", .cra_driver_name = "des3_ede-asm", .cra_priority = 200, @@ -399,66 +402,50 @@ static struct crypto_alg des3_ede_algs[4] = { { .cia_decrypt = des3_ede_x86_decrypt, } } -}, { - .cra_name = "ecb(des3_ede)", - .cra_driver_name = "ecb-des3_ede-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .setkey = des3_ede_x86_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "cbc(des3_ede)", - .cra_driver_name = "cbc-des3_ede-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, - .setkey = des3_ede_x86_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ctr(des3_ede)", - .cra_driver_name = "ctr-des3_ede-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, - .setkey = des3_ede_x86_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -} }; +}; + +static struct skcipher_alg des3_ede_skciphers[] = { + { + .base.cra_name = "ecb(des3_ede)", + .base.cra_driver_name = "ecb-des3_ede-asm", + .base.cra_priority = 300, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .setkey = des3_ede_x86_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(des3_ede)", + .base.cra_driver_name = "cbc-des3_ede-asm", + .base.cra_priority = 300, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .setkey = des3_ede_x86_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "ctr(des3_ede)", + .base.cra_driver_name = "ctr-des3_ede-asm", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .chunksize = DES3_EDE_BLOCK_SIZE, + .setkey = des3_ede_x86_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; static bool is_blacklisted_cpu(void) { @@ -483,17 +470,30 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); static int __init des3_ede_x86_init(void) { + int err; + if (!force && is_blacklisted_cpu()) { pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n"); return -ENODEV; } - return crypto_register_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs)); + err = crypto_register_alg(&des3_ede_cipher); + if (err) + return err; + + err = crypto_register_skciphers(des3_ede_skciphers, + ARRAY_SIZE(des3_ede_skciphers)); + if (err) + crypto_unregister_alg(&des3_ede_cipher); + + return err; } static void __exit des3_ede_x86_fini(void) { - crypto_unregister_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs)); + crypto_unregister_alg(&des3_ede_cipher); + crypto_unregister_skciphers(des3_ede_skciphers, + ARRAY_SIZE(des3_ede_skciphers)); } module_init(des3_ede_x86_init); diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index d61e57960fe0..a78ef99a9981 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -29,313 +29,212 @@ #include <crypto/b128ops.h> #include <crypto/gf128mul.h> #include <crypto/internal/skcipher.h> -#include <crypto/lrw.h> #include <crypto/xts.h> #include <asm/crypto/glue_helper.h> -static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req) { - void *ctx = crypto_blkcipher_ctx(desc->tfm); + void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); const unsigned int bsize = 128 / 8; - unsigned int nbytes, i, func_bytes; + struct skcipher_walk walk; bool fpu_enabled = false; + unsigned int nbytes; int err; - err = blkcipher_walk_virt(desc, walk); + err = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; + while ((nbytes = walk.nbytes)) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int func_bytes; + unsigned int i; fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); - + &walk, fpu_enabled, nbytes); for (i = 0; i < gctx->num_funcs; i++) { func_bytes = bsize * gctx->funcs[i].num_blocks; - /* Process multi-block batch */ - if (nbytes >= func_bytes) { - do { - gctx->funcs[i].fn_u.ecb(ctx, wdst, - wsrc); + if (nbytes < func_bytes) + continue; - wsrc += func_bytes; - wdst += func_bytes; - nbytes -= func_bytes; - } while (nbytes >= func_bytes); + /* Process multi-block batch */ + do { + gctx->funcs[i].fn_u.ecb(ctx, dst, src); + src += func_bytes; + dst += func_bytes; + nbytes -= func_bytes; + } while (nbytes >= func_bytes); - if (nbytes < bsize) - goto done; - } + if (nbytes < bsize) + break; } - -done: - err = blkcipher_walk_done(desc, walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } glue_fpu_end(fpu_enabled); return err; } +EXPORT_SYMBOL_GPL(glue_ecb_req_128bit); -int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn, + struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return __glue_ecb_crypt_128bit(gctx, desc, &walk); -} -EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit); - -static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - void *ctx = crypto_blkcipher_ctx(desc->tfm); + void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); const unsigned int bsize = 128 / 8; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 *iv = (u128 *)walk->iv; - - do { - u128_xor(dst, src, iv); - fn(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - - *(u128 *)walk->iv = *iv; - return nbytes; -} - -int glue_cbc_encrypt_128bit(const common_glue_func_t fn, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + const u128 *src = (u128 *)walk.src.virt.addr; + u128 *dst = (u128 *)walk.dst.virt.addr; + u128 *iv = (u128 *)walk.iv; + + do { + u128_xor(dst, src, iv); + fn(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + src++; + dst++; + nbytes -= bsize; + } while (nbytes >= bsize); + + *(u128 *)walk.iv = *iv; + err = skcipher_walk_done(&walk, nbytes); } - return err; } -EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit); +EXPORT_SYMBOL_GPL(glue_cbc_encrypt_req_128bit); -static unsigned int -__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req) { - void *ctx = crypto_blkcipher_ctx(desc->tfm); + void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); const unsigned int bsize = 128 / 8; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 last_iv; - unsigned int num_blocks, func_bytes; - unsigned int i; + struct skcipher_walk walk; + bool fpu_enabled = false; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); - /* Start of the last block. */ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; + while ((nbytes = walk.nbytes)) { + const u128 *src = walk.src.virt.addr; + u128 *dst = walk.dst.virt.addr; + unsigned int func_bytes, num_blocks; + unsigned int i; + u128 last_iv; - last_iv = *src; + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + &walk, fpu_enabled, nbytes); + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; - for (i = 0; i < gctx->num_funcs; i++) { - num_blocks = gctx->funcs[i].num_blocks; - func_bytes = bsize * num_blocks; + last_iv = *src; - /* Process multi-block batch */ - if (nbytes >= func_bytes) { + for (i = 0; i < gctx->num_funcs; i++) { + num_blocks = gctx->funcs[i].num_blocks; + func_bytes = bsize * num_blocks; + + if (nbytes < func_bytes) + continue; + + /* Process multi-block batch */ do { - nbytes -= func_bytes - bsize; src -= num_blocks - 1; dst -= num_blocks - 1; gctx->funcs[i].fn_u.cbc(ctx, dst, src); - nbytes -= bsize; + nbytes -= func_bytes; if (nbytes < bsize) goto done; - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; + u128_xor(dst, dst, --src); + dst--; } while (nbytes >= func_bytes); } - } - done: - u128_xor(dst, dst, (u128 *)walk->iv); - *(u128 *)walk->iv = last_iv; - - return nbytes; -} - -int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); - nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + u128_xor(dst, dst, (u128 *)walk.iv); + *(u128 *)walk.iv = last_iv; + err = skcipher_walk_done(&walk, nbytes); } glue_fpu_end(fpu_enabled); return err; } -EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); +EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit); -static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req) { - void *ctx = crypto_blkcipher_ctx(desc->tfm); - u8 *src = (u8 *)walk->src.virt.addr; - u8 *dst = (u8 *)walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - le128 ctrblk; - u128 tmp; + void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + const unsigned int bsize = 128 / 8; + struct skcipher_walk walk; + bool fpu_enabled = false; + unsigned int nbytes; + int err; - be128_to_le128(&ctrblk, (be128 *)walk->iv); + err = skcipher_walk_virt(&walk, req, false); - memcpy(&tmp, src, nbytes); - fn_ctr(ctx, &tmp, &tmp, &ctrblk); - memcpy(dst, &tmp, nbytes); + while ((nbytes = walk.nbytes) >= bsize) { + const u128 *src = walk.src.virt.addr; + u128 *dst = walk.dst.virt.addr; + unsigned int func_bytes, num_blocks; + unsigned int i; + le128 ctrblk; - le128_to_be128((be128 *)walk->iv, &ctrblk); -} + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + &walk, fpu_enabled, nbytes); -static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - const unsigned int bsize = 128 / 8; - void *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - le128 ctrblk; - unsigned int num_blocks, func_bytes; - unsigned int i; + be128_to_le128(&ctrblk, (be128 *)walk.iv); - be128_to_le128(&ctrblk, (be128 *)walk->iv); + for (i = 0; i < gctx->num_funcs; i++) { + num_blocks = gctx->funcs[i].num_blocks; + func_bytes = bsize * num_blocks; - /* Process multi-block batch */ - for (i = 0; i < gctx->num_funcs; i++) { - num_blocks = gctx->funcs[i].num_blocks; - func_bytes = bsize * num_blocks; + if (nbytes < func_bytes) + continue; - if (nbytes >= func_bytes) { + /* Process multi-block batch */ do { gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk); - src += num_blocks; dst += num_blocks; nbytes -= func_bytes; } while (nbytes >= func_bytes); if (nbytes < bsize) - goto done; + break; } - } - -done: - le128_to_be128((be128 *)walk->iv, &ctrblk); - return nbytes; -} - -int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, bsize); - while ((nbytes = walk.nbytes) >= bsize) { - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); - nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + le128_to_be128((be128 *)walk.iv, &ctrblk); + err = skcipher_walk_done(&walk, nbytes); } glue_fpu_end(fpu_enabled); - if (walk.nbytes) { - glue_ctr_crypt_final_128bit( - gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); - err = blkcipher_walk_done(desc, &walk, 0); - } - - return err; -} -EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit); - -static unsigned int __glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, - void *ctx, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - const unsigned int bsize = 128 / 8; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - unsigned int num_blocks, func_bytes; - unsigned int i; - - /* Process multi-block batch */ - for (i = 0; i < gctx->num_funcs; i++) { - num_blocks = gctx->funcs[i].num_blocks; - func_bytes = bsize * num_blocks; - - if (nbytes >= func_bytes) { - do { - gctx->funcs[i].fn_u.xts(ctx, dst, src, - (le128 *)walk->iv); + if (nbytes) { + le128 ctrblk; + u128 tmp; - src += num_blocks; - dst += num_blocks; - nbytes -= func_bytes; - } while (nbytes >= func_bytes); + be128_to_le128(&ctrblk, (be128 *)walk.iv); + memcpy(&tmp, walk.src.virt.addr, nbytes); + gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, &tmp, &tmp, + &ctrblk); + memcpy(walk.dst.virt.addr, &tmp, nbytes); + le128_to_be128((be128 *)walk.iv, &ctrblk); - if (nbytes < bsize) - goto done; - } + err = skcipher_walk_done(&walk, 0); } -done: - return nbytes; + return err; } +EXPORT_SYMBOL_GPL(glue_ctr_req_128bit); static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx, void *ctx, @@ -372,46 +271,6 @@ done: return nbytes; } -/* for implementations implementing faster XTS IV generator */ -int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes, - void (*tweak_fn)(void *ctx, u8 *dst, const u8 *src), - void *tweak_ctx, void *crypt_ctx) -{ - const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - - err = blkcipher_walk_virt(desc, &walk); - nbytes = walk.nbytes; - if (!nbytes) - return err; - - /* set minimum length to bsize, for tweak_fn */ - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, - nbytes < bsize ? bsize : nbytes); - - /* calculate first value of T */ - tweak_fn(tweak_ctx, walk.iv, walk.iv); - - while (nbytes) { - nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk); - - err = blkcipher_walk_done(desc, &walk, nbytes); - nbytes = walk.nbytes; - } - - glue_fpu_end(fpu_enabled); - - return err; -} -EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit); - int glue_xts_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req, common_glue_func_t tweak_fn, void *tweak_ctx, @@ -429,9 +288,9 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx, return err; /* set minimum length to bsize, for tweak_fn */ - fpu_enabled = glue_skwalk_fpu_begin(bsize, gctx->fpu_blocks_limit, - &walk, fpu_enabled, - nbytes < bsize ? bsize : nbytes); + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + &walk, fpu_enabled, + nbytes < bsize ? bsize : nbytes); /* calculate first value of T */ tweak_fn(tweak_ctx, walk.iv, walk.iv); diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 870f6d812a2d..03347b16ac9d 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -14,15 +14,12 @@ #include <linux/types.h> #include <linux/crypto.h> #include <linux/err.h> -#include <crypto/ablk_helper.h> #include <crypto/algapi.h> -#include <crypto/ctr.h> -#include <crypto/lrw.h> -#include <crypto/xts.h> +#include <crypto/internal/simd.h> #include <crypto/serpent.h> -#include <asm/fpu/api.h> -#include <asm/crypto/serpent-avx.h> +#include <crypto/xts.h> #include <asm/crypto/glue_helper.h> +#include <asm/crypto/serpent-avx.h> #define SERPENT_AVX2_PARALLEL_BLOCKS 16 @@ -40,6 +37,12 @@ asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst, asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst, const u8 *src, le128 *iv); +static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); +} + static const struct common_glue_ctx serpent_enc = { .num_funcs = 3, .fpu_blocks_limit = 8, @@ -136,403 +139,113 @@ static const struct common_glue_ctx serpent_dec_xts = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&serpent_enc, req); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&serpent_dec, req); } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, - dst, src, nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt), + req); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, - nbytes); + return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); + return glue_ctr_req_128bit(&serpent_ctr, req); } -static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - /* since reusing AVX functions, starts using FPU at 8 parallel blocks */ - return glue_fpu_begin(SERPENT_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes); -} + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); -static inline void serpent_fpu_end(bool fpu_enabled) -{ - glue_fpu_end(fpu_enabled); + return glue_xts_req_128bit(&serpent_enc_xts, req, + XTS_TWEAK_CAST(__serpent_encrypt), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -struct crypt_priv { - struct serpent_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) { - serpent_ecb_enc_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS; - } + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) { - serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst); - srcdst += bsize * SERPENT_PARALLEL_BLOCKS; - nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_encrypt(ctx->ctx, srcdst, srcdst); + return glue_xts_req_128bit(&serpent_dec_xts, req, + XTS_TWEAK_CAST(__serpent_encrypt), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) { - serpent_ecb_dec_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS; - } - - while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) { - serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst); - srcdst += bsize * SERPENT_PARALLEL_BLOCKS; - nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_decrypt(ctx->ctx, srcdst, srcdst); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static struct crypto_alg srp_algs[10] = { { - .cra_name = "__ecb-serpent-avx2", - .cra_driver_name = "__driver-ecb-serpent-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[0].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-serpent-avx2", - .cra_driver_name = "__driver-cbc-serpent-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[1].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-serpent-avx2", - .cra_driver_name = "__driver-ctr-serpent-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[2].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = serpent_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__lrw-serpent-avx2", - .cra_driver_name = "__driver-lrw-serpent-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[3].cra_list), - .cra_exit = lrw_serpent_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = lrw_serpent_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-serpent-avx2", - .cra_driver_name = "__driver-xts-serpent-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[4].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = xts_serpent_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(serpent)", - .cra_driver_name = "ecb-serpent-avx2", - .cra_priority = 600, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[5].cra_list), - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(serpent)", - .cra_driver_name = "cbc-serpent-avx2", - .cra_priority = 600, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[6].cra_list), - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(serpent)", - .cra_driver_name = "ctr-serpent-avx2", - .cra_priority = 600, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[7].cra_list), - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "lrw(serpent)", - .cra_driver_name = "lrw-serpent-avx2", - .cra_priority = 600, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[8].cra_list), - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(serpent)", - .cra_driver_name = "xts-serpent-avx2", - .cra_priority = 600, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[9].cra_list), - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, +static struct skcipher_alg serpent_algs[] = { + { + .base.cra_name = "__ecb(serpent)", + .base.cra_driver_name = "__ecb-serpent-avx2", + .base.cra_priority = 600, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(serpent)", + .base.cra_driver_name = "__cbc-serpent-avx2", + .base.cra_priority = 600, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(serpent)", + .base.cra_driver_name = "__ctr-serpent-avx2", + .base.cra_priority = 600, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .chunksize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base.cra_name = "__xts(serpent)", + .base.cra_driver_name = "__xts-serpent-avx2", + .base.cra_priority = 600, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * SERPENT_MIN_KEY_SIZE, + .max_keysize = 2 * SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = xts_serpent_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, }, -} }; +}; + +static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)]; static int __init init(void) { @@ -548,12 +261,15 @@ static int __init init(void) return -ENODEV; } - return crypto_register_algs(srp_algs, ARRAY_SIZE(srp_algs)); + return simd_register_skciphers_compat(serpent_algs, + ARRAY_SIZE(serpent_algs), + serpent_simd_algs); } static void __exit fini(void) { - crypto_unregister_algs(srp_algs, ARRAY_SIZE(srp_algs)); + simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs), + serpent_simd_algs); } module_init(init); diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 6f778d3daa22..458567ecf76c 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -24,21 +24,15 @@ */ #include <linux/module.h> -#include <linux/hardirq.h> #include <linux/types.h> #include <linux/crypto.h> #include <linux/err.h> -#include <crypto/ablk_helper.h> #include <crypto/algapi.h> +#include <crypto/internal/simd.h> #include <crypto/serpent.h> -#include <crypto/cryptd.h> -#include <crypto/b128ops.h> -#include <crypto/ctr.h> -#include <crypto/lrw.h> #include <crypto/xts.h> -#include <asm/fpu/api.h> -#include <asm/crypto/serpent-avx.h> #include <asm/crypto/glue_helper.h> +#include <asm/crypto/serpent-avx.h> /* 8-way parallel cipher functions */ asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, @@ -91,6 +85,31 @@ void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) } EXPORT_SYMBOL_GPL(serpent_xts_dec); +static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); +} + +int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int err; + + err = xts_verify_key(tfm, key, keylen); + if (err) + return err; + + /* first half of xts-key is for crypt */ + err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); +} +EXPORT_SYMBOL_GPL(xts_serpent_setkey); static const struct common_glue_ctx serpent_enc = { .num_funcs = 2, @@ -170,423 +189,113 @@ static const struct common_glue_ctx serpent_dec_xts = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&serpent_enc, req); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&serpent_dec, req); } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, - dst, src, nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt), + req); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, - nbytes); + return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); + return glue_ctr_req_128bit(&serpent_ctr, req); } -static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS, - NULL, fpu_enabled, nbytes); -} + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); -static inline void serpent_fpu_end(bool fpu_enabled) -{ - glue_fpu_end(fpu_enabled); + return glue_xts_req_128bit(&serpent_enc_xts, req, + XTS_TWEAK_CAST(__serpent_encrypt), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -struct crypt_priv { - struct serpent_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { - serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_encrypt(ctx->ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { - serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_decrypt(ctx->ctx, srcdst, srcdst); -} - -int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __serpent_setkey(&ctx->serpent_ctx, key, keylen - - SERPENT_BLOCK_SIZE); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, key + keylen - - SERPENT_BLOCK_SIZE); -} -EXPORT_SYMBOL_GPL(lrw_serpent_setkey); - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; + return glue_xts_req_128bit(&serpent_dec_xts, req, + XTS_TWEAK_CAST(__serpent_encrypt), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -void lrw_serpent_exit_tfm(struct crypto_tfm *tfm) -{ - struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); -} -EXPORT_SYMBOL_GPL(lrw_serpent_exit_tfm); - -int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = xts_check_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); -} -EXPORT_SYMBOL_GPL(xts_serpent_setkey); - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static struct crypto_alg serpent_algs[10] = { { - .cra_name = "__ecb-serpent-avx", - .cra_driver_name = "__driver-ecb-serpent-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-serpent-avx", - .cra_driver_name = "__driver-cbc-serpent-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-serpent-avx", - .cra_driver_name = "__driver-ctr-serpent-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = serpent_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__lrw-serpent-avx", - .cra_driver_name = "__driver-lrw-serpent-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_serpent_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = lrw_serpent_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-serpent-avx", - .cra_driver_name = "__driver-xts-serpent-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = xts_serpent_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(serpent)", - .cra_driver_name = "ecb-serpent-avx", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(serpent)", - .cra_driver_name = "cbc-serpent-avx", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(serpent)", - .cra_driver_name = "ctr-serpent-avx", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "lrw(serpent)", - .cra_driver_name = "lrw-serpent-avx", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(serpent)", - .cra_driver_name = "xts-serpent-avx", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, +static struct skcipher_alg serpent_algs[] = { + { + .base.cra_name = "__ecb(serpent)", + .base.cra_driver_name = "__ecb-serpent-avx", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(serpent)", + .base.cra_driver_name = "__cbc-serpent-avx", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(serpent)", + .base.cra_driver_name = "__ctr-serpent-avx", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .chunksize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base.cra_name = "__xts(serpent)", + .base.cra_driver_name = "__xts-serpent-avx", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * SERPENT_MIN_KEY_SIZE, + .max_keysize = 2 * SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = xts_serpent_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, }, -} }; +}; + +static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)]; static int __init serpent_init(void) { @@ -598,12 +307,15 @@ static int __init serpent_init(void) return -ENODEV; } - return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); + return simd_register_skciphers_compat(serpent_algs, + ARRAY_SIZE(serpent_algs), + serpent_simd_algs); } static void __exit serpent_exit(void) { - crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); + simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs), + serpent_simd_algs); } module_init(serpent_init); diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index ac0e831943f5..3dafe137596a 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -30,21 +30,22 @@ */ #include <linux/module.h> -#include <linux/hardirq.h> #include <linux/types.h> #include <linux/crypto.h> #include <linux/err.h> -#include <crypto/ablk_helper.h> #include <crypto/algapi.h> -#include <crypto/serpent.h> -#include <crypto/cryptd.h> #include <crypto/b128ops.h> -#include <crypto/ctr.h> -#include <crypto/lrw.h> -#include <crypto/xts.h> +#include <crypto/internal/simd.h> +#include <crypto/serpent.h> #include <asm/crypto/serpent-sse2.h> #include <asm/crypto/glue_helper.h> +static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); +} + static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) { u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; @@ -139,464 +140,79 @@ static const struct common_glue_ctx serpent_dec_cbc = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, - dst, src, nbytes); + return glue_ecb_req_128bit(&serpent_enc, req); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, - nbytes); + return glue_ecb_req_128bit(&serpent_dec, req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt), + req); } -static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS, - NULL, fpu_enabled, nbytes); + return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); } -static inline void serpent_fpu_end(bool fpu_enabled) +static int ctr_crypt(struct skcipher_request *req) { - glue_fpu_end(fpu_enabled); + return glue_ctr_req_128bit(&serpent_ctr, req); } -struct crypt_priv { - struct serpent_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { - serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_encrypt(ctx->ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { - serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_decrypt(ctx->ctx, srcdst, srcdst); -} - -struct serpent_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct serpent_ctx serpent_ctx; -}; - -static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __serpent_setkey(&ctx->serpent_ctx, key, keylen - - SERPENT_BLOCK_SIZE); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, key + keylen - - SERPENT_BLOCK_SIZE); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static void lrw_exit_tfm(struct crypto_tfm *tfm) -{ - struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); -} - -struct serpent_xts_ctx { - struct serpent_ctx tweak_ctx; - struct serpent_ctx crypt_ctx; +static struct skcipher_alg serpent_algs[] = { + { + .base.cra_name = "__ecb(serpent)", + .base.cra_driver_name = "__ecb-serpent-sse2", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(serpent)", + .base.cra_driver_name = "__cbc-serpent-sse2", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(serpent)", + .base.cra_driver_name = "__ctr-serpent-sse2", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .chunksize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, }; -static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = xts_check_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); -} - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - le128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->crypt_ctx, - .fpu_enabled = false, - }; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = xts_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - le128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->crypt_ctx, - .fpu_enabled = false, - }; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = xts_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static struct crypto_alg serpent_algs[10] = { { - .cra_name = "__ecb-serpent-sse2", - .cra_driver_name = "__driver-ecb-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-serpent-sse2", - .cra_driver_name = "__driver-cbc-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-serpent-sse2", - .cra_driver_name = "__driver-ctr-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = serpent_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__lrw-serpent-sse2", - .cra_driver_name = "__driver-lrw-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = lrw_serpent_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-serpent-sse2", - .cra_driver_name = "__driver-xts-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = xts_serpent_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(serpent)", - .cra_driver_name = "ecb-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(serpent)", - .cra_driver_name = "cbc-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(serpent)", - .cra_driver_name = "ctr-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "lrw(serpent)", - .cra_driver_name = "lrw-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(serpent)", - .cra_driver_name = "xts-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -} }; +static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)]; static int __init serpent_sse2_init(void) { @@ -605,12 +221,15 @@ static int __init serpent_sse2_init(void) return -ENODEV; } - return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); + return simd_register_skciphers_compat(serpent_algs, + ARRAY_SIZE(serpent_algs), + serpent_simd_algs); } static void __exit serpent_sse2_exit(void) { - crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); + simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs), + serpent_simd_algs); } module_init(serpent_sse2_init); diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c index acf9fdf01671..e17655ffde79 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb.c +++ b/arch/x86/crypto/sha1-mb/sha1_mb.c @@ -106,13 +106,6 @@ static asmlinkage struct job_sha1* (*sha1_job_mgr_flush) static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job) (struct sha1_mb_mgr *state); -static inline void sha1_init_digest(uint32_t *digest) -{ - static const uint32_t initial_digest[SHA1_DIGEST_LENGTH] = {SHA1_H0, - SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }; - memcpy(digest, initial_digest, sizeof(initial_digest)); -} - static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], uint64_t total_len) { @@ -244,11 +237,8 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, uint32_t len, int flags) { - if (flags & (~HASH_ENTIRE)) { - /* - * User should not pass anything other than FIRST, UPDATE, or - * LAST - */ + if (flags & ~(HASH_UPDATE | HASH_LAST)) { + /* User should not pass anything other than UPDATE or LAST */ ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; return ctx; } @@ -259,24 +249,12 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, return ctx; } - if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + if (ctx->status & HASH_CTX_STS_COMPLETE) { /* Cannot update a finished job. */ ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; return ctx; } - - if (flags & HASH_FIRST) { - /* Init digest */ - sha1_init_digest(ctx->job.result_digest); - - /* Reset byte counter */ - ctx->total_length = 0; - - /* Clear extra blocks */ - ctx->partial_block_buffer_length = 0; - } - /* * If we made it here, there were no errors during this call to * submit diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h index 13590ccf965c..9454bd16f9f8 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h +++ b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h @@ -57,11 +57,9 @@ #include "sha1_mb_mgr.h" #define HASH_UPDATE 0x00 -#define HASH_FIRST 0x01 -#define HASH_LAST 0x02 -#define HASH_ENTIRE 0x03 -#define HASH_DONE 0x04 -#define HASH_FINAL 0x08 +#define HASH_LAST 0x01 +#define HASH_DONE 0x02 +#define HASH_FINAL 0x04 #define HASH_CTX_STS_IDLE 0x00 #define HASH_CTX_STS_PROCESSING 0x01 diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c index 7926a226b120..4c46ac1b6653 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb.c +++ b/arch/x86/crypto/sha256-mb/sha256_mb.c @@ -106,14 +106,6 @@ static asmlinkage struct job_sha256* (*sha256_job_mgr_flush) static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job) (struct sha256_mb_mgr *state); -inline void sha256_init_digest(uint32_t *digest) -{ - static const uint32_t initial_digest[SHA256_DIGEST_LENGTH] = { - SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, - SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7}; - memcpy(digest, initial_digest, sizeof(initial_digest)); -} - inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2], uint64_t total_len) { @@ -245,10 +237,8 @@ static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr, uint32_t len, int flags) { - if (flags & (~HASH_ENTIRE)) { - /* User should not pass anything other than FIRST, UPDATE - * or LAST - */ + if (flags & ~(HASH_UPDATE | HASH_LAST)) { + /* User should not pass anything other than UPDATE or LAST */ ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; return ctx; } @@ -259,23 +249,12 @@ static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr, return ctx; } - if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + if (ctx->status & HASH_CTX_STS_COMPLETE) { /* Cannot update a finished job. */ ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; return ctx; } - if (flags & HASH_FIRST) { - /* Init digest */ - sha256_init_digest(ctx->job.result_digest); - - /* Reset byte counter */ - ctx->total_length = 0; - - /* Clear extra blocks */ - ctx->partial_block_buffer_length = 0; - } - /* If we made it here, there was no error during this call to submit */ ctx->error = HASH_CTX_ERROR_NONE; diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h index aabb30320af0..7c432543dc7f 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h +++ b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h @@ -57,11 +57,9 @@ #include "sha256_mb_mgr.h" #define HASH_UPDATE 0x00 -#define HASH_FIRST 0x01 -#define HASH_LAST 0x02 -#define HASH_ENTIRE 0x03 -#define HASH_DONE 0x04 -#define HASH_FINAL 0x08 +#define HASH_LAST 0x01 +#define HASH_DONE 0x02 +#define HASH_FINAL 0x04 #define HASH_CTX_STS_IDLE 0x00 #define HASH_CTX_STS_PROCESSING 0x01 diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c index 458409b7568d..39e2bbdc1836 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb.c +++ b/arch/x86/crypto/sha512-mb/sha512_mb.c @@ -107,15 +107,6 @@ static asmlinkage struct job_sha512* (*sha512_job_mgr_flush) static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job) (struct sha512_mb_mgr *state); -inline void sha512_init_digest(uint64_t *digest) -{ - static const uint64_t initial_digest[SHA512_DIGEST_LENGTH] = { - SHA512_H0, SHA512_H1, SHA512_H2, - SHA512_H3, SHA512_H4, SHA512_H5, - SHA512_H6, SHA512_H7 }; - memcpy(digest, initial_digest, sizeof(initial_digest)); -} - inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2], uint64_t total_len) { @@ -263,11 +254,8 @@ static struct sha512_hash_ctx mgr = cstate->mgr; spin_lock_irqsave(&cstate->work_lock, irqflags); - if (flags & (~HASH_ENTIRE)) { - /* - * User should not pass anything other than FIRST, UPDATE, or - * LAST - */ + if (flags & ~(HASH_UPDATE | HASH_LAST)) { + /* User should not pass anything other than UPDATE or LAST */ ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; goto unlock; } @@ -278,24 +266,12 @@ static struct sha512_hash_ctx goto unlock; } - if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + if (ctx->status & HASH_CTX_STS_COMPLETE) { /* Cannot update a finished job. */ ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; goto unlock; } - - if (flags & HASH_FIRST) { - /* Init digest */ - sha512_init_digest(ctx->job.result_digest); - - /* Reset byte counter */ - ctx->total_length = 0; - - /* Clear extra blocks */ - ctx->partial_block_buffer_length = 0; - } - /* * If we made it here, there were no errors during this call to * submit diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h index e4653f5eec3f..e5c465bd821e 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h +++ b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h @@ -57,11 +57,9 @@ #include "sha512_mb_mgr.h" #define HASH_UPDATE 0x00 -#define HASH_FIRST 0x01 -#define HASH_LAST 0x02 -#define HASH_ENTIRE 0x03 -#define HASH_DONE 0x04 -#define HASH_FINAL 0x08 +#define HASH_LAST 0x01 +#define HASH_DONE 0x02 +#define HASH_FINAL 0x04 #define HASH_CTX_STS_IDLE 0x00 #define HASH_CTX_STS_PROCESSING 0x01 diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index b7a3904b953c..66d989230d10 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -24,24 +24,15 @@ */ #include <linux/module.h> -#include <linux/hardirq.h> #include <linux/types.h> #include <linux/crypto.h> #include <linux/err.h> -#include <crypto/ablk_helper.h> #include <crypto/algapi.h> +#include <crypto/internal/simd.h> #include <crypto/twofish.h> -#include <crypto/cryptd.h> -#include <crypto/b128ops.h> -#include <crypto/ctr.h> -#include <crypto/lrw.h> #include <crypto/xts.h> -#include <asm/fpu/api.h> -#include <asm/crypto/twofish.h> #include <asm/crypto/glue_helper.h> -#include <crypto/scatterwalk.h> -#include <linux/workqueue.h> -#include <linux/spinlock.h> +#include <asm/crypto/twofish.h> #define TWOFISH_PARALLEL_BLOCKS 8 @@ -61,6 +52,12 @@ asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, const u8 *src, le128 *iv); +static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return twofish_setkey(&tfm->base, key, keylen); +} + static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src) { @@ -79,6 +76,31 @@ static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) GLUE_FUNC_CAST(twofish_dec_blk)); } +struct twofish_xts_ctx { + struct twofish_ctx tweak_ctx; + struct twofish_ctx crypt_ctx; +}; + +static int xts_twofish_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + u32 *flags = &tfm->base.crt_flags; + int err; + + err = xts_verify_key(tfm, key, keylen); + if (err) + return err; + + /* first half of xts-key is for crypt */ + err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, + flags); +} static const struct common_glue_ctx twofish_enc = { .num_funcs = 3, @@ -170,389 +192,113 @@ static const struct common_glue_ctx twofish_dec_xts = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, - dst, src, nbytes); + return glue_ecb_req_128bit(&twofish_enc, req); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, - nbytes); + return glue_ecb_req_128bit(&twofish_dec, req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk), + req); } -static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_fpu_begin(TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS, NULL, - fpu_enabled, nbytes); + return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); } -static inline void twofish_fpu_end(bool fpu_enabled) +static int ctr_crypt(struct skcipher_request *req) { - glue_fpu_end(fpu_enabled); + return glue_ctr_req_128bit(&twofish_ctr, req); } -struct crypt_priv { - struct twofish_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - const unsigned int bsize = TF_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { - twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) - twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - nbytes %= bsize * 3; - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - twofish_enc_blk(ctx->ctx, srcdst, srcdst); + return glue_xts_req_128bit(&twofish_enc_xts, req, + XTS_TWEAK_CAST(twofish_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - const unsigned int bsize = TF_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { - twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst); - return; - } + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) - twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst); - - nbytes %= bsize * 3; - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - twofish_dec_blk(ctx->ctx, srcdst, srcdst); + return glue_xts_req_128bit(&twofish_dec_xts, req, + XTS_TWEAK_CAST(twofish_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[TWOFISH_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->twofish_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - twofish_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[TWOFISH_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->twofish_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - twofish_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(twofish_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(twofish_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static struct crypto_alg twofish_algs[10] = { { - .cra_name = "__ecb-twofish-avx", - .cra_driver_name = "__driver-ecb-twofish-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = twofish_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-twofish-avx", - .cra_driver_name = "__driver-cbc-twofish-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = twofish_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-twofish-avx", - .cra_driver_name = "__driver-ctr-twofish-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = twofish_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__lrw-twofish-avx", - .cra_driver_name = "__driver-lrw-twofish-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_twofish_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE + - TF_BLOCK_SIZE, - .max_keysize = TF_MAX_KEY_SIZE + - TF_BLOCK_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = lrw_twofish_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-twofish-avx", - .cra_driver_name = "__driver-xts-twofish-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE * 2, - .max_keysize = TF_MAX_KEY_SIZE * 2, - .ivsize = TF_BLOCK_SIZE, - .setkey = xts_twofish_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(twofish)", - .cra_driver_name = "ecb-twofish-avx", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(twofish)", - .cra_driver_name = "cbc-twofish-avx", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(twofish)", - .cra_driver_name = "ctr-twofish-avx", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "lrw(twofish)", - .cra_driver_name = "lrw-twofish-avx", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE + - TF_BLOCK_SIZE, - .max_keysize = TF_MAX_KEY_SIZE + - TF_BLOCK_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(twofish)", - .cra_driver_name = "xts-twofish-avx", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE * 2, - .max_keysize = TF_MAX_KEY_SIZE * 2, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, +static struct skcipher_alg twofish_algs[] = { + { + .base.cra_name = "__ecb(twofish)", + .base.cra_driver_name = "__ecb-twofish-avx", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = TF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(twofish)", + .base.cra_driver_name = "__cbc-twofish-avx", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = TF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(twofish)", + .base.cra_driver_name = "__ctr-twofish-avx", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .chunksize = TF_BLOCK_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base.cra_name = "__xts(twofish)", + .base.cra_driver_name = "__xts-twofish-avx", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = TF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct twofish_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * TF_MIN_KEY_SIZE, + .max_keysize = 2 * TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = xts_twofish_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, }, -} }; +}; + +static struct simd_skcipher_alg *twofish_simd_algs[ARRAY_SIZE(twofish_algs)]; static int __init twofish_init(void) { @@ -563,12 +309,15 @@ static int __init twofish_init(void) return -ENODEV; } - return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); + return simd_register_skciphers_compat(twofish_algs, + ARRAY_SIZE(twofish_algs), + twofish_simd_algs); } static void __exit twofish_exit(void) { - crypto_unregister_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); + simd_unregister_skciphers(twofish_algs, ARRAY_SIZE(twofish_algs), + twofish_simd_algs); } module_init(twofish_init); diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 243e90a4b5d9..571485502ec8 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -20,22 +20,26 @@ * */ -#include <asm/processor.h> +#include <asm/crypto/glue_helper.h> +#include <asm/crypto/twofish.h> +#include <crypto/algapi.h> +#include <crypto/b128ops.h> +#include <crypto/internal/skcipher.h> +#include <crypto/twofish.h> #include <linux/crypto.h> #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> -#include <crypto/algapi.h> -#include <crypto/twofish.h> -#include <crypto/b128ops.h> -#include <asm/crypto/twofish.h> -#include <asm/crypto/glue_helper.h> -#include <crypto/lrw.h> -#include <crypto/xts.h> EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); +static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return twofish_setkey(&tfm->base, key, keylen); +} + static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src) { @@ -151,284 +155,74 @@ static const struct common_glue_ctx twofish_dec_cbc = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, - dst, src, nbytes); -} - -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, - nbytes); -} - -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); -} - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - const unsigned int bsize = TF_BLOCK_SIZE; - struct twofish_ctx *ctx = priv; - int i; - - if (nbytes == 3 * bsize) { - twofish_enc_blk_3way(ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - twofish_enc_blk(ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = TF_BLOCK_SIZE; - struct twofish_ctx *ctx = priv; - int i; - - if (nbytes == 3 * bsize) { - twofish_dec_blk_3way(ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - twofish_dec_blk(ctx, srcdst, srcdst); -} - -int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE, - &tfm->crt_flags); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); + return glue_ecb_req_128bit(&twofish_enc, req); } -EXPORT_SYMBOL_GPL(lrw_twofish_setkey); -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[3]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &ctx->twofish_ctx, - .crypt_fn = encrypt_callback, - }; - - return lrw_crypt(desc, dst, src, nbytes, &req); + return glue_ecb_req_128bit(&twofish_dec, req); } -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[3]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &ctx->twofish_ctx, - .crypt_fn = decrypt_callback, - }; - - return lrw_crypt(desc, dst, src, nbytes, &req); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk), + req); } -void lrw_twofish_exit_tfm(struct crypto_tfm *tfm) +static int cbc_decrypt(struct skcipher_request *req) { - struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); -} -EXPORT_SYMBOL_GPL(lrw_twofish_exit_tfm); - -int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; - int err; - - err = xts_check_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, - flags); -} -EXPORT_SYMBOL_GPL(xts_twofish_setkey); - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - le128 buf[3]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), - .crypt_ctx = &ctx->crypt_ctx, - .crypt_fn = encrypt_callback, - }; - - return xts_crypt(desc, dst, src, nbytes, &req); + return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - le128 buf[3]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), - .crypt_ctx = &ctx->crypt_ctx, - .crypt_fn = decrypt_callback, - }; - - return xts_crypt(desc, dst, src, nbytes, &req); + return glue_ctr_req_128bit(&twofish_ctr, req); } -static struct crypto_alg tf_algs[5] = { { - .cra_name = "ecb(twofish)", - .cra_driver_name = "ecb-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = twofish_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "cbc(twofish)", - .cra_driver_name = "cbc-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = twofish_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ctr(twofish)", - .cra_driver_name = "ctr-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = twofish_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "lrw(twofish)", - .cra_driver_name = "lrw-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_twofish_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, - .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = lrw_twofish_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "xts(twofish)", - .cra_driver_name = "xts-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE * 2, - .max_keysize = TF_MAX_KEY_SIZE * 2, - .ivsize = TF_BLOCK_SIZE, - .setkey = xts_twofish_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, +static struct skcipher_alg tf_skciphers[] = { + { + .base.cra_name = "ecb(twofish)", + .base.cra_driver_name = "ecb-twofish-3way", + .base.cra_priority = 300, + .base.cra_blocksize = TF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(twofish)", + .base.cra_driver_name = "cbc-twofish-3way", + .base.cra_priority = 300, + .base.cra_blocksize = TF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "ctr(twofish)", + .base.cra_driver_name = "ctr-twofish-3way", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .chunksize = TF_BLOCK_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, }, -} }; +}; static bool is_blacklisted_cpu(void) { @@ -478,12 +272,13 @@ static int __init init(void) return -ENODEV; } - return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs)); + return crypto_register_skciphers(tf_skciphers, + ARRAY_SIZE(tf_skciphers)); } static void __exit fini(void) { - crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs)); + crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers)); } module_init(init); diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 6ad064c8cf35..bef8e2b202a8 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -902,6 +902,9 @@ BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR, BUILD_INTERRUPT3(hyperv_reenlightenment_vector, HYPERV_REENLIGHTENMENT_VECTOR, hyperv_reenlightenment_intr) +BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR, + hv_stimer0_vector_handler) + #endif /* CONFIG_HYPERV */ ENTRY(page_fault) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 936e19642eab..b0a4649e55ce 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1140,6 +1140,9 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \ hyperv_reenlightenment_vector hyperv_reenlightenment_intr + +apicinterrupt3 HYPERV_STIMER0_VECTOR \ + hv_stimer0_callback_vector hv_stimer0_vector_handler #endif /* CONFIG_HYPERV */ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 2edc49e7409b..cfecc2272f2d 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -21,7 +21,7 @@ #include <asm/apic.h> #include <asm/desc.h> #include <asm/hypervisor.h> -#include <asm/hyperv.h> +#include <asm/hyperv-tlfs.h> #include <asm/mshyperv.h> #include <linux/version.h> #include <linux/vmalloc.h> @@ -88,11 +88,15 @@ EXPORT_SYMBOL_GPL(hyperv_cs); u32 *hv_vp_index; EXPORT_SYMBOL_GPL(hv_vp_index); +struct hv_vp_assist_page **hv_vp_assist_page; +EXPORT_SYMBOL_GPL(hv_vp_assist_page); + u32 hv_max_vp_index; static int hv_cpu_init(unsigned int cpu) { u64 msr_vp_index; + struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; hv_get_vp_index(msr_vp_index); @@ -101,6 +105,22 @@ static int hv_cpu_init(unsigned int cpu) if (msr_vp_index > hv_max_vp_index) hv_max_vp_index = msr_vp_index; + if (!hv_vp_assist_page) + return 0; + + if (!*hvp) + *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); + + if (*hvp) { + u64 val; + + val = vmalloc_to_pfn(*hvp); + val = (val << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) | + HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; + + wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, val); + } + return 0; } @@ -198,6 +218,9 @@ static int hv_cpu_die(unsigned int cpu) struct hv_reenlightenment_control re_ctrl; unsigned int new_cpu; + if (hv_vp_assist_page && hv_vp_assist_page[cpu]) + wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0); + if (hv_reenlightenment_cb == NULL) return 0; @@ -224,6 +247,7 @@ void hyperv_init(void) { u64 guest_id, required_msrs; union hv_x64_msr_hypercall_contents hypercall_msr; + int cpuhp; if (x86_hyper_type != X86_HYPER_MS_HYPERV) return; @@ -241,9 +265,17 @@ void hyperv_init(void) if (!hv_vp_index) return; - if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", - hv_cpu_init, hv_cpu_die) < 0) + hv_vp_assist_page = kcalloc(num_possible_cpus(), + sizeof(*hv_vp_assist_page), GFP_KERNEL); + if (!hv_vp_assist_page) { + ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; goto free_vp_index; + } + + cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", + hv_cpu_init, hv_cpu_die); + if (cpuhp < 0) + goto free_vp_assist_page; /* * Setup the hypercall page and enable hypercalls. @@ -256,7 +288,7 @@ void hyperv_init(void) hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX); if (hv_hypercall_pg == NULL) { wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); - goto free_vp_index; + goto remove_cpuhp_state; } rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); @@ -304,6 +336,11 @@ register_msr_cs: return; +remove_cpuhp_state: + cpuhp_remove_state(cpuhp); +free_vp_assist_page: + kfree(hv_vp_assist_page); + hv_vp_assist_page = NULL; free_vp_index: kfree(hv_vp_index); hv_vp_index = NULL; diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 46e1ef17d92d..92212bf0484f 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -123,7 +123,7 @@ static inline long long arch_atomic64_read(const atomic64_t *v) long long r; alternative_atomic64(read, "=&A" (r), "c" (v) : "memory"); return r; - } +} /** * arch_atomic64_add_return - add and return diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h index 10f8d590bcfe..a5d86fc0593f 100644 --- a/arch/x86/include/asm/crypto/camellia.h +++ b/arch/x86/include/asm/crypto/camellia.h @@ -2,8 +2,9 @@ #ifndef ASM_X86_CAMELLIA_H #define ASM_X86_CAMELLIA_H -#include <linux/kernel.h> +#include <crypto/b128ops.h> #include <linux/crypto.h> +#include <linux/kernel.h> #define CAMELLIA_MIN_KEY_SIZE 16 #define CAMELLIA_MAX_KEY_SIZE 32 @@ -11,16 +12,13 @@ #define CAMELLIA_TABLE_BYTE_LEN 272 #define CAMELLIA_PARALLEL_BLOCKS 2 +struct crypto_skcipher; + struct camellia_ctx { u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; u32 key_length; }; -struct camellia_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct camellia_ctx camellia_ctx; -}; - struct camellia_xts_ctx { struct camellia_ctx tweak_ctx; struct camellia_ctx crypt_ctx; @@ -30,11 +28,7 @@ extern int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key, unsigned int key_len, u32 *flags); -extern int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); -extern void lrw_camellia_exit_tfm(struct crypto_tfm *tfm); - -extern int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, +extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); /* regular block cipher functions */ diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index 553a03de55c3..d1818634ae7e 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -45,7 +45,7 @@ struct common_glue_ctx { }; static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, - struct blkcipher_desc *desc, + struct skcipher_walk *walk, bool fpu_enabled, unsigned int nbytes) { if (likely(fpu_blocks_limit < 0)) @@ -61,33 +61,6 @@ static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, if (nbytes < bsize * (unsigned int)fpu_blocks_limit) return false; - if (desc) { - /* prevent sleeping if FPU is in use */ - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - } - - kernel_fpu_begin(); - return true; -} - -static inline bool glue_skwalk_fpu_begin(unsigned int bsize, - int fpu_blocks_limit, - struct skcipher_walk *walk, - bool fpu_enabled, unsigned int nbytes) -{ - if (likely(fpu_blocks_limit < 0)) - return false; - - if (fpu_enabled) - return true; - - /* - * Vector-registers are only used when chunk to be processed is large - * enough, so do not enable FPU until it is necessary. - */ - if (nbytes < bsize * (unsigned int)fpu_blocks_limit) - return false; - /* prevent sleeping if FPU is in use */ skcipher_walk_atomise(walk); @@ -126,41 +99,17 @@ static inline void le128_inc(le128 *i) i->b = cpu_to_le64(b); } -extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes); - -extern int glue_cbc_encrypt_128bit(const common_glue_func_t fn, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes); - -extern int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes); - -extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes); - -extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes, - common_glue_func_t tweak_fn, void *tweak_ctx, - void *crypt_ctx); - -extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes, - common_glue_func_t tweak_fn, void *tweak_ctx, - void *crypt_ctx); +extern int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req); + +extern int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn, + struct skcipher_request *req); + +extern int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req); + +extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req); extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req, diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h index c958b7bd0fcb..db7c9cc32234 100644 --- a/arch/x86/include/asm/crypto/serpent-avx.h +++ b/arch/x86/include/asm/crypto/serpent-avx.h @@ -2,15 +2,13 @@ #ifndef ASM_X86_SERPENT_AVX_H #define ASM_X86_SERPENT_AVX_H -#include <linux/crypto.h> +#include <crypto/b128ops.h> #include <crypto/serpent.h> +#include <linux/types.h> -#define SERPENT_PARALLEL_BLOCKS 8 +struct crypto_skcipher; -struct serpent_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct serpent_ctx serpent_ctx; -}; +#define SERPENT_PARALLEL_BLOCKS 8 struct serpent_xts_ctx { struct serpent_ctx tweak_ctx; @@ -38,12 +36,7 @@ extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); - -extern void lrw_serpent_exit_tfm(struct crypto_tfm *tfm); - -extern int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, +extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); #endif diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h index 65bb80adba3e..f618bf272b90 100644 --- a/arch/x86/include/asm/crypto/twofish.h +++ b/arch/x86/include/asm/crypto/twofish.h @@ -4,19 +4,8 @@ #include <linux/crypto.h> #include <crypto/twofish.h> -#include <crypto/lrw.h> #include <crypto/b128ops.h> -struct twofish_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct twofish_ctx twofish_ctx; -}; - -struct twofish_xts_ctx { - struct twofish_ctx tweak_ctx; - struct twofish_ctx crypt_ctx; -}; - /* regular block cipher functions from twofish_x86_64 module */ asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, const u8 *src); @@ -36,12 +25,4 @@ extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); - -extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm); - -extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); - #endif /* ASM_X86_TWOFISH_H */ diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 7c341a74ec8c..5ea2afd4c871 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -40,6 +40,7 @@ typedef struct { #endif #if IS_ENABLED(CONFIG_HYPERV) unsigned int irq_hv_reenlightenment_count; + unsigned int hyperv_stimer0_count; #endif } ____cacheline_aligned irq_cpustat_t; diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/asm/hyperv-tlfs.h index 099414345865..416cb0e0c496 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -1,6 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_X86_HYPERV_H -#define _ASM_X86_HYPERV_H + +/* + * This file contains definitions from Hyper-V Hypervisor Top-Level Functional + * Specification (TLFS): + * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs + */ + +#ifndef _ASM_X86_HYPERV_TLFS_H +#define _ASM_X86_HYPERV_TLFS_H #include <linux/types.h> @@ -14,6 +21,7 @@ #define HYPERV_CPUID_FEATURES 0x40000003 #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 +#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A #define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 #define HYPERV_CPUID_MIN 0x40000005 @@ -77,6 +85,9 @@ /* Crash MSR available */ #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10) +/* stimer Direct Mode is available */ +#define HV_X64_STIMER_DIRECT_MODE_AVAILABLE (1 << 19) + /* * Feature identification: EBX indicates which flags were specified at * partition creation. The format is the same as the partition creation @@ -156,6 +167,9 @@ /* Recommend using the newer ExProcessorMasks interface */ #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11) +/* Recommend using enlightened VMCS */ +#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED (1 << 14) + /* * Crash notification flag. */ @@ -189,7 +203,7 @@ #define HV_X64_MSR_EOI 0x40000070 #define HV_X64_MSR_ICR 0x40000071 #define HV_X64_MSR_TPR 0x40000072 -#define HV_X64_MSR_APIC_ASSIST_PAGE 0x40000073 +#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 /* Define synthetic interrupt controller model specific registers. */ #define HV_X64_MSR_SCONTROL 0x40000080 @@ -237,6 +251,55 @@ #define HV_X64_MSR_CRASH_PARAMS \ (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) +/* + * Declare the MSR used to setup pages used to communicate with the hypervisor. + */ +union hv_x64_msr_hypercall_contents { + u64 as_uint64; + struct { + u64 enable:1; + u64 reserved:11; + u64 guest_physical_address:52; + }; +}; + +/* + * TSC page layout. + */ +struct ms_hyperv_tsc_page { + volatile u32 tsc_sequence; + u32 reserved1; + volatile u64 tsc_scale; + volatile s64 tsc_offset; + u64 reserved2[509]; +}; + +/* + * The guest OS needs to register the guest ID with the hypervisor. + * The guest ID is a 64 bit entity and the structure of this ID is + * specified in the Hyper-V specification: + * + * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx + * + * While the current guideline does not specify how Linux guest ID(s) + * need to be generated, our plan is to publish the guidelines for + * Linux and other guest operating systems that currently are hosted + * on Hyper-V. The implementation here conforms to this yet + * unpublished guidelines. + * + * + * Bit(s) + * 63 - Indicates if the OS is Open Source or not; 1 is Open Source + * 62:56 - Os Type; Linux is 0x100 + * 55:48 - Distro specific identification + * 47:16 - Linux kernel version number + * 15:0 - Distro specific identification + * + * + */ + +#define HV_LINUX_VENDOR_ID 0x8100 + /* TSC emulation after migration */ #define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 @@ -275,10 +338,13 @@ struct hv_tsc_emulation_status { #define HVCALL_POST_MESSAGE 0x005c #define HVCALL_SIGNAL_EVENT 0x005d -#define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001 -#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12 -#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ - (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) +#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ + (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) + +/* Hyper-V Enlightened VMCS version mask in nested features CPUID */ +#define HV_X64_ENLIGHTENED_VMCS_VERSION 0xff #define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 @@ -298,12 +364,22 @@ enum HV_GENERIC_SET_FORMAT { HV_GENERIC_SET_ALL, }; +#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0) +#define HV_HYPERCALL_FAST_BIT BIT(16) +#define HV_HYPERCALL_VARHEAD_OFFSET 17 +#define HV_HYPERCALL_REP_COMP_OFFSET 32 +#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32) +#define HV_HYPERCALL_REP_START_OFFSET 48 +#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48) + /* hypercall status code */ #define HV_STATUS_SUCCESS 0 #define HV_STATUS_INVALID_HYPERCALL_CODE 2 #define HV_STATUS_INVALID_HYPERCALL_INPUT 3 #define HV_STATUS_INVALID_ALIGNMENT 4 +#define HV_STATUS_INVALID_PARAMETER 5 #define HV_STATUS_INSUFFICIENT_MEMORY 11 +#define HV_STATUS_INVALID_PORT_ID 17 #define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 @@ -318,6 +394,8 @@ typedef struct _HV_REFERENCE_TSC_PAGE { #define HV_SYNIC_SINT_COUNT (16) /* Define the expected SynIC version. */ #define HV_SYNIC_VERSION_1 (0x1) +/* Valid SynIC vectors are 16-255. */ +#define HV_SYNIC_FIRST_VALID_VECTOR (16) #define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) #define HV_SYNIC_SIMP_ENABLE (1ULL << 0) @@ -412,6 +490,216 @@ struct hv_timer_message_payload { __u64 delivery_time; /* When the message was delivered */ }; +/* Define virtual processor assist page structure. */ +struct hv_vp_assist_page { + __u32 apic_assist; + __u32 reserved; + __u64 vtl_control[2]; + __u64 nested_enlightenments_control[2]; + __u32 enlighten_vmentry; + __u64 current_nested_vmcs; +}; + +struct hv_enlightened_vmcs { + u32 revision_id; + u32 abort; + + u16 host_es_selector; + u16 host_cs_selector; + u16 host_ss_selector; + u16 host_ds_selector; + u16 host_fs_selector; + u16 host_gs_selector; + u16 host_tr_selector; + + u64 host_ia32_pat; + u64 host_ia32_efer; + + u64 host_cr0; + u64 host_cr3; + u64 host_cr4; + + u64 host_ia32_sysenter_esp; + u64 host_ia32_sysenter_eip; + u64 host_rip; + u32 host_ia32_sysenter_cs; + + u32 pin_based_vm_exec_control; + u32 vm_exit_controls; + u32 secondary_vm_exec_control; + + u64 io_bitmap_a; + u64 io_bitmap_b; + u64 msr_bitmap; + + u16 guest_es_selector; + u16 guest_cs_selector; + u16 guest_ss_selector; + u16 guest_ds_selector; + u16 guest_fs_selector; + u16 guest_gs_selector; + u16 guest_ldtr_selector; + u16 guest_tr_selector; + + u32 guest_es_limit; + u32 guest_cs_limit; + u32 guest_ss_limit; + u32 guest_ds_limit; + u32 guest_fs_limit; + u32 guest_gs_limit; + u32 guest_ldtr_limit; + u32 guest_tr_limit; + u32 guest_gdtr_limit; + u32 guest_idtr_limit; + + u32 guest_es_ar_bytes; + u32 guest_cs_ar_bytes; + u32 guest_ss_ar_bytes; + u32 guest_ds_ar_bytes; + u32 guest_fs_ar_bytes; + u32 guest_gs_ar_bytes; + u32 guest_ldtr_ar_bytes; + u32 guest_tr_ar_bytes; + + u64 guest_es_base; + u64 guest_cs_base; + u64 guest_ss_base; + u64 guest_ds_base; + u64 guest_fs_base; + u64 guest_gs_base; + u64 guest_ldtr_base; + u64 guest_tr_base; + u64 guest_gdtr_base; + u64 guest_idtr_base; + + u64 padding64_1[3]; + + u64 vm_exit_msr_store_addr; + u64 vm_exit_msr_load_addr; + u64 vm_entry_msr_load_addr; + + u64 cr3_target_value0; + u64 cr3_target_value1; + u64 cr3_target_value2; + u64 cr3_target_value3; + + u32 page_fault_error_code_mask; + u32 page_fault_error_code_match; + + u32 cr3_target_count; + u32 vm_exit_msr_store_count; + u32 vm_exit_msr_load_count; + u32 vm_entry_msr_load_count; + + u64 tsc_offset; + u64 virtual_apic_page_addr; + u64 vmcs_link_pointer; + + u64 guest_ia32_debugctl; + u64 guest_ia32_pat; + u64 guest_ia32_efer; + + u64 guest_pdptr0; + u64 guest_pdptr1; + u64 guest_pdptr2; + u64 guest_pdptr3; + + u64 guest_pending_dbg_exceptions; + u64 guest_sysenter_esp; + u64 guest_sysenter_eip; + + u32 guest_activity_state; + u32 guest_sysenter_cs; + + u64 cr0_guest_host_mask; + u64 cr4_guest_host_mask; + u64 cr0_read_shadow; + u64 cr4_read_shadow; + u64 guest_cr0; + u64 guest_cr3; + u64 guest_cr4; + u64 guest_dr7; + + u64 host_fs_base; + u64 host_gs_base; + u64 host_tr_base; + u64 host_gdtr_base; + u64 host_idtr_base; + u64 host_rsp; + + u64 ept_pointer; + + u16 virtual_processor_id; + u16 padding16[3]; + + u64 padding64_2[5]; + u64 guest_physical_address; + + u32 vm_instruction_error; + u32 vm_exit_reason; + u32 vm_exit_intr_info; + u32 vm_exit_intr_error_code; + u32 idt_vectoring_info_field; + u32 idt_vectoring_error_code; + u32 vm_exit_instruction_len; + u32 vmx_instruction_info; + + u64 exit_qualification; + u64 exit_io_instruction_ecx; + u64 exit_io_instruction_esi; + u64 exit_io_instruction_edi; + u64 exit_io_instruction_eip; + + u64 guest_linear_address; + u64 guest_rsp; + u64 guest_rflags; + + u32 guest_interruptibility_info; + u32 cpu_based_vm_exec_control; + u32 exception_bitmap; + u32 vm_entry_controls; + u32 vm_entry_intr_info_field; + u32 vm_entry_exception_error_code; + u32 vm_entry_instruction_len; + u32 tpr_threshold; + + u64 guest_rip; + + u32 hv_clean_fields; + u32 hv_padding_32; + u32 hv_synthetic_controls; + u32 hv_enlightenments_control; + u32 hv_vp_id; + + u64 hv_vm_id; + u64 partition_assist_page; + u64 padding64_4[4]; + u64 guest_bndcfgs; + u64 padding64_5[7]; + u64 xss_exit_bitmap; + u64 padding64_6[7]; +}; + +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0 +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15) + +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF + #define HV_STIMER_ENABLE (1ULL << 0) #define HV_STIMER_PERIODIC (1ULL << 1) #define HV_STIMER_LAZY (1ULL << 2) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index e71c1120426b..404c5fdff859 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -106,9 +106,10 @@ #if IS_ENABLED(CONFIG_HYPERV) #define HYPERV_REENLIGHTENMENT_VECTOR 0xee +#define HYPERV_STIMER0_VECTOR 0xed #endif -#define LOCAL_TIMER_VECTOR 0xed +#define LOCAL_TIMER_VECTOR 0xec #define NR_VECTORS 256 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b605a5b6a30c..949c977bc4c9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -34,6 +34,7 @@ #include <asm/msr-index.h> #include <asm/asm.h> #include <asm/kvm_page_track.h> +#include <asm/hyperv-tlfs.h> #define KVM_MAX_VCPUS 288 #define KVM_SOFT_MAX_VCPUS 240 @@ -73,6 +74,7 @@ #define KVM_REQ_HV_RESET KVM_ARCH_REQ(20) #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) +#define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ @@ -498,6 +500,7 @@ struct kvm_vcpu_arch { u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ bool apicv_active; + bool load_eoi_exitmap_pending; DECLARE_BITMAP(ioapic_handled_vectors, 256); unsigned long apic_attention; int32_t apic_arb_prio; @@ -571,7 +574,7 @@ struct kvm_vcpu_arch { } exception; struct kvm_queued_interrupt { - bool pending; + bool injected; bool soft; u8 nr; } interrupt; @@ -754,6 +757,12 @@ struct kvm_hv { u64 hv_crash_ctl; HV_REFERENCE_TSC_PAGE tsc_ref; + + struct idr conn_to_evt; + + u64 hv_reenlightenment_control; + u64 hv_tsc_emulation_control; + u64 hv_tsc_emulation_status; }; enum kvm_irqchip_mode { @@ -762,15 +771,6 @@ enum kvm_irqchip_mode { KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ }; -struct kvm_sev_info { - bool active; /* SEV enabled guest */ - unsigned int asid; /* ASID used for this guest */ - unsigned int handle; /* SEV firmware handle */ - int fd; /* SEV device fd */ - unsigned long pages_locked; /* Number of pages locked */ - struct list_head regions_list; /* List of registered regions */ -}; - struct kvm_arch { unsigned int n_used_mmu_pages; unsigned int n_requested_mmu_pages; @@ -800,13 +800,13 @@ struct kvm_arch { struct mutex apic_map_lock; struct kvm_apic_map *apic_map; - unsigned int tss_addr; bool apic_access_page_done; gpa_t wall_clock; - bool ept_identity_pagetable_done; - gpa_t ept_identity_map_addr; + bool mwait_in_guest; + bool hlt_in_guest; + bool pause_in_guest; unsigned long irq_sources_bitmap; s64 kvmclock_offset; @@ -849,17 +849,8 @@ struct kvm_arch { bool disabled_lapic_found; - /* Struct members for AVIC */ - u32 avic_vm_id; - u32 ldr_mode; - struct page *avic_logical_id_table_page; - struct page *avic_physical_id_table_page; - struct hlist_node hnode; - bool x2apic_format; bool x2apic_broadcast_quirk_disabled; - - struct kvm_sev_info sev_info; }; struct kvm_vm_stat { @@ -936,6 +927,8 @@ struct kvm_x86_ops { bool (*cpu_has_high_real_mode_segbase)(void); void (*cpuid_update)(struct kvm_vcpu *vcpu); + struct kvm *(*vm_alloc)(void); + void (*vm_free)(struct kvm *); int (*vm_init)(struct kvm *kvm); void (*vm_destroy)(struct kvm *kvm); @@ -1007,6 +1000,7 @@ struct kvm_x86_ops { void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); + int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); int (*get_tdp_level)(struct kvm_vcpu *vcpu); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); int (*get_lpage_level)(void); @@ -1109,6 +1103,17 @@ struct kvm_arch_async_pf { extern struct kvm_x86_ops *kvm_x86_ops; +#define __KVM_HAVE_ARCH_VM_ALLOC +static inline struct kvm *kvm_arch_alloc_vm(void) +{ + return kvm_x86_ops->vm_alloc(); +} + +static inline void kvm_arch_free_vm(struct kvm *kvm) +{ + return kvm_x86_ops->vm_free(kvm); +} + int kvm_mmu_module_init(void); void kvm_mmu_module_exit(void); @@ -1187,6 +1192,8 @@ enum emulation_result { #define EMULTYPE_SKIP (1 << 2) #define EMULTYPE_RETRY (1 << 3) #define EMULTYPE_NO_REEXECUTE (1 << 4) +#define EMULTYPE_NO_UD_ON_FAIL (1 << 5) +#define EMULTYPE_VMWARE (1 << 6) int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, int emulation_type, void *insn, int insn_len); @@ -1204,8 +1211,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); struct x86_emulate_ctxt; -int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); -int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port); +int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in); int kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); int kvm_vcpu_halt(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 7b407dda2bd7..3aea2658323a 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -88,6 +88,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, #ifdef CONFIG_KVM_GUEST bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); +unsigned int kvm_arch_para_hints(void); void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); @@ -115,6 +116,11 @@ static inline unsigned int kvm_arch_para_features(void) return 0; } +static inline unsigned int kvm_arch_para_hints(void) +{ + return 0; +} + static inline u32 kvm_read_and_reset_pf_reason(void) { return 0; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 25283f7eb299..b90e79610cf7 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -6,90 +6,23 @@ #include <linux/atomic.h> #include <linux/nmi.h> #include <asm/io.h> -#include <asm/hyperv.h> +#include <asm/hyperv-tlfs.h> #include <asm/nospec-branch.h> -/* - * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent - * is set by CPUID(HVCPUID_VERSION_FEATURES). - */ -enum hv_cpuid_function { - HVCPUID_VERSION_FEATURES = 0x00000001, - HVCPUID_VENDOR_MAXFUNCTION = 0x40000000, - HVCPUID_INTERFACE = 0x40000001, - - /* - * The remaining functions depend on the value of - * HVCPUID_INTERFACE - */ - HVCPUID_VERSION = 0x40000002, - HVCPUID_FEATURES = 0x40000003, - HVCPUID_ENLIGHTENMENT_INFO = 0x40000004, - HVCPUID_IMPLEMENTATION_LIMITS = 0x40000005, -}; - struct ms_hyperv_info { u32 features; u32 misc_features; u32 hints; + u32 nested_features; u32 max_vp_index; u32 max_lp_index; }; extern struct ms_hyperv_info ms_hyperv; -/* - * Declare the MSR used to setup pages used to communicate with the hypervisor. - */ -union hv_x64_msr_hypercall_contents { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:11; - u64 guest_physical_address:52; - }; -}; - -/* - * TSC page layout. - */ - -struct ms_hyperv_tsc_page { - volatile u32 tsc_sequence; - u32 reserved1; - volatile u64 tsc_scale; - volatile s64 tsc_offset; - u64 reserved2[509]; -}; - -/* - * The guest OS needs to register the guest ID with the hypervisor. - * The guest ID is a 64 bit entity and the structure of this ID is - * specified in the Hyper-V specification: - * - * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx - * - * While the current guideline does not specify how Linux guest ID(s) - * need to be generated, our plan is to publish the guidelines for - * Linux and other guest operating systems that currently are hosted - * on Hyper-V. The implementation here conforms to this yet - * unpublished guidelines. - * - * - * Bit(s) - * 63 - Indicates if the OS is Open Source or not; 1 is Open Source - * 62:56 - Os Type; Linux is 0x100 - * 55:48 - Distro specific identification - * 47:16 - Linux kernel version number - * 15:0 - Distro specific identification - * - * - */ - -#define HV_LINUX_VENDOR_ID 0x8100 /* - * Generate the guest ID based on the guideline described above. + * Generate the guest ID. */ static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version, @@ -173,6 +106,19 @@ void hv_remove_kexec_handler(void); void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)); void hv_remove_crash_handler(void); +/* + * Routines for stimer0 Direct Mode handling. + * On x86/x64, there are no percpu actions to take. + */ +void hv_stimer0_vector_handler(struct pt_regs *regs); +void hv_stimer0_callback_vector(void); +int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)); +void hv_remove_stimer0_irq(int irq); + +static inline void hv_enable_stimer0_percpu_irq(int irq) {} +static inline void hv_disable_stimer0_percpu_irq(int irq) {} + + #if IS_ENABLED(CONFIG_HYPERV) extern struct clocksource *hyperv_cs; extern void *hv_hypercall_pg; @@ -215,14 +161,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) return hv_status; } -#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0) -#define HV_HYPERCALL_FAST_BIT BIT(16) -#define HV_HYPERCALL_VARHEAD_OFFSET 17 -#define HV_HYPERCALL_REP_COMP_OFFSET 32 -#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32) -#define HV_HYPERCALL_REP_START_OFFSET 48 -#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48) - /* Fast hypercall with 8 bytes of input and no output */ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) { @@ -294,6 +232,15 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, */ extern u32 *hv_vp_index; extern u32 hv_max_vp_index; +extern struct hv_vp_assist_page **hv_vp_assist_page; + +static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) +{ + if (!hv_vp_assist_page) + return NULL; + + return hv_vp_assist_page[cpu]; +} /** * hv_cpu_number_to_vp_number() - Map CPU to VP. @@ -330,6 +277,10 @@ static inline void hyperv_setup_mmu_ops(void) {} static inline void set_hv_tscchange_cb(void (*cb)(void)) {} static inline void clear_hv_tscchange_cb(void) {} static inline void hyperv_stop_tsc_emulation(void) {}; +static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) +{ + return NULL; +} #endif /* CONFIG_HYPERV */ #ifdef CONFIG_HYPERV_TSCPAGE diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index c9084dedfcfa..53d5b1b9255e 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -353,7 +353,21 @@ /* Fam 15h MSRs */ #define MSR_F15H_PERF_CTL 0xc0010200 +#define MSR_F15H_PERF_CTL0 MSR_F15H_PERF_CTL +#define MSR_F15H_PERF_CTL1 (MSR_F15H_PERF_CTL + 2) +#define MSR_F15H_PERF_CTL2 (MSR_F15H_PERF_CTL + 4) +#define MSR_F15H_PERF_CTL3 (MSR_F15H_PERF_CTL + 6) +#define MSR_F15H_PERF_CTL4 (MSR_F15H_PERF_CTL + 8) +#define MSR_F15H_PERF_CTL5 (MSR_F15H_PERF_CTL + 10) + #define MSR_F15H_PERF_CTR 0xc0010201 +#define MSR_F15H_PERF_CTR0 MSR_F15H_PERF_CTR +#define MSR_F15H_PERF_CTR1 (MSR_F15H_PERF_CTR + 2) +#define MSR_F15H_PERF_CTR2 (MSR_F15H_PERF_CTR + 4) +#define MSR_F15H_PERF_CTR3 (MSR_F15H_PERF_CTR + 6) +#define MSR_F15H_PERF_CTR4 (MSR_F15H_PERF_CTR + 8) +#define MSR_F15H_PERF_CTR5 (MSR_F15H_PERF_CTR + 10) + #define MSR_F15H_NB_PERF_CTL 0xc0010240 #define MSR_F15H_NB_PERF_CTR 0xc0010241 #define MSR_F15H_PTSC 0xc0010280 diff --git a/arch/x86/include/asm/platform_sst_audio.h b/arch/x86/include/asm/platform_sst_audio.h index 5973a2f3db3d..059823bb8af7 100644 --- a/arch/x86/include/asm/platform_sst_audio.h +++ b/arch/x86/include/asm/platform_sst_audio.h @@ -135,6 +135,7 @@ struct sst_platform_info { const struct sst_res_info *res_info; const struct sst_lib_dnld_info *lib_info; const char *platform; + bool streams_lost_on_suspend; }; int add_sst_platform_device(void); #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b0ccd4847a58..4fa4206029e3 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -407,9 +407,19 @@ union irq_stack_union { DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; DECLARE_INIT_PER_CPU(irq_stack_union); +static inline unsigned long cpu_kernelmode_gs_base(int cpu) +{ + return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu); +} + DECLARE_PER_CPU(char *, irq_stack_ptr); DECLARE_PER_CPU(unsigned int, irq_count); extern asmlinkage void ignore_sysret(void); + +#if IS_ENABLED(CONFIG_KVM) +/* Save actual FS/GS selectors and bases to current->thread */ +void save_fsgs_for_kvm(void); +#endif #else /* X86_64 */ #ifdef CONFIG_CC_STACKPROTECTOR /* diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 0487ac054870..93b462e48067 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -60,7 +60,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u32 intercept_dr; u32 intercept_exceptions; u64 intercept; - u8 reserved_1[42]; + u8 reserved_1[40]; + u16 pause_filter_thresh; u16 pause_filter_count; u64 iopm_base_pa; u64 msrpm_base_pa; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 199e15bd3ec5..ce8b4da07e35 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -122,12 +122,14 @@ struct x86_init_pci { * @guest_late_init: guest late init * @x2apic_available: X2APIC detection * @init_mem_mapping: setup early mappings during init_mem_mapping() + * @init_after_bootmem: guest init after boot allocator is finished */ struct x86_hyper_init { void (*init_platform)(void); void (*guest_late_init)(void); bool (*x2apic_available)(void); void (*init_mem_mapping)(void); + void (*init_after_bootmem)(void); }; /** diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index f3a960488eae..c535c2fdea13 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -354,8 +354,25 @@ struct kvm_xcrs { __u64 padding[16]; }; -/* definition of registers in kvm_run */ +#define KVM_SYNC_X86_REGS (1UL << 0) +#define KVM_SYNC_X86_SREGS (1UL << 1) +#define KVM_SYNC_X86_EVENTS (1UL << 2) + +#define KVM_SYNC_X86_VALID_FIELDS \ + (KVM_SYNC_X86_REGS| \ + KVM_SYNC_X86_SREGS| \ + KVM_SYNC_X86_EVENTS) + +/* kvm_sync_regs struct included by kvm_run struct */ struct kvm_sync_regs { + /* Members of this structure are potentially malicious. + * Care must be taken by code reading, esp. interpreting, + * data fields from them inside KVM to prevent TOCTOU and + * double-fetch types of vulnerabilities. + */ + struct kvm_regs regs; + struct kvm_sregs sregs; + struct kvm_vcpu_events events; }; #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 6cfa9c8cb7d6..4c851ebb3ceb 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -3,15 +3,16 @@ #define _UAPI_ASM_X86_KVM_PARA_H #include <linux/types.h> -#include <asm/hyperv.h> /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It * should be used to determine that a VM is running under KVM. */ #define KVM_CPUID_SIGNATURE 0x40000000 -/* This CPUID returns a feature bitmap in eax. Before enabling a particular - * paravirtualization, the appropriate feature bit should be checked. +/* This CPUID returns two feature bitmaps in eax, edx. Before enabling + * a particular paravirtualization, the appropriate feature bit should + * be checked in eax. The performance hint feature bit should be checked + * in edx. */ #define KVM_CPUID_FEATURES 0x40000001 #define KVM_FEATURE_CLOCKSOURCE 0 @@ -28,6 +29,8 @@ #define KVM_FEATURE_PV_TLB_FLUSH 9 #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 +#define KVM_HINTS_DEDICATED 0 + /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 348cf4821240..4702fbd98f92 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -487,7 +487,7 @@ void load_percpu_segment(int cpu) loadsegment(fs, __KERNEL_PERCPU); #else __loadsegment_simple(gs, 0); - wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); + wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); #endif load_stack_canary_segment(); } @@ -1398,6 +1398,7 @@ __setup("clearcpuid=", setup_clearcpuid); #ifdef CONFIG_X86_64 DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE) __visible; +EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union); /* * The following percpu variables are hot. Align current_task to diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 48179928ff38..0624957aa068 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -58,7 +58,7 @@ static u8 amd_ucode_patch[PATCH_MAX_SIZE]; /* * Microcode patch container file is prepended to the initrd in cpio - * format. See Documentation/x86/early-microcode.txt + * format. See Documentation/x86/microcode.txt */ static const char ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 9340f41ce8d3..031082c96db8 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -22,7 +22,7 @@ #include <linux/kexec.h> #include <asm/processor.h> #include <asm/hypervisor.h> -#include <asm/hyperv.h> +#include <asm/hyperv-tlfs.h> #include <asm/mshyperv.h> #include <asm/desc.h> #include <asm/irq_regs.h> @@ -37,6 +37,7 @@ EXPORT_SYMBOL_GPL(ms_hyperv); #if IS_ENABLED(CONFIG_HYPERV) static void (*vmbus_handler)(void); +static void (*hv_stimer0_handler)(void); static void (*hv_kexec_handler)(void); static void (*hv_crash_handler)(struct pt_regs *regs); @@ -69,6 +70,41 @@ void hv_remove_vmbus_irq(void) EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq); EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq); +/* + * Routines to do per-architecture handling of stimer0 + * interrupts when in Direct Mode + */ + +__visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + entering_irq(); + inc_irq_stat(hyperv_stimer0_count); + if (hv_stimer0_handler) + hv_stimer0_handler(); + ack_APIC_irq(); + + exiting_irq(); + set_irq_regs(old_regs); +} + +int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)) +{ + *vector = HYPERV_STIMER0_VECTOR; + *irq = 0; /* Unused on x86/x64 */ + hv_stimer0_handler = handler; + return 0; +} +EXPORT_SYMBOL_GPL(hv_setup_stimer0_irq); + +void hv_remove_stimer0_irq(int irq) +{ + /* We have no way to deallocate the interrupt gate */ + hv_stimer0_handler = NULL; +} +EXPORT_SYMBOL_GPL(hv_remove_stimer0_irq); + void hv_setup_kexec_handler(void (*handler)(void)) { hv_kexec_handler = handler; @@ -180,8 +216,8 @@ static void __init ms_hyperv_init_platform(void) pr_info("Hyper-V: features 0x%x, hints 0x%x\n", ms_hyperv.features, ms_hyperv.hints); - ms_hyperv.max_vp_index = cpuid_eax(HVCPUID_IMPLEMENTATION_LIMITS); - ms_hyperv.max_lp_index = cpuid_ebx(HVCPUID_IMPLEMENTATION_LIMITS); + ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS); + ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS); pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n", ms_hyperv.max_vp_index, ms_hyperv.max_lp_index); @@ -189,11 +225,12 @@ static void __init ms_hyperv_init_platform(void) /* * Extract host information. */ - if (cpuid_eax(HVCPUID_VENDOR_MAXFUNCTION) >= HVCPUID_VERSION) { - hv_host_info_eax = cpuid_eax(HVCPUID_VERSION); - hv_host_info_ebx = cpuid_ebx(HVCPUID_VERSION); - hv_host_info_ecx = cpuid_ecx(HVCPUID_VERSION); - hv_host_info_edx = cpuid_edx(HVCPUID_VERSION); + if (cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS) >= + HYPERV_CPUID_VERSION) { + hv_host_info_eax = cpuid_eax(HYPERV_CPUID_VERSION); + hv_host_info_ebx = cpuid_ebx(HYPERV_CPUID_VERSION); + hv_host_info_ecx = cpuid_ecx(HYPERV_CPUID_VERSION); + hv_host_info_edx = cpuid_edx(HYPERV_CPUID_VERSION); pr_info("Hyper-V Host Build:%d-%d.%d-%d-%d.%d\n", hv_host_info_eax, hv_host_info_ebx >> 16, @@ -207,6 +244,11 @@ static void __init ms_hyperv_init_platform(void) x86_platform.calibrate_cpu = hv_get_tsc_khz; } + if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED) { + ms_hyperv.nested_features = + cpuid_eax(HYPERV_CPUID_NESTED_FEATURES); + } + #ifdef CONFIG_X86_LOCAL_APIC if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS && ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { @@ -257,6 +299,10 @@ static void __init ms_hyperv_init_platform(void) alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR, hyperv_reenlightenment_vector); + /* Setup the IDT for stimer0 */ + if (ms_hyperv.misc_features & HV_X64_STIMER_DIRECT_MODE_AVAILABLE) + alloc_intr_gate(HYPERV_STIMER0_VECTOR, + hv_stimer0_callback_vector); #endif } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 45fb4d2565f8..328d027d829d 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -150,6 +150,13 @@ int arch_show_interrupts(struct seq_file *p, int prec) irq_stats(j)->irq_hv_reenlightenment_count); seq_puts(p, " Hyper-V reenlightenment interrupts\n"); } + if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) { + seq_printf(p, "%*s: ", prec, "HVS"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + irq_stats(j)->hyperv_stimer0_count); + seq_puts(p, " Hyper-V stimer0 interrupts\n"); + } #endif seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index fae86e36e399..7867417cfaff 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -454,6 +454,13 @@ static void __init sev_map_percpu_data(void) } #ifdef CONFIG_SMP +static void __init kvm_smp_prepare_cpus(unsigned int max_cpus) +{ + native_smp_prepare_cpus(max_cpus); + if (kvm_para_has_hint(KVM_HINTS_DEDICATED)) + static_branch_disable(&virt_spin_lock_key); +} + static void __init kvm_smp_prepare_boot_cpu(void) { /* @@ -546,6 +553,7 @@ static void __init kvm_guest_init(void) } if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && + !kvm_para_has_hint(KVM_HINTS_DEDICATED) && kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; @@ -556,6 +564,7 @@ static void __init kvm_guest_init(void) kvm_setup_vsyscall_timeinfo(); #ifdef CONFIG_SMP + smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus; smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online", kvm_cpu_online, kvm_cpu_down_prepare) < 0) @@ -605,6 +614,11 @@ unsigned int kvm_arch_para_features(void) return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES); } +unsigned int kvm_arch_para_hints(void) +{ + return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES); +} + static uint32_t __init kvm_detect(void) { return kvm_cpuid_base(); @@ -635,6 +649,7 @@ static __init int kvm_setup_pv_tlb_flush(void) int cpu; if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && + !kvm_para_has_hint(KVM_HINTS_DEDICATED) && kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { for_each_possible_cpu(cpu) { zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), @@ -730,6 +745,9 @@ void __init kvm_spinlock_init(void) if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) return; + if (kvm_para_has_hint(KVM_HINTS_DEDICATED)) + return; + __pv_init_lock_hash(); pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 14437116ffea..77625b60a510 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -6,7 +6,6 @@ #include <linux/bootmem.h> #include <linux/gfp.h> #include <linux/pci.h> -#include <linux/kmemleak.h> #include <asm/proto.h> #include <asm/dma.h> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 9eb448c7859d..4b100fe0f508 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -205,6 +205,20 @@ static __always_inline void save_fsgs(struct task_struct *task) save_base_legacy(task, task->thread.gsindex, GS); } +#if IS_ENABLED(CONFIG_KVM) +/* + * While a process is running,current->thread.fsbase and current->thread.gsbase + * may not match the corresponding CPU registers (see save_base_legacy()). KVM + * wants an efficient way to save and restore FSBASE and GSBASE. + * When FSGSBASE extensions are enabled, this will have to use RD{FS,GS}BASE. + */ +void save_fsgs_for_kvm(void) +{ + save_fsgs(current); +} +EXPORT_SYMBOL_GPL(save_fsgs_for_kvm); +#endif + static __always_inline void loadseg(enum which_selector which, unsigned short sel) { diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 838a4dc90a6d..df92605d8724 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -26,7 +26,7 @@ static inline void signal_compat_build_tests(void) * new fields are handled in copy_siginfo_to_user32()! */ BUILD_BUG_ON(NSIGILL != 11); - BUILD_BUG_ON(NSIGFPE != 13); + BUILD_BUG_ON(NSIGFPE != 14); BUILD_BUG_ON(NSIGSEGV != 7); BUILD_BUG_ON(NSIGBUS != 5); BUILD_BUG_ON(NSIGTRAP != 4); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index ebda84a91510..3ab867603e81 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -92,6 +92,7 @@ struct x86_init_ops x86_init __initdata = { .guest_late_init = x86_init_noop, .x2apic_available = bool_x86_init_noop, .init_mem_mapping = x86_init_noop, + .init_after_bootmem = x86_init_noop, }, .acpi = { diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b671fc2d0422..82055b90a8b3 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -135,6 +135,11 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) return -EINVAL; } + best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0); + if (kvm_hlt_in_guest(vcpu->kvm) && best && + (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) + best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); + /* Update physical-address width */ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); kvm_mmu_reset_context(vcpu); @@ -370,7 +375,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) | - F(TOPOEXT); + F(TOPOEXT) | F(PERFCTR_CORE); /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d91eaeb01034..b3705ae52824 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -30,6 +30,7 @@ #include "x86.h" #include "tss.h" #include "mmu.h" +#include "pmu.h" /* * Operand types @@ -2887,6 +2888,9 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) return ctxt->ops->cpl(ctxt) > iopl; } +#define VMWARE_PORT_VMPORT (0x5658) +#define VMWARE_PORT_VMRPC (0x5659) + static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, u16 port, u16 len) { @@ -2898,6 +2902,14 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, unsigned mask = (1 << len) - 1; unsigned long base; + /* + * VMware allows access to these ports even if denied + * by TSS I/O permission bitmap. Mimic behavior. + */ + if (enable_vmware_backdoor && + ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC))) + return true; + ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR); if (!tr_seg.p) return false; @@ -4282,6 +4294,13 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt) u64 cr4 = ctxt->ops->get_cr(ctxt, 4); u64 rcx = reg_read(ctxt, VCPU_REGS_RCX); + /* + * VMware allows access to these Pseduo-PMCs even when read via RDPMC + * in Ring3 when CR4.PCE=0. + */ + if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx)) + return X86EMUL_CONTINUE; + if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || ctxt->ops->check_pmc(ctxt, rcx)) return emulate_gp(ctxt, 0); @@ -4498,6 +4517,10 @@ static const struct gprefix pfx_0f_2b = { ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N, }; +static const struct gprefix pfx_0f_10_0f_11 = { + I(Unaligned, em_mov), I(Unaligned, em_mov), N, N, +}; + static const struct gprefix pfx_0f_28_0f_29 = { I(Aligned, em_mov), I(Aligned, em_mov), N, N, }; @@ -4709,7 +4732,9 @@ static const struct opcode twobyte_table[256] = { DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N, /* 0x10 - 0x1F */ - N, N, N, N, N, N, N, N, + GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11), + GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11), + N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 0x20 - 0x2F */ diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index dc97f2544b6f..98618e397342 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -29,6 +29,7 @@ #include <linux/kvm_host.h> #include <linux/highmem.h> #include <linux/sched/cputime.h> +#include <linux/eventfd.h> #include <asm/apicdef.h> #include <trace/events/kvm.h> @@ -74,13 +75,38 @@ static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic, return false; } +static void synic_update_vector(struct kvm_vcpu_hv_synic *synic, + int vector) +{ + if (vector < HV_SYNIC_FIRST_VALID_VECTOR) + return; + + if (synic_has_vector_connected(synic, vector)) + __set_bit(vector, synic->vec_bitmap); + else + __clear_bit(vector, synic->vec_bitmap); + + if (synic_has_vector_auto_eoi(synic, vector)) + __set_bit(vector, synic->auto_eoi_bitmap); + else + __clear_bit(vector, synic->auto_eoi_bitmap); +} + static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, u64 data, bool host) { - int vector; + int vector, old_vector; + bool masked; vector = data & HV_SYNIC_SINT_VECTOR_MASK; - if (vector < 16 && !host) + masked = data & HV_SYNIC_SINT_MASKED; + + /* + * Valid vectors are 16-255, however, nested Hyper-V attempts to write + * default '0x10000' value on boot and this should not #GP. We need to + * allow zero-initing the register from host as well. + */ + if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked) return 1; /* * Guest may configure multiple SINTs to use the same vector, so @@ -88,18 +114,13 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, * bitmap of vectors with auto-eoi behavior. The bitmaps are * updated here, and atomically queried on fast paths. */ + old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK; atomic64_set(&synic->sint[sint], data); - if (synic_has_vector_connected(synic, vector)) - __set_bit(vector, synic->vec_bitmap); - else - __clear_bit(vector, synic->vec_bitmap); + synic_update_vector(synic, old_vector); - if (synic_has_vector_auto_eoi(synic, vector)) - __set_bit(vector, synic->auto_eoi_bitmap); - else - __clear_bit(vector, synic->auto_eoi_bitmap); + synic_update_vector(synic, vector); /* Load SynIC vectors into EOI exit bitmap */ kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic)); @@ -736,6 +757,9 @@ static bool kvm_hv_msr_partition_wide(u32 msr) case HV_X64_MSR_CRASH_CTL: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_RESET: + case HV_X64_MSR_REENLIGHTENMENT_CONTROL: + case HV_X64_MSR_TSC_EMULATION_CONTROL: + case HV_X64_MSR_TSC_EMULATION_STATUS: r = true; break; } @@ -981,6 +1005,15 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, kvm_make_request(KVM_REQ_HV_RESET, vcpu); } break; + case HV_X64_MSR_REENLIGHTENMENT_CONTROL: + hv->hv_reenlightenment_control = data; + break; + case HV_X64_MSR_TSC_EMULATION_CONTROL: + hv->hv_tsc_emulation_control = data; + break; + case HV_X64_MSR_TSC_EMULATION_STATUS: + hv->hv_tsc_emulation_status = data; + break; default: vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", msr, data); @@ -1009,17 +1042,17 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) return 1; hv->vp_index = (u32)data; break; - case HV_X64_MSR_APIC_ASSIST_PAGE: { + case HV_X64_MSR_VP_ASSIST_PAGE: { u64 gfn; unsigned long addr; - if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { + if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) { hv->hv_vapic = data; if (kvm_lapic_enable_pv_eoi(vcpu, 0)) return 1; break; } - gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT; + gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT; addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); if (kvm_is_error_hva(addr)) return 1; @@ -1105,6 +1138,15 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_RESET: data = 0; break; + case HV_X64_MSR_REENLIGHTENMENT_CONTROL: + data = hv->hv_reenlightenment_control; + break; + case HV_X64_MSR_TSC_EMULATION_CONTROL: + data = hv->hv_tsc_emulation_control; + break; + case HV_X64_MSR_TSC_EMULATION_STATUS: + data = hv->hv_tsc_emulation_status; + break; default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; @@ -1129,7 +1171,7 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); case HV_X64_MSR_TPR: return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); - case HV_X64_MSR_APIC_ASSIST_PAGE: + case HV_X64_MSR_VP_ASSIST_PAGE: data = hv->hv_vapic; break; case HV_X64_MSR_VP_RUNTIME: @@ -1226,10 +1268,47 @@ static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) return 1; } +static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param) +{ + struct eventfd_ctx *eventfd; + + if (unlikely(!fast)) { + int ret; + gpa_t gpa = param; + + if ((gpa & (__alignof__(param) - 1)) || + offset_in_page(gpa) + sizeof(param) > PAGE_SIZE) + return HV_STATUS_INVALID_ALIGNMENT; + + ret = kvm_vcpu_read_guest(vcpu, gpa, ¶m, sizeof(param)); + if (ret < 0) + return HV_STATUS_INVALID_ALIGNMENT; + } + + /* + * Per spec, bits 32-47 contain the extra "flag number". However, we + * have no use for it, and in all known usecases it is zero, so just + * report lookup failure if it isn't. + */ + if (param & 0xffff00000000ULL) + return HV_STATUS_INVALID_PORT_ID; + /* remaining bits are reserved-zero */ + if (param & ~KVM_HYPERV_CONN_ID_MASK) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + + /* conn_to_evt is protected by vcpu->kvm->srcu */ + eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param); + if (!eventfd) + return HV_STATUS_INVALID_PORT_ID; + + eventfd_signal(eventfd, 1); + return HV_STATUS_SUCCESS; +} + int kvm_hv_hypercall(struct kvm_vcpu *vcpu) { - u64 param, ingpa, outgpa, ret; - uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; + u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS; + uint16_t code, rep_idx, rep_cnt; bool fast, longmode; /* @@ -1268,7 +1347,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) /* Hypercall continuation is not supported yet */ if (rep_cnt || rep_idx) { - res = HV_STATUS_INVALID_HYPERCALL_CODE; + ret = HV_STATUS_INVALID_HYPERCALL_CODE; goto set_result; } @@ -1276,11 +1355,15 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) case HVCALL_NOTIFY_LONG_SPIN_WAIT: kvm_vcpu_on_spin(vcpu, true); break; - case HVCALL_POST_MESSAGE: case HVCALL_SIGNAL_EVENT: + ret = kvm_hvcall_signal_event(vcpu, fast, ingpa); + if (ret != HV_STATUS_INVALID_PORT_ID) + break; + /* maybe userspace knows this conn_id: fall through */ + case HVCALL_POST_MESSAGE: /* don't bother userspace if it has no way to handle it */ if (!vcpu_to_synic(vcpu)->active) { - res = HV_STATUS_INVALID_HYPERCALL_CODE; + ret = HV_STATUS_INVALID_HYPERCALL_CODE; break; } vcpu->run->exit_reason = KVM_EXIT_HYPERV; @@ -1292,12 +1375,79 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) kvm_hv_hypercall_complete_userspace; return 0; default: - res = HV_STATUS_INVALID_HYPERCALL_CODE; + ret = HV_STATUS_INVALID_HYPERCALL_CODE; break; } set_result: - ret = res | (((u64)rep_done & 0xfff) << 32); kvm_hv_hypercall_set_result(vcpu, ret); return 1; } + +void kvm_hv_init_vm(struct kvm *kvm) +{ + mutex_init(&kvm->arch.hyperv.hv_lock); + idr_init(&kvm->arch.hyperv.conn_to_evt); +} + +void kvm_hv_destroy_vm(struct kvm *kvm) +{ + struct eventfd_ctx *eventfd; + int i; + + idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i) + eventfd_ctx_put(eventfd); + idr_destroy(&kvm->arch.hyperv.conn_to_evt); +} + +static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd) +{ + struct kvm_hv *hv = &kvm->arch.hyperv; + struct eventfd_ctx *eventfd; + int ret; + + eventfd = eventfd_ctx_fdget(fd); + if (IS_ERR(eventfd)) + return PTR_ERR(eventfd); + + mutex_lock(&hv->hv_lock); + ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1, + GFP_KERNEL); + mutex_unlock(&hv->hv_lock); + + if (ret >= 0) + return 0; + + if (ret == -ENOSPC) + ret = -EEXIST; + eventfd_ctx_put(eventfd); + return ret; +} + +static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id) +{ + struct kvm_hv *hv = &kvm->arch.hyperv; + struct eventfd_ctx *eventfd; + + mutex_lock(&hv->hv_lock); + eventfd = idr_remove(&hv->conn_to_evt, conn_id); + mutex_unlock(&hv->hv_lock); + + if (!eventfd) + return -ENOENT; + + synchronize_srcu(&kvm->srcu); + eventfd_ctx_put(eventfd); + return 0; +} + +int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args) +{ + if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) || + (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK)) + return -EINVAL; + + if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN) + return kvm_hv_eventfd_deassign(kvm, args->conn_id); + return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd); +} diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index e637631a9574..837465d69c6d 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -88,4 +88,8 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu); void kvm_hv_setup_tsc_page(struct kvm *kvm, struct pvclock_vcpu_time_info *hv_clock); +void kvm_hv_init_vm(struct kvm *kvm); +void kvm_hv_destroy_vm(struct kvm *kvm); +int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args); + #endif diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index f171051eecf3..faa264822cee 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -73,8 +73,19 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v) */ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) { + /* + * FIXME: interrupt.injected represents an interrupt that it's + * side-effects have already been applied (e.g. bit from IRR + * already moved to ISR). Therefore, it is incorrect to rely + * on interrupt.injected to know if there is a pending + * interrupt in the user-mode LAPIC. + * This leads to nVMX/nSVM not be able to distinguish + * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on + * pending interrupt or should re-inject an injected + * interrupt. + */ if (!lapic_in_kernel(v)) - return v->arch.interrupt.pending; + return v->arch.interrupt.injected; if (kvm_cpu_has_extint(v)) return 1; @@ -91,8 +102,19 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) */ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) { + /* + * FIXME: interrupt.injected represents an interrupt that it's + * side-effects have already been applied (e.g. bit from IRR + * already moved to ISR). Therefore, it is incorrect to rely + * on interrupt.injected to know if there is a pending + * interrupt in the user-mode LAPIC. + * This leads to nVMX/nSVM not be able to distinguish + * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on + * pending interrupt or should re-inject an injected + * interrupt. + */ if (!lapic_in_kernel(v)) - return v->arch.interrupt.pending; + return v->arch.interrupt.injected; if (kvm_cpu_has_extint(v)) return 1; diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index f500293dad8d..9619dcc2b325 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -41,7 +41,7 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) if (!test_bit(VCPU_EXREG_PDPTR, (unsigned long *)&vcpu->arch.regs_avail)) - kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR); + kvm_x86_ops->cache_reg(vcpu, (enum kvm_reg)VCPU_EXREG_PDPTR); return vcpu->arch.walk_mmu->pdptrs[index]; } @@ -93,6 +93,11 @@ static inline void enter_guest_mode(struct kvm_vcpu *vcpu) static inline void leave_guest_mode(struct kvm_vcpu *vcpu) { vcpu->arch.hflags &= ~HF_GUEST_MASK; + + if (vcpu->arch.load_eoi_exitmap_pending) { + vcpu->arch.load_eoi_exitmap_pending = false; + kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu); + } } static inline bool is_guest_mode(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 391dda8d43b7..70dcb5548022 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -321,8 +321,16 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) if (!lapic_in_kernel(vcpu)) return; + /* + * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation) + * which doesn't have EOI register; Some buggy OSes (e.g. Windows with + * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC + * version first and level-triggered interrupts never get EOIed in + * IOAPIC. + */ feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); - if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31)))) + if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) && + !ioapic_in_kernel(vcpu->kvm)) v |= APIC_LVR_DIRECTED_EOI; kvm_lapic_set_reg(apic, APIC_LVR, v); } diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 56c36014f7b7..edce055e9fd7 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -109,7 +109,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) { - return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; + return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; } int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 763bb3bade63..8494dbae41b9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3031,7 +3031,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) return RET_PF_RETRY; } - return RET_PF_EMULATE; + return -EFAULT; } static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 5abae72266b7..6288e9d7068e 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -452,14 +452,21 @@ error: * done by is_rsvd_bits_set() above. * * We set up the value of exit_qualification to inject: - * [2:0] - Derive from [2:0] of real exit_qualification at EPT violation + * [2:0] - Derive from the access bits. The exit_qualification might be + * out of date if it is serving an EPT misconfiguration. * [5:3] - Calculated by the page walk of the guest EPT page tables * [7:8] - Derived from [7:8] of real exit_qualification * * The other bits are set to 0. */ if (!(errcode & PFERR_RSVD_MASK)) { - vcpu->arch.exit_qualification &= 0x187; + vcpu->arch.exit_qualification &= 0x180; + if (write_fault) + vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_WRITE; + if (user_fault) + vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_READ; + if (fetch_fault) + vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_INSTR; vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3; } #endif diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 026db42a86c3..58ead7db71a3 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -244,12 +244,49 @@ int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) return kvm_x86_ops->pmu_ops->is_valid_msr_idx(vcpu, idx); } +bool is_vmware_backdoor_pmc(u32 pmc_idx) +{ + switch (pmc_idx) { + case VMWARE_BACKDOOR_PMC_HOST_TSC: + case VMWARE_BACKDOOR_PMC_REAL_TIME: + case VMWARE_BACKDOOR_PMC_APPARENT_TIME: + return true; + } + return false; +} + +static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) +{ + u64 ctr_val; + + switch (idx) { + case VMWARE_BACKDOOR_PMC_HOST_TSC: + ctr_val = rdtsc(); + break; + case VMWARE_BACKDOOR_PMC_REAL_TIME: + ctr_val = ktime_get_boot_ns(); + break; + case VMWARE_BACKDOOR_PMC_APPARENT_TIME: + ctr_val = ktime_get_boot_ns() + + vcpu->kvm->arch.kvmclock_offset; + break; + default: + return 1; + } + + *data = ctr_val; + return 0; +} + int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) { bool fast_mode = idx & (1u << 31); struct kvm_pmc *pmc; u64 ctr_val; + if (is_vmware_backdoor_pmc(idx)) + return kvm_pmu_rdpmc_vmware(vcpu, idx, data); + pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx); if (!pmc) return 1; diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index a9a62b9a73e2..ba8898e1a854 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -9,6 +9,10 @@ /* retrieve the 4 bits for EN and PMI out of IA32_FIXED_CTR_CTRL */ #define fixed_ctrl_field(ctrl_reg, idx) (((ctrl_reg) >> ((idx)*4)) & 0xf) +#define VMWARE_BACKDOOR_PMC_HOST_TSC 0x10000 +#define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001 +#define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002 + struct kvm_event_hw_type_mapping { u8 eventsel; u8 unit_mask; @@ -114,6 +118,8 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu); void kvm_pmu_init(struct kvm_vcpu *vcpu); void kvm_pmu_destroy(struct kvm_vcpu *vcpu); +bool is_vmware_backdoor_pmc(u32 pmc_idx); + extern struct kvm_pmu_ops intel_pmu_ops; extern struct kvm_pmu_ops amd_pmu_ops; #endif /* __KVM_X86_PMU_H */ diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index cd944435dfbd..1495a735b38e 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -19,6 +19,21 @@ #include "lapic.h" #include "pmu.h" +enum pmu_type { + PMU_TYPE_COUNTER = 0, + PMU_TYPE_EVNTSEL, +}; + +enum index { + INDEX_ZERO = 0, + INDEX_ONE, + INDEX_TWO, + INDEX_THREE, + INDEX_FOUR, + INDEX_FIVE, + INDEX_ERROR, +}; + /* duplicated from amd_perfmon_event_map, K7 and above should work. */ static struct kvm_event_hw_type_mapping amd_event_mapping[] = { [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES }, @@ -31,6 +46,88 @@ static struct kvm_event_hw_type_mapping amd_event_mapping[] = { [7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, }; +static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type) +{ + struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); + + if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) { + if (type == PMU_TYPE_COUNTER) + return MSR_F15H_PERF_CTR; + else + return MSR_F15H_PERF_CTL; + } else { + if (type == PMU_TYPE_COUNTER) + return MSR_K7_PERFCTR0; + else + return MSR_K7_EVNTSEL0; + } +} + +static enum index msr_to_index(u32 msr) +{ + switch (msr) { + case MSR_F15H_PERF_CTL0: + case MSR_F15H_PERF_CTR0: + case MSR_K7_EVNTSEL0: + case MSR_K7_PERFCTR0: + return INDEX_ZERO; + case MSR_F15H_PERF_CTL1: + case MSR_F15H_PERF_CTR1: + case MSR_K7_EVNTSEL1: + case MSR_K7_PERFCTR1: + return INDEX_ONE; + case MSR_F15H_PERF_CTL2: + case MSR_F15H_PERF_CTR2: + case MSR_K7_EVNTSEL2: + case MSR_K7_PERFCTR2: + return INDEX_TWO; + case MSR_F15H_PERF_CTL3: + case MSR_F15H_PERF_CTR3: + case MSR_K7_EVNTSEL3: + case MSR_K7_PERFCTR3: + return INDEX_THREE; + case MSR_F15H_PERF_CTL4: + case MSR_F15H_PERF_CTR4: + return INDEX_FOUR; + case MSR_F15H_PERF_CTL5: + case MSR_F15H_PERF_CTR5: + return INDEX_FIVE; + default: + return INDEX_ERROR; + } +} + +static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr, + enum pmu_type type) +{ + switch (msr) { + case MSR_F15H_PERF_CTL0: + case MSR_F15H_PERF_CTL1: + case MSR_F15H_PERF_CTL2: + case MSR_F15H_PERF_CTL3: + case MSR_F15H_PERF_CTL4: + case MSR_F15H_PERF_CTL5: + case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: + if (type != PMU_TYPE_EVNTSEL) + return NULL; + break; + case MSR_F15H_PERF_CTR0: + case MSR_F15H_PERF_CTR1: + case MSR_F15H_PERF_CTR2: + case MSR_F15H_PERF_CTR3: + case MSR_F15H_PERF_CTR4: + case MSR_F15H_PERF_CTR5: + case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: + if (type != PMU_TYPE_COUNTER) + return NULL; + break; + default: + return NULL; + } + + return &pmu->gp_counters[msr_to_index(msr)]; +} + static unsigned amd_find_arch_event(struct kvm_pmu *pmu, u8 event_select, u8 unit_mask) @@ -64,7 +161,18 @@ static bool amd_pmc_is_enabled(struct kvm_pmc *pmc) static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) { - return get_gp_pmc(pmu, MSR_K7_EVNTSEL0 + pmc_idx, MSR_K7_EVNTSEL0); + unsigned int base = get_msr_base(pmu, PMU_TYPE_COUNTER); + struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); + + if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) { + /* + * The idx is contiguous. The MSRs are not. The counter MSRs + * are interleaved with the event select MSRs. + */ + pmc_idx *= 2; + } + + return get_gp_pmc_amd(pmu, base + pmc_idx, PMU_TYPE_COUNTER); } /* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */ @@ -96,8 +204,8 @@ static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); int ret = false; - ret = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0) || - get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0); + ret = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER) || + get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); return ret; } @@ -107,14 +215,14 @@ static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *pmc; - /* MSR_K7_PERFCTRn */ - pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0); + /* MSR_PERFCTRn */ + pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER); if (pmc) { *data = pmc_read_counter(pmc); return 0; } - /* MSR_K7_EVNTSELn */ - pmc = get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0); + /* MSR_EVNTSELn */ + pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); if (pmc) { *data = pmc->eventsel; return 0; @@ -130,14 +238,14 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) u32 msr = msr_info->index; u64 data = msr_info->data; - /* MSR_K7_PERFCTRn */ - pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0); + /* MSR_PERFCTRn */ + pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER); if (pmc) { pmc->counter += data - pmc_read_counter(pmc); return 0; } - /* MSR_K7_EVNTSELn */ - pmc = get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0); + /* MSR_EVNTSELn */ + pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); if (pmc) { if (data == pmc->eventsel) return 0; @@ -154,7 +262,11 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); - pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS; + if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) + pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS_CORE; + else + pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS; + pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; pmu->reserved_bits = 0xffffffff00200000ull; /* not applicable to AMD; but clean them to prevent any fall out */ @@ -169,7 +281,9 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); int i; - for (i = 0; i < AMD64_NUM_COUNTERS ; i++) { + BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC); + + for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) { pmu->gp_counters[i].type = KVM_PMC_GP; pmu->gp_counters[i].vcpu = vcpu; pmu->gp_counters[i].idx = i; @@ -181,7 +295,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); int i; - for (i = 0; i < AMD64_NUM_COUNTERS; i++) { + for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) { struct kvm_pmc *pmc = &pmu->gp_counters[i]; pmc_stop_counter(pmc); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9d2043f94e29..b58787daf9f8 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -131,6 +131,28 @@ static const u32 host_save_user_msrs[] = { #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) +struct kvm_sev_info { + bool active; /* SEV enabled guest */ + unsigned int asid; /* ASID used for this guest */ + unsigned int handle; /* SEV firmware handle */ + int fd; /* SEV device fd */ + unsigned long pages_locked; /* Number of pages locked */ + struct list_head regions_list; /* List of registered regions */ +}; + +struct kvm_svm { + struct kvm kvm; + + /* Struct members for AVIC */ + u32 avic_vm_id; + u32 ldr_mode; + struct page *avic_logical_id_table_page; + struct page *avic_physical_id_table_page; + struct hlist_node hnode; + + struct kvm_sev_info sev_info; +}; + struct kvm_vcpu; struct nested_state { @@ -276,6 +298,54 @@ static bool npt_enabled = true; static bool npt_enabled; #endif +/* + * These 2 parameters are used to config the controls for Pause-Loop Exiting: + * pause_filter_count: On processors that support Pause filtering(indicated + * by CPUID Fn8000_000A_EDX), the VMCB provides a 16 bit pause filter + * count value. On VMRUN this value is loaded into an internal counter. + * Each time a pause instruction is executed, this counter is decremented + * until it reaches zero at which time a #VMEXIT is generated if pause + * intercept is enabled. Refer to AMD APM Vol 2 Section 15.14.4 Pause + * Intercept Filtering for more details. + * This also indicate if ple logic enabled. + * + * pause_filter_thresh: In addition, some processor families support advanced + * pause filtering (indicated by CPUID Fn8000_000A_EDX) upper bound on + * the amount of time a guest is allowed to execute in a pause loop. + * In this mode, a 16-bit pause filter threshold field is added in the + * VMCB. The threshold value is a cycle count that is used to reset the + * pause counter. As with simple pause filtering, VMRUN loads the pause + * count value from VMCB into an internal counter. Then, on each pause + * instruction the hardware checks the elapsed number of cycles since + * the most recent pause instruction against the pause filter threshold. + * If the elapsed cycle count is greater than the pause filter threshold, + * then the internal pause count is reloaded from the VMCB and execution + * continues. If the elapsed cycle count is less than the pause filter + * threshold, then the internal pause count is decremented. If the count + * value is less than zero and PAUSE intercept is enabled, a #VMEXIT is + * triggered. If advanced pause filtering is supported and pause filter + * threshold field is set to zero, the filter will operate in the simpler, + * count only mode. + */ + +static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP; +module_param(pause_filter_thresh, ushort, 0444); + +static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW; +module_param(pause_filter_count, ushort, 0444); + +/* Default doubles per-vcpu window every exit. */ +static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW; +module_param(pause_filter_count_grow, ushort, 0444); + +/* Default resets per-vcpu window every exit to pause_filter_count. */ +static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; +module_param(pause_filter_count_shrink, ushort, 0444); + +/* Default is to compute the maximum so we can never overflow. */ +static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX; +module_param(pause_filter_count_max, ushort, 0444); + /* allow nested paging (virtualized MMU) for all guests */ static int npt = true; module_param(npt, int, S_IRUGO); @@ -352,6 +422,12 @@ struct enc_region { unsigned long size; }; + +static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) +{ + return container_of(kvm, struct kvm_svm, kvm); +} + static inline bool svm_sev_enabled(void) { return max_sev_asid; @@ -359,14 +435,14 @@ static inline bool svm_sev_enabled(void) static inline bool sev_guest(struct kvm *kvm) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; return sev->active; } static inline int sev_get_asid(struct kvm *kvm) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; return sev->asid; } @@ -1083,7 +1159,7 @@ static void disable_nmi_singlestep(struct vcpu_svm *svm) } /* Note: - * This hash table is used to map VM_ID to a struct kvm_arch, + * This hash table is used to map VM_ID to a struct kvm_svm, * when handling AMD IOMMU GALOG notification to schedule in * a particular vCPU. */ @@ -1100,7 +1176,7 @@ static DEFINE_SPINLOCK(svm_vm_data_hash_lock); static int avic_ga_log_notifier(u32 ga_tag) { unsigned long flags; - struct kvm_arch *ka = NULL; + struct kvm_svm *kvm_svm; struct kvm_vcpu *vcpu = NULL; u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag); u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag); @@ -1108,13 +1184,10 @@ static int avic_ga_log_notifier(u32 ga_tag) pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id); spin_lock_irqsave(&svm_vm_data_hash_lock, flags); - hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) { - struct kvm *kvm = container_of(ka, struct kvm, arch); - struct kvm_arch *vm_data = &kvm->arch; - - if (vm_data->avic_vm_id != vm_id) + hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) { + if (kvm_svm->avic_vm_id != vm_id) continue; - vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); + vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id); break; } spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); @@ -1172,6 +1245,42 @@ err: return rc; } +static void grow_ple_window(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb_control_area *control = &svm->vmcb->control; + int old = control->pause_filter_count; + + control->pause_filter_count = __grow_ple_window(old, + pause_filter_count, + pause_filter_count_grow, + pause_filter_count_max); + + if (control->pause_filter_count != old) + mark_dirty(svm->vmcb, VMCB_INTERCEPTS); + + trace_kvm_ple_window_grow(vcpu->vcpu_id, + control->pause_filter_count, old); +} + +static void shrink_ple_window(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb_control_area *control = &svm->vmcb->control; + int old = control->pause_filter_count; + + control->pause_filter_count = + __shrink_ple_window(old, + pause_filter_count, + pause_filter_count_shrink, + pause_filter_count); + if (control->pause_filter_count != old) + mark_dirty(svm->vmcb, VMCB_INTERCEPTS); + + trace_kvm_ple_window_shrink(vcpu->vcpu_id, + control->pause_filter_count, old); +} + static __init int svm_hardware_setup(void) { int cpu; @@ -1202,6 +1311,14 @@ static __init int svm_hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 32; } + /* Check for pause filtering support */ + if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { + pause_filter_count = 0; + pause_filter_thresh = 0; + } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) { + pause_filter_thresh = 0; + } + if (nested) { printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); @@ -1328,10 +1445,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) static void avic_init_vmcb(struct vcpu_svm *svm) { struct vmcb *vmcb = svm->vmcb; - struct kvm_arch *vm_data = &svm->vcpu.kvm->arch; + struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm); phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page)); - phys_addr_t lpa = __sme_set(page_to_phys(vm_data->avic_logical_id_table_page)); - phys_addr_t ppa = __sme_set(page_to_phys(vm_data->avic_physical_id_table_page)); + phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page)); + phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page)); vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK; vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK; @@ -1363,6 +1480,14 @@ static void init_vmcb(struct vcpu_svm *svm) set_exception_intercept(svm, MC_VECTOR); set_exception_intercept(svm, AC_VECTOR); set_exception_intercept(svm, DB_VECTOR); + /* + * Guest access to VMware backdoor ports could legitimately + * trigger #GP because of TSS I/O permission bitmap. + * We intercept those #GP and allow access to them anyway + * as VMware does. + */ + if (enable_vmware_backdoor) + set_exception_intercept(svm, GP_VECTOR); set_intercept(svm, INTERCEPT_INTR); set_intercept(svm, INTERCEPT_NMI); @@ -1371,7 +1496,6 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_RDPMC); set_intercept(svm, INTERCEPT_CPUID); set_intercept(svm, INTERCEPT_INVD); - set_intercept(svm, INTERCEPT_HLT); set_intercept(svm, INTERCEPT_INVLPG); set_intercept(svm, INTERCEPT_INVLPGA); set_intercept(svm, INTERCEPT_IOIO_PROT); @@ -1389,11 +1513,14 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_XSETBV); set_intercept(svm, INTERCEPT_RSM); - if (!kvm_mwait_in_guest()) { + if (!kvm_mwait_in_guest(svm->vcpu.kvm)) { set_intercept(svm, INTERCEPT_MONITOR); set_intercept(svm, INTERCEPT_MWAIT); } + if (!kvm_hlt_in_guest(svm->vcpu.kvm)) + set_intercept(svm, INTERCEPT_HLT); + control->iopm_base_pa = __sme_set(iopm_base); control->msrpm_base_pa = __sme_set(__pa(svm->msrpm)); control->int_ctl = V_INTR_MASKING_MASK; @@ -1449,9 +1576,13 @@ static void init_vmcb(struct vcpu_svm *svm) svm->nested.vmcb = 0; svm->vcpu.arch.hflags = 0; - if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { - control->pause_filter_count = 3000; + if (pause_filter_count) { + control->pause_filter_count = pause_filter_count; + if (pause_filter_thresh) + control->pause_filter_thresh = pause_filter_thresh; set_intercept(svm, INTERCEPT_PAUSE); + } else { + clr_intercept(svm, INTERCEPT_PAUSE); } if (kvm_vcpu_apicv_active(&svm->vcpu)) @@ -1488,12 +1619,12 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, unsigned int index) { u64 *avic_physical_id_table; - struct kvm_arch *vm_data = &vcpu->kvm->arch; + struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); if (index >= AVIC_MAX_PHYSICAL_ID_COUNT) return NULL; - avic_physical_id_table = page_address(vm_data->avic_physical_id_table_page); + avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page); return &avic_physical_id_table[index]; } @@ -1576,7 +1707,7 @@ static void __sev_asid_free(int asid) static void sev_asid_free(struct kvm *kvm) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; __sev_asid_free(sev->asid); } @@ -1616,7 +1747,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, unsigned long ulen, unsigned long *n, int write) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; unsigned long npages, npinned, size; unsigned long locked, lock_limit; struct page **pages; @@ -1667,7 +1798,7 @@ err: static void sev_unpin_memory(struct kvm *kvm, struct page **pages, unsigned long npages) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; release_pages(pages, npages); kvfree(pages); @@ -1705,9 +1836,20 @@ static void __unregister_enc_region_locked(struct kvm *kvm, kfree(region); } +static struct kvm *svm_vm_alloc(void) +{ + struct kvm_svm *kvm_svm = kzalloc(sizeof(struct kvm_svm), GFP_KERNEL); + return &kvm_svm->kvm; +} + +static void svm_vm_free(struct kvm *kvm) +{ + kfree(to_kvm_svm(kvm)); +} + static void sev_vm_destroy(struct kvm *kvm) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct list_head *head = &sev->regions_list; struct list_head *pos, *q; @@ -1736,18 +1878,18 @@ static void sev_vm_destroy(struct kvm *kvm) static void avic_vm_destroy(struct kvm *kvm) { unsigned long flags; - struct kvm_arch *vm_data = &kvm->arch; + struct kvm_svm *kvm_svm = to_kvm_svm(kvm); if (!avic) return; - if (vm_data->avic_logical_id_table_page) - __free_page(vm_data->avic_logical_id_table_page); - if (vm_data->avic_physical_id_table_page) - __free_page(vm_data->avic_physical_id_table_page); + if (kvm_svm->avic_logical_id_table_page) + __free_page(kvm_svm->avic_logical_id_table_page); + if (kvm_svm->avic_physical_id_table_page) + __free_page(kvm_svm->avic_physical_id_table_page); spin_lock_irqsave(&svm_vm_data_hash_lock, flags); - hash_del(&vm_data->hnode); + hash_del(&kvm_svm->hnode); spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); } @@ -1761,10 +1903,10 @@ static int avic_vm_init(struct kvm *kvm) { unsigned long flags; int err = -ENOMEM; - struct kvm_arch *vm_data = &kvm->arch; + struct kvm_svm *kvm_svm = to_kvm_svm(kvm); + struct kvm_svm *k2; struct page *p_page; struct page *l_page; - struct kvm_arch *ka; u32 vm_id; if (!avic) @@ -1775,7 +1917,7 @@ static int avic_vm_init(struct kvm *kvm) if (!p_page) goto free_avic; - vm_data->avic_physical_id_table_page = p_page; + kvm_svm->avic_physical_id_table_page = p_page; clear_page(page_address(p_page)); /* Allocating logical APIC ID table (4KB) */ @@ -1783,7 +1925,7 @@ static int avic_vm_init(struct kvm *kvm) if (!l_page) goto free_avic; - vm_data->avic_logical_id_table_page = l_page; + kvm_svm->avic_logical_id_table_page = l_page; clear_page(page_address(l_page)); spin_lock_irqsave(&svm_vm_data_hash_lock, flags); @@ -1795,15 +1937,13 @@ static int avic_vm_init(struct kvm *kvm) } /* Is it still in use? Only possible if wrapped at least once */ if (next_vm_id_wrapped) { - hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) { - struct kvm *k2 = container_of(ka, struct kvm, arch); - struct kvm_arch *vd2 = &k2->arch; - if (vd2->avic_vm_id == vm_id) + hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) { + if (k2->avic_vm_id == vm_id) goto again; } } - vm_data->avic_vm_id = vm_id; - hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id); + kvm_svm->avic_vm_id = vm_id; + hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id); spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); return 0; @@ -2535,14 +2675,7 @@ static int bp_interception(struct vcpu_svm *svm) static int ud_interception(struct vcpu_svm *svm) { - int er; - - er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); - if (er == EMULATE_USER_EXIT) - return 0; - if (er != EMULATE_DONE) - kvm_queue_exception(&svm->vcpu, UD_VECTOR); - return 1; + return handle_ud(&svm->vcpu); } static int ac_interception(struct vcpu_svm *svm) @@ -2551,6 +2684,23 @@ static int ac_interception(struct vcpu_svm *svm) return 1; } +static int gp_interception(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + u32 error_code = svm->vmcb->control.exit_info_1; + int er; + + WARN_ON_ONCE(!enable_vmware_backdoor); + + er = emulate_instruction(vcpu, + EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); + if (er == EMULATE_USER_EXIT) + return 0; + else if (er != EMULATE_DONE) + kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); + return 1; +} + static bool is_erratum_383(void) { int err, i; @@ -2639,7 +2789,7 @@ static int io_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ - int size, in, string, ret; + int size, in, string; unsigned port; ++svm->vcpu.stat.io_exits; @@ -2651,16 +2801,8 @@ static int io_interception(struct vcpu_svm *svm) port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; svm->next_rip = svm->vmcb->control.exit_info_2; - ret = kvm_skip_emulated_instruction(&svm->vcpu); - /* - * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered - * KVM_EXIT_DEBUG here. - */ - if (in) - return kvm_fast_pio_in(vcpu, size, port) && ret; - else - return kvm_fast_pio_out(vcpu, size, port) && ret; + return kvm_fast_pio(&svm->vcpu, size, port, in); } static int nmi_interception(struct vcpu_svm *svm) @@ -4233,6 +4375,9 @@ static int pause_interception(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; bool in_kernel = (svm_get_cpl(vcpu) == 0); + if (pause_filter_thresh) + grow_ple_window(vcpu); + kvm_vcpu_on_spin(vcpu, in_kernel); return 1; } @@ -4323,7 +4468,7 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm) static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat) { - struct kvm_arch *vm_data = &vcpu->kvm->arch; + struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); int index; u32 *logical_apic_id_table; int dlid = GET_APIC_LOGICAL_ID(ldr); @@ -4345,7 +4490,7 @@ static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat) index = (cluster << 2) + apic; } - logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page); + logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page); return &logical_apic_id_table[index]; } @@ -4425,7 +4570,7 @@ static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu) static int avic_handle_dfr_update(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct kvm_arch *vm_data = &vcpu->kvm->arch; + struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR); u32 mod = (dfr >> 28) & 0xf; @@ -4434,11 +4579,11 @@ static int avic_handle_dfr_update(struct kvm_vcpu *vcpu) * If this changes, we need to flush the AVIC logical * APID id table. */ - if (vm_data->ldr_mode == mod) + if (kvm_svm->ldr_mode == mod) return 0; - clear_page(page_address(vm_data->avic_logical_id_table_page)); - vm_data->ldr_mode = mod; + clear_page(page_address(kvm_svm->avic_logical_id_table_page)); + kvm_svm->ldr_mode = mod; if (svm->ldr_reg) avic_handle_ldr_update(vcpu); @@ -4558,6 +4703,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, + [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception, [SVM_EXIT_INTR] = intr_interception, [SVM_EXIT_NMI] = nmi_interception, [SVM_EXIT_SMI] = nop_on_interception, @@ -4606,6 +4752,8 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); pr_err("%-20s%016llx\n", "intercepts:", control->intercept); pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); + pr_err("%-20s%d\n", "pause filter threshold:", + control->pause_filter_thresh); pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); @@ -5073,7 +5221,7 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, /* Try to enable guest_mode in IRTE */ pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK); - pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id, + pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id, svm->vcpu.vcpu_id); pi.is_guest_mode = true; pi.vcpu_data = &vcpu_info; @@ -5237,6 +5385,11 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) return 0; } +static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) +{ + return 0; +} + static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) { struct vcpu_svm *svm = to_svm(vcpu); @@ -5538,14 +5691,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) - kvm_before_handle_nmi(&svm->vcpu); + kvm_before_interrupt(&svm->vcpu); stgi(); /* Any pending NMI will happen here */ if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) - kvm_after_handle_nmi(&svm->vcpu); + kvm_after_interrupt(&svm->vcpu); sync_cr8_to_lapic(vcpu); @@ -5921,6 +6074,8 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu) static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) { + if (pause_filter_thresh) + shrink_ple_window(vcpu); } static inline void avic_post_state_restore(struct kvm_vcpu *vcpu) @@ -6037,7 +6192,7 @@ static int sev_asid_new(void) static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; int asid, ret; ret = -EBUSY; @@ -6102,14 +6257,14 @@ static int __sev_issue_cmd(int fd, int id, void *data, int *error) static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; return __sev_issue_cmd(sev->fd, id, data, error); } static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_launch_start *start; struct kvm_sev_launch_start params; void *dh_blob, *session_blob; @@ -6207,7 +6362,7 @@ static int get_num_contig_pages(int idx, struct page **inpages, static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) { unsigned long vaddr, vaddr_end, next_vaddr, npages, size; - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct kvm_sev_launch_update_data params; struct sev_data_launch_update_data *data; struct page **inpages; @@ -6283,7 +6438,7 @@ e_free: static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) { void __user *measure = (void __user *)(uintptr_t)argp->data; - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_launch_measure *data; struct kvm_sev_launch_measure params; void __user *p = NULL; @@ -6351,7 +6506,7 @@ e_free: static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_launch_finish *data; int ret; @@ -6371,7 +6526,7 @@ static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct kvm_sev_guest_status params; struct sev_data_guest_status *data; int ret; @@ -6403,7 +6558,7 @@ static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src, unsigned long dst, int size, int *error, bool enc) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_dbg *data; int ret; @@ -6635,7 +6790,7 @@ err: static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_launch_secret *data; struct kvm_sev_launch_secret params; struct page **pages; @@ -6759,7 +6914,7 @@ out: static int svm_register_enc_region(struct kvm *kvm, struct kvm_enc_region *range) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct enc_region *region; int ret = 0; @@ -6801,7 +6956,7 @@ e_free: static struct enc_region * find_enc_region(struct kvm *kvm, struct kvm_enc_region *range) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct list_head *head = &sev->regions_list; struct enc_region *i; @@ -6859,6 +7014,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .vcpu_free = svm_free_vcpu, .vcpu_reset = svm_vcpu_reset, + .vm_alloc = svm_vm_alloc, + .vm_free = svm_vm_free, .vm_init = avic_vm_init, .vm_destroy = svm_vm_destroy, @@ -6925,6 +7082,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .apicv_post_state_restore = avic_post_state_restore, .set_tss_addr = svm_set_tss_addr, + .set_identity_map_addr = svm_set_identity_map_addr, .get_tdp_level = get_npt_level, .get_mt_mask = svm_get_mt_mask, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 92496b9b5f2b..aafcc9881e88 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -52,9 +52,11 @@ #include <asm/irq_remapping.h> #include <asm/mmu_context.h> #include <asm/nospec-branch.h> +#include <asm/mshyperv.h> #include "trace.h" #include "pmu.h" +#include "vmx_evmcs.h" #define __ex(x) __kvm_handle_fault_on_reboot(x) #define __ex_clear(x, reg) \ @@ -130,13 +132,15 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); #endif #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) -#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) -#define KVM_VM_CR0_ALWAYS_ON \ - (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) +#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE +#define KVM_VM_CR0_ALWAYS_ON \ + (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \ + X86_CR0_WP | X86_CR0_PG | X86_CR0_PE) #define KVM_CR4_GUEST_OWNED_BITS \ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD) +#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) @@ -165,34 +169,33 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); * Time is measured based on a counter that runs at the same rate as the TSC, * refer SDM volume 3b section 21.6.13 & 22.1.3. */ -#define KVM_VMX_DEFAULT_PLE_GAP 128 -#define KVM_VMX_DEFAULT_PLE_WINDOW 4096 -#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW 2 -#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0 -#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX \ - INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW +static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP; -static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; -module_param(ple_gap, int, S_IRUGO); - -static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; -module_param(ple_window, int, S_IRUGO); +static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; +module_param(ple_window, uint, 0444); /* Default doubles per-vcpu window every exit. */ -static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW; -module_param(ple_window_grow, int, S_IRUGO); +static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW; +module_param(ple_window_grow, uint, 0444); /* Default resets per-vcpu window every exit to ple_window. */ -static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK; -module_param(ple_window_shrink, int, S_IRUGO); +static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; +module_param(ple_window_shrink, uint, 0444); /* Default is to compute the maximum so we can never overflow. */ -static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; -static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; -module_param(ple_window_max, int, S_IRUGO); +static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; +module_param(ple_window_max, uint, 0444); extern const ulong vmx_return; +struct kvm_vmx { + struct kvm kvm; + + unsigned int tss_addr; + bool ept_identity_pagetable_done; + gpa_t ept_identity_map_addr; +}; + #define NR_AUTOLOAD_MSRS 8 struct vmcs { @@ -424,6 +427,35 @@ struct __packed vmcs12 { */ #define VMCS12_MAX_FIELD_INDEX 0x17 +struct nested_vmx_msrs { + /* + * We only store the "true" versions of the VMX capability MSRs. We + * generate the "non-true" versions by setting the must-be-1 bits + * according to the SDM. + */ + u32 procbased_ctls_low; + u32 procbased_ctls_high; + u32 secondary_ctls_low; + u32 secondary_ctls_high; + u32 pinbased_ctls_low; + u32 pinbased_ctls_high; + u32 exit_ctls_low; + u32 exit_ctls_high; + u32 entry_ctls_low; + u32 entry_ctls_high; + u32 misc_low; + u32 misc_high; + u32 ept_caps; + u32 vpid_caps; + u64 basic; + u64 cr0_fixed0; + u64 cr0_fixed1; + u64 cr4_fixed0; + u64 cr4_fixed1; + u64 vmcs_enum; + u64 vmfunc_controls; +}; + /* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. @@ -475,32 +507,7 @@ struct nested_vmx { u16 vpid02; u16 last_vpid; - /* - * We only store the "true" versions of the VMX capability MSRs. We - * generate the "non-true" versions by setting the must-be-1 bits - * according to the SDM. - */ - u32 nested_vmx_procbased_ctls_low; - u32 nested_vmx_procbased_ctls_high; - u32 nested_vmx_secondary_ctls_low; - u32 nested_vmx_secondary_ctls_high; - u32 nested_vmx_pinbased_ctls_low; - u32 nested_vmx_pinbased_ctls_high; - u32 nested_vmx_exit_ctls_low; - u32 nested_vmx_exit_ctls_high; - u32 nested_vmx_entry_ctls_low; - u32 nested_vmx_entry_ctls_high; - u32 nested_vmx_misc_low; - u32 nested_vmx_misc_high; - u32 nested_vmx_ept_caps; - u32 nested_vmx_vpid_caps; - u64 nested_vmx_basic; - u64 nested_vmx_cr0_fixed0; - u64 nested_vmx_cr0_fixed1; - u64 nested_vmx_cr4_fixed0; - u64 nested_vmx_cr4_fixed1; - u64 nested_vmx_vmcs_enum; - u64 nested_vmx_vmfunc_controls; + struct nested_vmx_msrs msrs; /* SMM related state */ struct { @@ -691,6 +698,11 @@ enum segment_cache_field { SEG_FIELD_NR = 4 }; +static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm) +{ + return container_of(kvm, struct kvm_vmx, kvm); +} + static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) { return container_of(vcpu, struct vcpu_vmx, vcpu); @@ -953,6 +965,7 @@ static struct vmcs_config { u32 cpu_based_2nd_exec_ctrl; u32 vmexit_ctrl; u32 vmentry_ctrl; + struct nested_vmx_msrs nested; } vmcs_config; static struct vmx_capability { @@ -999,6 +1012,169 @@ static const u32 vmx_msr_index[] = { MSR_EFER, MSR_TSC_AUX, MSR_STAR, }; +DEFINE_STATIC_KEY_FALSE(enable_evmcs); + +#define current_evmcs ((struct hv_enlightened_vmcs *)this_cpu_read(current_vmcs)) + +#define KVM_EVMCS_VERSION 1 + +#if IS_ENABLED(CONFIG_HYPERV) +static bool __read_mostly enlightened_vmcs = true; +module_param(enlightened_vmcs, bool, 0444); + +static inline void evmcs_write64(unsigned long field, u64 value) +{ + u16 clean_field; + int offset = get_evmcs_offset(field, &clean_field); + + if (offset < 0) + return; + + *(u64 *)((char *)current_evmcs + offset) = value; + + current_evmcs->hv_clean_fields &= ~clean_field; +} + +static inline void evmcs_write32(unsigned long field, u32 value) +{ + u16 clean_field; + int offset = get_evmcs_offset(field, &clean_field); + + if (offset < 0) + return; + + *(u32 *)((char *)current_evmcs + offset) = value; + current_evmcs->hv_clean_fields &= ~clean_field; +} + +static inline void evmcs_write16(unsigned long field, u16 value) +{ + u16 clean_field; + int offset = get_evmcs_offset(field, &clean_field); + + if (offset < 0) + return; + + *(u16 *)((char *)current_evmcs + offset) = value; + current_evmcs->hv_clean_fields &= ~clean_field; +} + +static inline u64 evmcs_read64(unsigned long field) +{ + int offset = get_evmcs_offset(field, NULL); + + if (offset < 0) + return 0; + + return *(u64 *)((char *)current_evmcs + offset); +} + +static inline u32 evmcs_read32(unsigned long field) +{ + int offset = get_evmcs_offset(field, NULL); + + if (offset < 0) + return 0; + + return *(u32 *)((char *)current_evmcs + offset); +} + +static inline u16 evmcs_read16(unsigned long field) +{ + int offset = get_evmcs_offset(field, NULL); + + if (offset < 0) + return 0; + + return *(u16 *)((char *)current_evmcs + offset); +} + +static void evmcs_load(u64 phys_addr) +{ + struct hv_vp_assist_page *vp_ap = + hv_get_vp_assist_page(smp_processor_id()); + + vp_ap->current_nested_vmcs = phys_addr; + vp_ap->enlighten_vmentry = 1; +} + +static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) +{ + /* + * Enlightened VMCSv1 doesn't support these: + * + * POSTED_INTR_NV = 0x00000002, + * GUEST_INTR_STATUS = 0x00000810, + * APIC_ACCESS_ADDR = 0x00002014, + * POSTED_INTR_DESC_ADDR = 0x00002016, + * EOI_EXIT_BITMAP0 = 0x0000201c, + * EOI_EXIT_BITMAP1 = 0x0000201e, + * EOI_EXIT_BITMAP2 = 0x00002020, + * EOI_EXIT_BITMAP3 = 0x00002022, + */ + vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; + vmcs_conf->cpu_based_2nd_exec_ctrl &= + ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; + vmcs_conf->cpu_based_2nd_exec_ctrl &= + ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmcs_conf->cpu_based_2nd_exec_ctrl &= + ~SECONDARY_EXEC_APIC_REGISTER_VIRT; + + /* + * GUEST_PML_INDEX = 0x00000812, + * PML_ADDRESS = 0x0000200e, + */ + vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_PML; + + /* VM_FUNCTION_CONTROL = 0x00002018, */ + vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_VMFUNC; + + /* + * EPTP_LIST_ADDRESS = 0x00002024, + * VMREAD_BITMAP = 0x00002026, + * VMWRITE_BITMAP = 0x00002028, + */ + vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_SHADOW_VMCS; + + /* + * TSC_MULTIPLIER = 0x00002032, + */ + vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_TSC_SCALING; + + /* + * PLE_GAP = 0x00004020, + * PLE_WINDOW = 0x00004022, + */ + vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; + + /* + * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, + */ + vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; + + /* + * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, + * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, + */ + vmcs_conf->vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; + vmcs_conf->vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; + + /* + * Currently unsupported in KVM: + * GUEST_IA32_RTIT_CTL = 0x00002814, + */ +} +#else /* !IS_ENABLED(CONFIG_HYPERV) */ +static inline void evmcs_write64(unsigned long field, u64 value) {} +static inline void evmcs_write32(unsigned long field, u32 value) {} +static inline void evmcs_write16(unsigned long field, u16 value) {} +static inline u64 evmcs_read64(unsigned long field) { return 0; } +static inline u32 evmcs_read32(unsigned long field) { return 0; } +static inline u16 evmcs_read16(unsigned long field) { return 0; } +static inline void evmcs_load(u64 phys_addr) {} +static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {} +#endif /* IS_ENABLED(CONFIG_HYPERV) */ + static inline bool is_exception_n(u32 intr_info, u8 vector) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | @@ -1031,6 +1207,11 @@ static inline bool is_invalid_opcode(u32 intr_info) return is_exception_n(intr_info, UD_VECTOR); } +static inline bool is_gp_fault(u32 intr_info) +{ + return is_exception_n(intr_info, GP_VECTOR); +} + static inline bool is_external_interrupt(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) @@ -1320,7 +1501,7 @@ static inline bool report_flexpriority(void) static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu) { - return vmx_misc_cr3_count(to_vmx(vcpu)->nested.nested_vmx_misc_low); + return vmx_misc_cr3_count(to_vmx(vcpu)->nested.msrs.misc_low); } static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit) @@ -1341,6 +1522,16 @@ static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12) PIN_BASED_VMX_PREEMPTION_TIMER; } +static inline bool nested_cpu_has_nmi_exiting(struct vmcs12 *vmcs12) +{ + return vmcs12->pin_based_vm_exec_control & PIN_BASED_NMI_EXITING; +} + +static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12) +{ + return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; +} + static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) { return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); @@ -1479,6 +1670,9 @@ static void vmcs_load(struct vmcs *vmcs) u64 phys_addr = __pa(vmcs); u8 error; + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_load(phys_addr); + asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr) : "cc", "memory"); @@ -1652,18 +1846,24 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field) static __always_inline u16 vmcs_read16(unsigned long field) { vmcs_check16(field); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_read16(field); return __vmcs_readl(field); } static __always_inline u32 vmcs_read32(unsigned long field) { vmcs_check32(field); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_read32(field); return __vmcs_readl(field); } static __always_inline u64 vmcs_read64(unsigned long field) { vmcs_check64(field); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_read64(field); #ifdef CONFIG_X86_64 return __vmcs_readl(field); #else @@ -1674,6 +1874,8 @@ static __always_inline u64 vmcs_read64(unsigned long field) static __always_inline unsigned long vmcs_readl(unsigned long field) { vmcs_checkl(field); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_read64(field); return __vmcs_readl(field); } @@ -1697,18 +1899,27 @@ static __always_inline void __vmcs_writel(unsigned long field, unsigned long val static __always_inline void vmcs_write16(unsigned long field, u16 value) { vmcs_check16(field); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_write16(field, value); + __vmcs_writel(field, value); } static __always_inline void vmcs_write32(unsigned long field, u32 value) { vmcs_check32(field); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_write32(field, value); + __vmcs_writel(field, value); } static __always_inline void vmcs_write64(unsigned long field, u64 value) { vmcs_check64(field); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_write64(field, value); + __vmcs_writel(field, value); #ifndef CONFIG_X86_64 asm volatile (""); @@ -1719,6 +1930,9 @@ static __always_inline void vmcs_write64(unsigned long field, u64 value) static __always_inline void vmcs_writel(unsigned long field, unsigned long value) { vmcs_checkl(field); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_write64(field, value); + __vmcs_writel(field, value); } @@ -1726,6 +1940,9 @@ static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask) { BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, "vmcs_clear_bits does not support 64-bit fields"); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_write32(field, evmcs_read32(field) & ~mask); + __vmcs_writel(field, __vmcs_readl(field) & ~mask); } @@ -1733,6 +1950,9 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask) { BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, "vmcs_set_bits does not support 64-bit fields"); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_write32(field, evmcs_read32(field) | mask); + __vmcs_writel(field, __vmcs_readl(field) | mask); } @@ -1864,6 +2084,14 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); + /* + * Guest access to VMware backdoor ports could legitimately + * trigger #GP because of TSS I/O permission bitmap. + * We intercept those #GP and allow access to them anyway + * as VMware does. + */ + if (enable_vmware_backdoor) + eb |= (1u << GP_VECTOR); if ((vcpu->guest_debug & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) @@ -2129,6 +2357,9 @@ static unsigned long segment_base(u16 selector) static void vmx_save_host_state(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); +#ifdef CONFIG_X86_64 + int cpu = raw_smp_processor_id(); +#endif int i; if (vmx->host_state.loaded) @@ -2141,7 +2372,15 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) */ vmx->host_state.ldt_sel = kvm_read_ldt(); vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; + +#ifdef CONFIG_X86_64 + save_fsgs_for_kvm(); + vmx->host_state.fs_sel = current->thread.fsindex; + vmx->host_state.gs_sel = current->thread.gsindex; +#else savesegment(fs, vmx->host_state.fs_sel); + savesegment(gs, vmx->host_state.gs_sel); +#endif if (!(vmx->host_state.fs_sel & 7)) { vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); vmx->host_state.fs_reload_needed = 0; @@ -2149,7 +2388,6 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) vmcs_write16(HOST_FS_SELECTOR, 0); vmx->host_state.fs_reload_needed = 1; } - savesegment(gs, vmx->host_state.gs_sel); if (!(vmx->host_state.gs_sel & 7)) vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); else { @@ -2160,20 +2398,16 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) #ifdef CONFIG_X86_64 savesegment(ds, vmx->host_state.ds_sel); savesegment(es, vmx->host_state.es_sel); -#endif -#ifdef CONFIG_X86_64 - vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); - vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); -#else - vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel)); - vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel)); -#endif + vmcs_writel(HOST_FS_BASE, current->thread.fsbase); + vmcs_writel(HOST_GS_BASE, cpu_kernelmode_gs_base(cpu)); -#ifdef CONFIG_X86_64 - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); + vmx->msr_host_kernel_gs_base = current->thread.gsbase; if (is_long_mode(&vmx->vcpu)) wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); +#else + vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel)); + vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel)); #endif if (boot_cpu_has(X86_FEATURE_MPX)) rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs); @@ -2532,6 +2766,19 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit return 0; } +static void vmx_clear_hlt(struct kvm_vcpu *vcpu) +{ + /* + * Ensure that we clear the HLT state in the VMCS. We don't need to + * explicitly skip the instruction because if the HLT state is set, + * then the instruction is already executing and RIP has already been + * advanced. + */ + if (kvm_hlt_in_guest(vcpu->kvm) && + vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT) + vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); +} + static void vmx_queue_exception(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -2554,6 +2801,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) return; } + WARN_ON_ONCE(vmx->emulation_required); + if (kvm_exception_is_soft(nr)) { vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, vmx->vcpu.arch.event_exit_inst_len); @@ -2562,6 +2811,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) intr_info |= INTR_TYPE_HARD_EXCEPTION; vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); + + vmx_clear_hlt(vcpu); } static bool vmx_rdtscp_supported(void) @@ -2689,8 +2940,13 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu) * bit in the high half is on if the corresponding bit in the control field * may be on. See also vmx_control_verify(). */ -static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) +static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) { + if (!nested) { + memset(msrs, 0, sizeof(*msrs)); + return; + } + /* * Note that as a general rule, the high half of the MSRs (bits in * the control fields which may be 1) should be initialized by the @@ -2708,70 +2964,68 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) /* pin-based controls */ rdmsr(MSR_IA32_VMX_PINBASED_CTLS, - vmx->nested.nested_vmx_pinbased_ctls_low, - vmx->nested.nested_vmx_pinbased_ctls_high); - vmx->nested.nested_vmx_pinbased_ctls_low |= + msrs->pinbased_ctls_low, + msrs->pinbased_ctls_high); + msrs->pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; - vmx->nested.nested_vmx_pinbased_ctls_high &= + msrs->pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | - PIN_BASED_VIRTUAL_NMIS; - vmx->nested.nested_vmx_pinbased_ctls_high |= + PIN_BASED_VIRTUAL_NMIS | + (apicv ? PIN_BASED_POSTED_INTR : 0); + msrs->pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | PIN_BASED_VMX_PREEMPTION_TIMER; - if (kvm_vcpu_apicv_active(&vmx->vcpu)) - vmx->nested.nested_vmx_pinbased_ctls_high |= - PIN_BASED_POSTED_INTR; /* exit controls */ rdmsr(MSR_IA32_VMX_EXIT_CTLS, - vmx->nested.nested_vmx_exit_ctls_low, - vmx->nested.nested_vmx_exit_ctls_high); - vmx->nested.nested_vmx_exit_ctls_low = + msrs->exit_ctls_low, + msrs->exit_ctls_high); + msrs->exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; - vmx->nested.nested_vmx_exit_ctls_high &= + msrs->exit_ctls_high &= #ifdef CONFIG_X86_64 VM_EXIT_HOST_ADDR_SPACE_SIZE | #endif VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; - vmx->nested.nested_vmx_exit_ctls_high |= + msrs->exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; if (kvm_mpx_supported()) - vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; + msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; /* We support free control of debug control saving. */ - vmx->nested.nested_vmx_exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; + msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; /* entry controls */ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, - vmx->nested.nested_vmx_entry_ctls_low, - vmx->nested.nested_vmx_entry_ctls_high); - vmx->nested.nested_vmx_entry_ctls_low = + msrs->entry_ctls_low, + msrs->entry_ctls_high); + msrs->entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; - vmx->nested.nested_vmx_entry_ctls_high &= + msrs->entry_ctls_high &= #ifdef CONFIG_X86_64 VM_ENTRY_IA32E_MODE | #endif VM_ENTRY_LOAD_IA32_PAT; - vmx->nested.nested_vmx_entry_ctls_high |= + msrs->entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); if (kvm_mpx_supported()) - vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; + msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; /* We support free control of debug control loading. */ - vmx->nested.nested_vmx_entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; + msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; /* cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, - vmx->nested.nested_vmx_procbased_ctls_low, - vmx->nested.nested_vmx_procbased_ctls_high); - vmx->nested.nested_vmx_procbased_ctls_low = + msrs->procbased_ctls_low, + msrs->procbased_ctls_high); + msrs->procbased_ctls_low = CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; - vmx->nested.nested_vmx_procbased_ctls_high &= + msrs->procbased_ctls_high &= CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | @@ -2791,12 +3045,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) * can use it to avoid exits to L1 - even when L0 runs L2 * without MSR bitmaps. */ - vmx->nested.nested_vmx_procbased_ctls_high |= + msrs->procbased_ctls_high |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | CPU_BASED_USE_MSR_BITMAPS; /* We support free control of CR3 access interception. */ - vmx->nested.nested_vmx_procbased_ctls_low &= + msrs->procbased_ctls_low &= ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); /* @@ -2804,10 +3058,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) * depend on CPUID bits, they are added later by vmx_cpuid_update. */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, - vmx->nested.nested_vmx_secondary_ctls_low, - vmx->nested.nested_vmx_secondary_ctls_high); - vmx->nested.nested_vmx_secondary_ctls_low = 0; - vmx->nested.nested_vmx_secondary_ctls_high &= + msrs->secondary_ctls_low, + msrs->secondary_ctls_high); + msrs->secondary_ctls_low = 0; + msrs->secondary_ctls_high &= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_DESC | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | @@ -2817,33 +3071,33 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) if (enable_ept) { /* nested EPT: emulate EPT also to L1 */ - vmx->nested.nested_vmx_secondary_ctls_high |= + msrs->secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; - vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | + msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT | VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; if (cpu_has_vmx_ept_execute_only()) - vmx->nested.nested_vmx_ept_caps |= + msrs->ept_caps |= VMX_EPT_EXECUTE_ONLY_BIT; - vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept; - vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | + msrs->ept_caps &= vmx_capability.ept; + msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT | VMX_EPT_1GB_PAGE_BIT; if (enable_ept_ad_bits) { - vmx->nested.nested_vmx_secondary_ctls_high |= + msrs->secondary_ctls_high |= SECONDARY_EXEC_ENABLE_PML; - vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT; + msrs->ept_caps |= VMX_EPT_AD_BIT; } } if (cpu_has_vmx_vmfunc()) { - vmx->nested.nested_vmx_secondary_ctls_high |= + msrs->secondary_ctls_high |= SECONDARY_EXEC_ENABLE_VMFUNC; /* * Advertise EPTP switching unconditionally * since we emulate it */ if (enable_ept) - vmx->nested.nested_vmx_vmfunc_controls = + msrs->vmfunc_controls = VMX_VMFUNC_EPTP_SWITCHING; } @@ -2854,25 +3108,25 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) * not failing the single-context invvpid, and it is worse. */ if (enable_vpid) { - vmx->nested.nested_vmx_secondary_ctls_high |= + msrs->secondary_ctls_high |= SECONDARY_EXEC_ENABLE_VPID; - vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT | + msrs->vpid_caps = VMX_VPID_INVVPID_BIT | VMX_VPID_EXTENT_SUPPORTED_MASK; } if (enable_unrestricted_guest) - vmx->nested.nested_vmx_secondary_ctls_high |= + msrs->secondary_ctls_high |= SECONDARY_EXEC_UNRESTRICTED_GUEST; /* miscellaneous data */ rdmsr(MSR_IA32_VMX_MISC, - vmx->nested.nested_vmx_misc_low, - vmx->nested.nested_vmx_misc_high); - vmx->nested.nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA; - vmx->nested.nested_vmx_misc_low |= + msrs->misc_low, + msrs->misc_high); + msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA; + msrs->misc_low |= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | VMX_MISC_ACTIVITY_HLT; - vmx->nested.nested_vmx_misc_high = 0; + msrs->misc_high = 0; /* * This MSR reports some information about VMX support. We @@ -2880,14 +3134,14 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) * guest, and the VMCS structure we give it - not about the * VMX support of the underlying hardware. */ - vmx->nested.nested_vmx_basic = + msrs->basic = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS | ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); if (cpu_has_vmx_basic_inout()) - vmx->nested.nested_vmx_basic |= VMX_BASIC_INOUT; + msrs->basic |= VMX_BASIC_INOUT; /* * These MSRs specify bits which the guest must keep fixed on @@ -2896,15 +3150,15 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) */ #define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE) #define VMXON_CR4_ALWAYSON X86_CR4_VMXE - vmx->nested.nested_vmx_cr0_fixed0 = VMXON_CR0_ALWAYSON; - vmx->nested.nested_vmx_cr4_fixed0 = VMXON_CR4_ALWAYSON; + msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON; + msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON; /* These MSRs specify bits which the guest must keep fixed off. */ - rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx->nested.nested_vmx_cr0_fixed1); - rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1); + rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1); + rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1); /* highest index: VMX_PREEMPTION_TIMER_VALUE */ - vmx->nested.nested_vmx_vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1; + msrs->vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1; } /* @@ -2941,7 +3195,7 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data) BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) | /* reserved */ BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56); - u64 vmx_basic = vmx->nested.nested_vmx_basic; + u64 vmx_basic = vmx->nested.msrs.basic; if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved)) return -EINVAL; @@ -2960,7 +3214,7 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data) if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data)) return -EINVAL; - vmx->nested.nested_vmx_basic = data; + vmx->nested.msrs.basic = data; return 0; } @@ -2972,24 +3226,24 @@ vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) switch (msr_index) { case MSR_IA32_VMX_TRUE_PINBASED_CTLS: - lowp = &vmx->nested.nested_vmx_pinbased_ctls_low; - highp = &vmx->nested.nested_vmx_pinbased_ctls_high; + lowp = &vmx->nested.msrs.pinbased_ctls_low; + highp = &vmx->nested.msrs.pinbased_ctls_high; break; case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: - lowp = &vmx->nested.nested_vmx_procbased_ctls_low; - highp = &vmx->nested.nested_vmx_procbased_ctls_high; + lowp = &vmx->nested.msrs.procbased_ctls_low; + highp = &vmx->nested.msrs.procbased_ctls_high; break; case MSR_IA32_VMX_TRUE_EXIT_CTLS: - lowp = &vmx->nested.nested_vmx_exit_ctls_low; - highp = &vmx->nested.nested_vmx_exit_ctls_high; + lowp = &vmx->nested.msrs.exit_ctls_low; + highp = &vmx->nested.msrs.exit_ctls_high; break; case MSR_IA32_VMX_TRUE_ENTRY_CTLS: - lowp = &vmx->nested.nested_vmx_entry_ctls_low; - highp = &vmx->nested.nested_vmx_entry_ctls_high; + lowp = &vmx->nested.msrs.entry_ctls_low; + highp = &vmx->nested.msrs.entry_ctls_high; break; case MSR_IA32_VMX_PROCBASED_CTLS2: - lowp = &vmx->nested.nested_vmx_secondary_ctls_low; - highp = &vmx->nested.nested_vmx_secondary_ctls_high; + lowp = &vmx->nested.msrs.secondary_ctls_low; + highp = &vmx->nested.msrs.secondary_ctls_high; break; default: BUG(); @@ -3020,13 +3274,13 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) GENMASK_ULL(13, 9) | BIT_ULL(31); u64 vmx_misc; - vmx_misc = vmx_control_msr(vmx->nested.nested_vmx_misc_low, - vmx->nested.nested_vmx_misc_high); + vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low, + vmx->nested.msrs.misc_high); if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits)) return -EINVAL; - if ((vmx->nested.nested_vmx_pinbased_ctls_high & + if ((vmx->nested.msrs.pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) && vmx_misc_preemption_timer_rate(data) != vmx_misc_preemption_timer_rate(vmx_misc)) @@ -3041,8 +3295,8 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc)) return -EINVAL; - vmx->nested.nested_vmx_misc_low = data; - vmx->nested.nested_vmx_misc_high = data >> 32; + vmx->nested.msrs.misc_low = data; + vmx->nested.msrs.misc_high = data >> 32; return 0; } @@ -3050,15 +3304,15 @@ static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data) { u64 vmx_ept_vpid_cap; - vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.nested_vmx_ept_caps, - vmx->nested.nested_vmx_vpid_caps); + vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps, + vmx->nested.msrs.vpid_caps); /* Every bit is either reserved or a feature bit. */ if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL)) return -EINVAL; - vmx->nested.nested_vmx_ept_caps = data; - vmx->nested.nested_vmx_vpid_caps = data >> 32; + vmx->nested.msrs.ept_caps = data; + vmx->nested.msrs.vpid_caps = data >> 32; return 0; } @@ -3068,10 +3322,10 @@ static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) switch (msr_index) { case MSR_IA32_VMX_CR0_FIXED0: - msr = &vmx->nested.nested_vmx_cr0_fixed0; + msr = &vmx->nested.msrs.cr0_fixed0; break; case MSR_IA32_VMX_CR4_FIXED0: - msr = &vmx->nested.nested_vmx_cr4_fixed0; + msr = &vmx->nested.msrs.cr4_fixed0; break; default: BUG(); @@ -3135,7 +3389,7 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) case MSR_IA32_VMX_EPT_VPID_CAP: return vmx_restore_vmx_ept_vpid_cap(vmx, data); case MSR_IA32_VMX_VMCS_ENUM: - vmx->nested.nested_vmx_vmcs_enum = data; + vmx->nested.msrs.vmcs_enum = data; return 0; default: /* @@ -3146,77 +3400,75 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) } /* Returns 0 on success, non-0 otherwise. */ -static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) +static int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - switch (msr_index) { case MSR_IA32_VMX_BASIC: - *pdata = vmx->nested.nested_vmx_basic; + *pdata = msrs->basic; break; case MSR_IA32_VMX_TRUE_PINBASED_CTLS: case MSR_IA32_VMX_PINBASED_CTLS: *pdata = vmx_control_msr( - vmx->nested.nested_vmx_pinbased_ctls_low, - vmx->nested.nested_vmx_pinbased_ctls_high); + msrs->pinbased_ctls_low, + msrs->pinbased_ctls_high); if (msr_index == MSR_IA32_VMX_PINBASED_CTLS) *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; break; case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: case MSR_IA32_VMX_PROCBASED_CTLS: *pdata = vmx_control_msr( - vmx->nested.nested_vmx_procbased_ctls_low, - vmx->nested.nested_vmx_procbased_ctls_high); + msrs->procbased_ctls_low, + msrs->procbased_ctls_high); if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS) *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; break; case MSR_IA32_VMX_TRUE_EXIT_CTLS: case MSR_IA32_VMX_EXIT_CTLS: *pdata = vmx_control_msr( - vmx->nested.nested_vmx_exit_ctls_low, - vmx->nested.nested_vmx_exit_ctls_high); + msrs->exit_ctls_low, + msrs->exit_ctls_high); if (msr_index == MSR_IA32_VMX_EXIT_CTLS) *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; break; case MSR_IA32_VMX_TRUE_ENTRY_CTLS: case MSR_IA32_VMX_ENTRY_CTLS: *pdata = vmx_control_msr( - vmx->nested.nested_vmx_entry_ctls_low, - vmx->nested.nested_vmx_entry_ctls_high); + msrs->entry_ctls_low, + msrs->entry_ctls_high); if (msr_index == MSR_IA32_VMX_ENTRY_CTLS) *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; break; case MSR_IA32_VMX_MISC: *pdata = vmx_control_msr( - vmx->nested.nested_vmx_misc_low, - vmx->nested.nested_vmx_misc_high); + msrs->misc_low, + msrs->misc_high); break; case MSR_IA32_VMX_CR0_FIXED0: - *pdata = vmx->nested.nested_vmx_cr0_fixed0; + *pdata = msrs->cr0_fixed0; break; case MSR_IA32_VMX_CR0_FIXED1: - *pdata = vmx->nested.nested_vmx_cr0_fixed1; + *pdata = msrs->cr0_fixed1; break; case MSR_IA32_VMX_CR4_FIXED0: - *pdata = vmx->nested.nested_vmx_cr4_fixed0; + *pdata = msrs->cr4_fixed0; break; case MSR_IA32_VMX_CR4_FIXED1: - *pdata = vmx->nested.nested_vmx_cr4_fixed1; + *pdata = msrs->cr4_fixed1; break; case MSR_IA32_VMX_VMCS_ENUM: - *pdata = vmx->nested.nested_vmx_vmcs_enum; + *pdata = msrs->vmcs_enum; break; case MSR_IA32_VMX_PROCBASED_CTLS2: *pdata = vmx_control_msr( - vmx->nested.nested_vmx_secondary_ctls_low, - vmx->nested.nested_vmx_secondary_ctls_high); + msrs->secondary_ctls_low, + msrs->secondary_ctls_high); break; case MSR_IA32_VMX_EPT_VPID_CAP: - *pdata = vmx->nested.nested_vmx_ept_caps | - ((u64)vmx->nested.nested_vmx_vpid_caps << 32); + *pdata = msrs->ept_caps | + ((u64)msrs->vpid_caps << 32); break; case MSR_IA32_VMX_VMFUNC: - *pdata = vmx->nested.nested_vmx_vmfunc_controls; + *pdata = msrs->vmfunc_controls; break; default: return 1; @@ -3235,7 +3487,16 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, static int vmx_get_msr_feature(struct kvm_msr_entry *msr) { - return 1; + switch (msr->index) { + case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + if (!nested) + return 1; + return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); + default: + return 1; + } + + return 0; } /* @@ -3309,7 +3570,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: if (!nested_vmx_allowed(vcpu)) return 1; - return vmx_get_vmx_msr(vcpu, msr_info->index, &msr_info->data); + return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, + &msr_info->data); case MSR_IA32_XSS: if (!vmx_xsaves_supported()) return 1; @@ -3602,6 +3864,14 @@ static int hardware_enable(void) if (cr4_read_shadow() & X86_CR4_VMXE) return -EBUSY; + /* + * This can happen if we hot-added a CPU but failed to allocate + * VP assist page for it. + */ + if (static_branch_unlikely(&enable_evmcs) && + !hv_get_vp_assist_page(cpu)) + return -EFAULT; + INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); @@ -3700,6 +3970,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) u32 _vmexit_control = 0; u32 _vmentry_control = 0; + memset(vmcs_conf, 0, sizeof(*vmcs_conf)); min = CPU_BASED_HLT_EXITING | #ifdef CONFIG_X86_64 CPU_BASED_CR8_LOAD_EXITING | @@ -3710,13 +3981,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) CPU_BASED_UNCOND_IO_EXITING | CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING | + CPU_BASED_MWAIT_EXITING | + CPU_BASED_MONITOR_EXITING | CPU_BASED_INVLPG_EXITING | CPU_BASED_RDPMC_EXITING; - if (!kvm_mwait_in_guest()) - min |= CPU_BASED_MWAIT_EXITING | - CPU_BASED_MONITOR_EXITING; - opt = CPU_BASED_TPR_SHADOW | CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; @@ -3835,7 +4104,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) vmcs_conf->size = vmx_msr_high & 0x1fff; vmcs_conf->order = get_order(vmcs_conf->size); vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; - vmcs_conf->revision_id = vmx_msr_low; + + /* KVM supports Enlightened VMCS v1 only */ + if (static_branch_unlikely(&enable_evmcs)) + vmcs_conf->revision_id = KVM_EVMCS_VERSION; + else + vmcs_conf->revision_id = vmx_msr_low; vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; @@ -3843,6 +4117,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) vmcs_conf->vmexit_ctrl = _vmexit_control; vmcs_conf->vmentry_ctrl = _vmentry_control; + if (static_branch_unlikely(&enable_evmcs)) + evmcs_sanitize_exec_ctrls(vmcs_conf); + cpu_has_load_ia32_efer = allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS, VM_ENTRY_LOAD_IA32_EFER) @@ -4162,6 +4439,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) { unsigned long flags; struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm); vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); @@ -4177,13 +4455,13 @@ static void enter_rmode(struct kvm_vcpu *vcpu) * Very old userspace does not call KVM_SET_TSS_ADDR before entering * vcpu. Warn the user that an update is overdue. */ - if (!vcpu->kvm->arch.tss_addr) + if (!kvm_vmx->tss_addr) printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " "called before entering vcpu\n"); vmx_segment_cache_clear(vmx); - vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr); + vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr); vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); @@ -4291,7 +4569,7 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) static void vmx_decache_cr3(struct kvm_vcpu *vcpu) { - if (enable_ept && is_paging(vcpu)) + if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu))) vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); } @@ -4339,11 +4617,11 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu) static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) { - u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0; - u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1; + u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0; + u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1; struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - if (to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high & + if (to_vmx(vcpu)->nested.msrs.secondary_ctls_high & SECONDARY_EXEC_UNRESTRICTED_GUEST && nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST)) fixed0 &= ~(X86_CR0_PE | X86_CR0_PG); @@ -4353,16 +4631,16 @@ static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) { - u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0; - u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1; + u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0; + u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1; return fixed_bits_valid(val, fixed0, fixed1); } static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val) { - u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed0; - u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed1; + u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0; + u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1; return fixed_bits_valid(val, fixed0, fixed1); } @@ -4428,7 +4706,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } #endif - if (enable_ept) + if (enable_ept && !enable_unrestricted_guest) ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); vmcs_writel(CR0_READ_SHADOW, cr0); @@ -4469,10 +4747,11 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (enable_ept) { eptp = construct_eptp(vcpu, cr3); vmcs_write64(EPT_POINTER, eptp); - if (is_paging(vcpu) || is_guest_mode(vcpu)) + if (enable_unrestricted_guest || is_paging(vcpu) || + is_guest_mode(vcpu)) guest_cr3 = kvm_read_cr3(vcpu); else - guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr; + guest_cr3 = to_kvm_vmx(vcpu->kvm)->ept_identity_map_addr; ept_load_pdptrs(vcpu); } @@ -4487,11 +4766,15 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) * is in force while we are in guest mode. Do not let guests control * this bit, even if host CR4.MCE == 0. */ - unsigned long hw_cr4 = - (cr4_read_shadow() & X86_CR4_MCE) | - (cr4 & ~X86_CR4_MCE) | - (to_vmx(vcpu)->rmode.vm86_active ? - KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); + unsigned long hw_cr4; + + hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE); + if (enable_unrestricted_guest) + hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST; + else if (to_vmx(vcpu)->rmode.vm86_active) + hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON; + else + hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON; if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) { vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, @@ -4517,16 +4800,17 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; vcpu->arch.cr4 = cr4; - if (enable_ept) { - if (!is_paging(vcpu)) { - hw_cr4 &= ~X86_CR4_PAE; - hw_cr4 |= X86_CR4_PSE; - } else if (!(cr4 & X86_CR4_PAE)) { - hw_cr4 &= ~X86_CR4_PAE; + + if (!enable_unrestricted_guest) { + if (enable_ept) { + if (!is_paging(vcpu)) { + hw_cr4 &= ~X86_CR4_PAE; + hw_cr4 |= X86_CR4_PSE; + } else if (!(cr4 & X86_CR4_PAE)) { + hw_cr4 &= ~X86_CR4_PAE; + } } - } - if (!enable_unrestricted_guest && !is_paging(vcpu)) /* * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in * hardware. To emulate this behavior, SMEP/SMAP/PKU needs @@ -4538,7 +4822,9 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) * If enable_unrestricted_guest, the CPU automatically * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0. */ - hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE); + if (!is_paging(vcpu)) + hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE); + } vmcs_writel(CR4_READ_SHADOW, cr4); vmcs_writel(GUEST_CR4, hw_cr4); @@ -4906,7 +5192,7 @@ static int init_rmode_tss(struct kvm *kvm) int idx, r; idx = srcu_read_lock(&kvm->srcu); - fn = kvm->arch.tss_addr >> PAGE_SHIFT; + fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT; r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); if (r < 0) goto out; @@ -4932,22 +5218,23 @@ out: static int init_rmode_identity_map(struct kvm *kvm) { + struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); int i, idx, r = 0; kvm_pfn_t identity_map_pfn; u32 tmp; - /* Protect kvm->arch.ept_identity_pagetable_done. */ + /* Protect kvm_vmx->ept_identity_pagetable_done. */ mutex_lock(&kvm->slots_lock); - if (likely(kvm->arch.ept_identity_pagetable_done)) + if (likely(kvm_vmx->ept_identity_pagetable_done)) goto out2; - if (!kvm->arch.ept_identity_map_addr) - kvm->arch.ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; - identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; + if (!kvm_vmx->ept_identity_map_addr) + kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; + identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT; r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, - kvm->arch.ept_identity_map_addr, PAGE_SIZE); + kvm_vmx->ept_identity_map_addr, PAGE_SIZE); if (r < 0) goto out2; @@ -4964,7 +5251,7 @@ static int init_rmode_identity_map(struct kvm *kvm) if (r < 0) goto out; } - kvm->arch.ept_identity_pagetable_done = true; + kvm_vmx->ept_identity_pagetable_done = true; out: srcu_read_unlock(&kvm->srcu, idx); @@ -5500,6 +5787,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) exec_control |= CPU_BASED_CR3_STORE_EXITING | CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_INVLPG_EXITING; + if (kvm_mwait_in_guest(vmx->vcpu.kvm)) + exec_control &= ~(CPU_BASED_MWAIT_EXITING | + CPU_BASED_MONITOR_EXITING); + if (kvm_hlt_in_guest(vmx->vcpu.kvm)) + exec_control &= ~CPU_BASED_HLT_EXITING; return exec_control; } @@ -5533,7 +5825,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) } if (!enable_unrestricted_guest) exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; - if (!ple_gap) + if (kvm_pause_in_guest(vmx->vcpu.kvm)) exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; if (!kvm_vcpu_apicv_active(vcpu)) exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | @@ -5565,10 +5857,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) if (nested) { if (xsaves_enabled) - vmx->nested.nested_vmx_secondary_ctls_high |= + vmx->nested.msrs.secondary_ctls_high |= SECONDARY_EXEC_XSAVES; else - vmx->nested.nested_vmx_secondary_ctls_high &= + vmx->nested.msrs.secondary_ctls_high &= ~SECONDARY_EXEC_XSAVES; } } @@ -5580,10 +5872,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) if (nested) { if (rdtscp_enabled) - vmx->nested.nested_vmx_secondary_ctls_high |= + vmx->nested.msrs.secondary_ctls_high |= SECONDARY_EXEC_RDTSCP; else - vmx->nested.nested_vmx_secondary_ctls_high &= + vmx->nested.msrs.secondary_ctls_high &= ~SECONDARY_EXEC_RDTSCP; } } @@ -5601,10 +5893,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) if (nested) { if (invpcid_enabled) - vmx->nested.nested_vmx_secondary_ctls_high |= + vmx->nested.msrs.secondary_ctls_high |= SECONDARY_EXEC_ENABLE_INVPCID; else - vmx->nested.nested_vmx_secondary_ctls_high &= + vmx->nested.msrs.secondary_ctls_high &= ~SECONDARY_EXEC_ENABLE_INVPCID; } } @@ -5616,10 +5908,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) if (nested) { if (rdrand_enabled) - vmx->nested.nested_vmx_secondary_ctls_high |= + vmx->nested.msrs.secondary_ctls_high |= SECONDARY_EXEC_RDRAND_EXITING; else - vmx->nested.nested_vmx_secondary_ctls_high &= + vmx->nested.msrs.secondary_ctls_high &= ~SECONDARY_EXEC_RDRAND_EXITING; } } @@ -5631,10 +5923,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) if (nested) { if (rdseed_enabled) - vmx->nested.nested_vmx_secondary_ctls_high |= + vmx->nested.msrs.secondary_ctls_high |= SECONDARY_EXEC_RDSEED_EXITING; else - vmx->nested.nested_vmx_secondary_ctls_high &= + vmx->nested.msrs.secondary_ctls_high &= ~SECONDARY_EXEC_RDSEED_EXITING; } } @@ -5696,7 +5988,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); } - if (ple_gap) { + if (!kvm_pause_in_guest(vmx->vcpu.kvm)) { vmcs_write32(PLE_GAP, ple_gap); vmx->ple_window = ple_window; vmx->ple_window_dirty = true; @@ -5861,6 +6153,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) update_exception_bitmap(vcpu); vpid_sync_context(vmx->vpid); + if (init_event) + vmx_clear_hlt(vcpu); } /* @@ -5885,8 +6179,7 @@ static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu) static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) { - return get_vmcs12(vcpu)->pin_based_vm_exec_control & - PIN_BASED_NMI_EXITING; + return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu)); } static void enable_irq_window(struct kvm_vcpu *vcpu) @@ -5932,6 +6225,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) } else intr |= INTR_TYPE_EXT_INTR; vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); + + vmx_clear_hlt(vcpu); } static void vmx_inject_nmi(struct kvm_vcpu *vcpu) @@ -5962,6 +6257,8 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); + + vmx_clear_hlt(vcpu); } static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) @@ -6024,14 +6321,23 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) { int ret; + if (enable_unrestricted_guest) + return 0; + ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr, PAGE_SIZE * 3); if (ret) return ret; - kvm->arch.tss_addr = addr; + to_kvm_vmx(kvm)->tss_addr = addr; return init_rmode_tss(kvm); } +static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) +{ + to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr; + return 0; +} + static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) { switch (vec) { @@ -6134,19 +6440,24 @@ static int handle_exception(struct kvm_vcpu *vcpu) if (is_nmi(intr_info)) return 1; /* already handled by vmx_vcpu_run() */ - if (is_invalid_opcode(intr_info)) { - er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); - if (er == EMULATE_USER_EXIT) - return 0; - if (er != EMULATE_DONE) - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; - } + if (is_invalid_opcode(intr_info)) + return handle_ud(vcpu); error_code = 0; if (intr_info & INTR_INFO_DELIVER_CODE_MASK) error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); + if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { + WARN_ON_ONCE(!enable_vmware_backdoor); + er = emulate_instruction(vcpu, + EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); + if (er == EMULATE_USER_EXIT) + return 0; + else if (er != EMULATE_DONE) + kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); + return 1; + } + /* * The #PF with PFEC.RSVD = 1 indicates the guest is accessing * MMIO, it is better to report an internal error. @@ -6232,28 +6543,22 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu) static int handle_io(struct kvm_vcpu *vcpu) { unsigned long exit_qualification; - int size, in, string, ret; + int size, in, string; unsigned port; exit_qualification = vmcs_readl(EXIT_QUALIFICATION); string = (exit_qualification & 16) != 0; - in = (exit_qualification & 8) != 0; ++vcpu->stat.io_exits; - if (string || in) + if (string) return emulate_instruction(vcpu, 0) == EMULATE_DONE; port = exit_qualification >> 16; size = (exit_qualification & 7) + 1; + in = (exit_qualification & 8) != 0; - ret = kvm_skip_emulated_instruction(vcpu); - - /* - * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered - * KVM_EXIT_DEBUG here. - */ - return kvm_fast_pio_out(vcpu, size, port) && ret; + return kvm_fast_pio(vcpu, size, port, in); } static void @@ -6344,6 +6649,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) err = handle_set_cr0(vcpu, val); return kvm_complete_insn_gp(vcpu, err); case 3: + WARN_ON_ONCE(enable_unrestricted_guest); err = kvm_set_cr3(vcpu, val); return kvm_complete_insn_gp(vcpu, err); case 4: @@ -6376,6 +6682,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) case 1: /*mov from cr*/ switch (cr) { case 3: + WARN_ON_ONCE(enable_unrestricted_guest); val = kvm_read_cr3(vcpu); kvm_register_write(vcpu, reg, val); trace_kvm_cr_read(cr, val); @@ -6769,7 +7076,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) static int handle_ept_misconfig(struct kvm_vcpu *vcpu) { - int ret; gpa_t gpa; /* @@ -6797,17 +7103,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) NULL, 0) == EMULATE_DONE; } - ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); - if (ret >= 0) - return ret; - - /* It is the real ept misconfig */ - WARN_ON(1); - - vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; - vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; - - return 0; + return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); } static int handle_nmi_window(struct kvm_vcpu *vcpu) @@ -6830,6 +7126,13 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) bool intr_window_requested; unsigned count = 130; + /* + * We should never reach the point where we are emulating L2 + * due to invalid guest state as that means we incorrectly + * allowed a nested VMEntry with an invalid vmcs12. + */ + WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending); + cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; @@ -6848,12 +7151,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) goto out; } - if (err != EMULATE_DONE) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; - } + if (err != EMULATE_DONE) + goto emulation_error; + + if (vmx->emulation_required && !vmx->rmode.vm86_active && + vcpu->arch.exception.pending) + goto emulation_error; if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; @@ -6869,34 +7172,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) out: return ret; -} - -static int __grow_ple_window(int val) -{ - if (ple_window_grow < 1) - return ple_window; - - val = min(val, ple_window_actual_max); - - if (ple_window_grow < ple_window) - val *= ple_window_grow; - else - val += ple_window_grow; - - return val; -} -static int __shrink_ple_window(int val, int modifier, int minimum) -{ - if (modifier < 1) - return ple_window; - - if (modifier < ple_window) - val /= modifier; - else - val -= modifier; - - return max(val, minimum); +emulation_error: + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; } static void grow_ple_window(struct kvm_vcpu *vcpu) @@ -6904,7 +7185,9 @@ static void grow_ple_window(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); int old = vmx->ple_window; - vmx->ple_window = __grow_ple_window(old); + vmx->ple_window = __grow_ple_window(old, ple_window, + ple_window_grow, + ple_window_max); if (vmx->ple_window != old) vmx->ple_window_dirty = true; @@ -6917,8 +7200,9 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); int old = vmx->ple_window; - vmx->ple_window = __shrink_ple_window(old, - ple_window_shrink, ple_window); + vmx->ple_window = __shrink_ple_window(old, ple_window, + ple_window_shrink, + ple_window); if (vmx->ple_window != old) vmx->ple_window_dirty = true; @@ -6927,21 +7211,6 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) } /* - * ple_window_actual_max is computed to be one grow_ple_window() below - * ple_window_max. (See __grow_ple_window for the reason.) - * This prevents overflows, because ple_window_max is int. - * ple_window_max effectively rounded down to a multiple of ple_window_grow in - * this process. - * ple_window_max is also prevented from setting vmx->ple_window < ple_window. - */ -static void update_ple_window_actual_max(void) -{ - ple_window_actual_max = - __shrink_ple_window(max(ple_window_max, ple_window), - ple_window_grow, INT_MIN); -} - -/* * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. */ static void wakeup_handler(void) @@ -6960,7 +7229,7 @@ static void wakeup_handler(void) spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); } -void vmx_enable_tdp(void) +static void vmx_enable_tdp(void) { kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull, @@ -7061,8 +7330,6 @@ static __init int hardware_setup(void) else kvm_disable_tdp(); - update_ple_window_actual_max(); - /* * Only enable PML when hardware supports PML feature, and both EPT * and EPT A/D bit features are enabled -- PML depends on them to work. @@ -7094,6 +7361,7 @@ static __init int hardware_setup(void) init_vmcs_shadow_fields(); kvm_set_posted_intr_wakeup_handler(wakeup_handler); + nested_vmx_setup_ctls_msrs(&vmcs_config.nested, enable_apicv); kvm_mce_cap_supported |= MCG_LMCE_P; @@ -7122,7 +7390,7 @@ static __exit void hardware_unsetup(void) */ static int handle_pause(struct kvm_vcpu *vcpu) { - if (ple_gap) + if (!kvm_pause_in_guest(vcpu->kvm)) grow_ple_window(vcpu); /* @@ -7954,9 +8222,9 @@ static int handle_invept(struct kvm_vcpu *vcpu) u64 eptp, gpa; } operand; - if (!(vmx->nested.nested_vmx_secondary_ctls_high & + if (!(vmx->nested.msrs.secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) || - !(vmx->nested.nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) { + !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; } @@ -7967,7 +8235,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); - types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; + types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; if (type >= 32 || !(types & (1 << type))) { nested_vmx_failValid(vcpu, @@ -8018,9 +8286,9 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) u64 gla; } operand; - if (!(vmx->nested.nested_vmx_secondary_ctls_high & + if (!(vmx->nested.msrs.secondary_ctls_high & SECONDARY_EXEC_ENABLE_VPID) || - !(vmx->nested.nested_vmx_vpid_caps & VMX_VPID_INVVPID_BIT)) { + !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; } @@ -8031,7 +8299,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); - types = (vmx->nested.nested_vmx_vpid_caps & + types = (vmx->nested.msrs.vpid_caps & VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8; if (type >= 32 || !(types & (1 << type))) { @@ -8125,11 +8393,11 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address) /* Check for memory type validity */ switch (address & VMX_EPTP_MT_MASK) { case VMX_EPTP_MT_UC: - if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT)) + if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)) return false; break; case VMX_EPTP_MT_WB: - if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_WB_BIT)) + if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT)) return false; break; default: @@ -8146,7 +8414,7 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address) /* AD, if set, should be supported */ if (address & VMX_EPTP_AD_ENABLE_BIT) { - if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT)) + if (!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)) return false; } @@ -8790,7 +9058,8 @@ static void dump_vmcs(void) pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n", vmcs_read64(GUEST_IA32_DEBUGCTL), vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); - if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) + if (cpu_has_load_perf_global_ctrl && + vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) pr_err("PerfGlobCtl = 0x%016llx\n", vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL)); if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) @@ -8826,7 +9095,8 @@ static void dump_vmcs(void) pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", vmcs_read64(HOST_IA32_EFER), vmcs_read64(HOST_IA32_PAT)); - if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) + if (cpu_has_load_perf_global_ctrl && + vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) pr_err("PerfGlobCtl = 0x%016llx\n", vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); @@ -9178,9 +9448,9 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) /* We need to handle NMIs before interrupts are enabled */ if (is_nmi(exit_intr_info)) { - kvm_before_handle_nmi(&vmx->vcpu); + kvm_before_interrupt(&vmx->vcpu); asm("int $2"); - kvm_after_handle_nmi(&vmx->vcpu); + kvm_after_interrupt(&vmx->vcpu); } } @@ -9403,7 +9673,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu) static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long cr3, cr4; + unsigned long cr3, cr4, evmcs_rsp; /* Record the guest's net vcpu time for enforced NMI injections. */ if (unlikely(!enable_vnmi && @@ -9469,6 +9739,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); vmx->__launched = vmx->loaded_vmcs->launched; + + evmcs_rsp = static_branch_unlikely(&enable_evmcs) ? + (unsigned long)¤t_evmcs->host_rsp : 0; + asm( /* Store host registers */ "push %%" _ASM_DX "; push %%" _ASM_BP ";" @@ -9477,15 +9751,21 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t" "je 1f \n\t" "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t" + /* Avoid VMWRITE when Enlightened VMCS is in use */ + "test %%" _ASM_SI ", %%" _ASM_SI " \n\t" + "jz 2f \n\t" + "mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t" + "jmp 1f \n\t" + "2: \n\t" __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" "1: \n\t" /* Reload cr2 if changed */ "mov %c[cr2](%0), %%" _ASM_AX " \n\t" "mov %%cr2, %%" _ASM_DX " \n\t" "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t" - "je 2f \n\t" + "je 3f \n\t" "mov %%" _ASM_AX", %%cr2 \n\t" - "2: \n\t" + "3: \n\t" /* Check if vmlaunch of vmresume is needed */ "cmpl $0, %c[launched](%0) \n\t" /* Load guest registers. Don't clobber flags. */ @@ -9554,7 +9834,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) ".global vmx_return \n\t" "vmx_return: " _ASM_PTR " 2b \n\t" ".popsection" - : : "c"(vmx), "d"((unsigned long)HOST_RSP), + : : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp), [launched]"i"(offsetof(struct vcpu_vmx, __launched)), [fail]"i"(offsetof(struct vcpu_vmx, fail)), [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), @@ -9579,10 +9859,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) [wordsize]"i"(sizeof(ulong)) : "cc", "memory" #ifdef CONFIG_X86_64 - , "rax", "rbx", "rdi", "rsi" + , "rax", "rbx", "rdi" , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" #else - , "eax", "ebx", "edi", "esi" + , "eax", "ebx", "edi" #endif ); @@ -9610,6 +9890,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); + /* All fields are clean at this point */ + if (static_branch_unlikely(&enable_evmcs)) + current_evmcs->hv_clean_fields |= + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ if (vmx->host_debugctlmsr) update_debugctlmsr(vmx->host_debugctlmsr); @@ -9646,14 +9931,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) __write_pkru(vmx->host_pkru); } - /* - * the KVM_REQ_EVENT optimization bit is only on for one entry, and if - * we did not inject a still-pending event to L1 now because of - * nested_run_pending, we need to re-enable this bit. - */ - if (vmx->nested.nested_run_pending) - kvm_make_request(KVM_REQ_EVENT, vcpu); - vmx->nested.nested_run_pending = 0; vmx->idt_vectoring_info = 0; @@ -9670,6 +9947,17 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) } STACK_FRAME_NON_STANDARD(vmx_vcpu_run); +static struct kvm *vmx_vm_alloc(void) +{ + struct kvm_vmx *kvm_vmx = kzalloc(sizeof(struct kvm_vmx), GFP_KERNEL); + return &kvm_vmx->kvm; +} + +static void vmx_vm_free(struct kvm *kvm) +{ + kfree(to_kvm_vmx(kvm)); +} + static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -9777,14 +10065,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) goto free_vmcs; } - if (enable_ept) { + if (enable_ept && !enable_unrestricted_guest) { err = init_rmode_identity_map(kvm); if (err) goto free_vmcs; } if (nested) { - nested_vmx_setup_ctls_msrs(vmx); + nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, + kvm_vcpu_apicv_active(&vmx->vcpu)); vmx->nested.vpid02 = allocate_vpid(); } @@ -9817,6 +10106,13 @@ free_vcpu: return ERR_PTR(err); } +static int vmx_vm_init(struct kvm *kvm) +{ + if (!ple_gap) + kvm->arch.pause_in_guest = true; + return 0; +} + static void __init vmx_check_processor_compat(void *rtn) { struct vmcs_config vmcs_conf; @@ -9824,6 +10120,7 @@ static void __init vmx_check_processor_compat(void *rtn) *(int *)rtn = 0; if (setup_vmcs_config(&vmcs_conf) < 0) *(int *)rtn = -EIO; + nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, enable_apicv); if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", smp_processor_id()); @@ -9911,12 +10208,12 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); struct kvm_cpuid_entry2 *entry; - vmx->nested.nested_vmx_cr0_fixed1 = 0xffffffff; - vmx->nested.nested_vmx_cr4_fixed1 = X86_CR4_PCE; + vmx->nested.msrs.cr0_fixed1 = 0xffffffff; + vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE; #define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \ if (entry && (entry->_reg & (_cpuid_mask))) \ - vmx->nested.nested_vmx_cr4_fixed1 |= (_cr4_mask); \ + vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \ } while (0) entry = kvm_find_cpuid_entry(vcpu, 0x1, 0); @@ -10013,7 +10310,7 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) kvm_mmu_unload(vcpu); kvm_init_shadow_ept_mmu(vcpu, - to_vmx(vcpu)->nested.nested_vmx_ept_caps & + to_vmx(vcpu)->nested.msrs.ept_caps & VMX_EPT_EXECUTE_ONLY_BIT, nested_ept_ad_enabled(vcpu)); vcpu->arch.mmu.set_cr3 = vmx_set_cr3; @@ -10952,6 +11249,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, /* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */ vmx_set_efer(vcpu, vcpu->arch.efer); + /* + * Guest state is invalid and unrestricted guest is disabled, + * which means L1 attempted VMEntry to L2 with invalid state. + * Fail the VMEntry. + */ + if (vmx->emulation_required) { + *entry_failure_code = ENTRY_FAIL_DEFAULT; + return 1; + } + /* Shadow page tables on either EPT or shadow page tables. */ if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), entry_failure_code)) @@ -10965,6 +11272,19 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, return 0; } +static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12) +{ + if (!nested_cpu_has_nmi_exiting(vmcs12) && + nested_cpu_has_virtual_nmis(vmcs12)) + return -EINVAL; + + if (!nested_cpu_has_virtual_nmis(vmcs12) && + nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING)) + return -EINVAL; + + return 0; +} + static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -10992,26 +11312,29 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, - vmx->nested.nested_vmx_procbased_ctls_low, - vmx->nested.nested_vmx_procbased_ctls_high) || + vmx->nested.msrs.procbased_ctls_low, + vmx->nested.msrs.procbased_ctls_high) || (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) && !vmx_control_verify(vmcs12->secondary_vm_exec_control, - vmx->nested.nested_vmx_secondary_ctls_low, - vmx->nested.nested_vmx_secondary_ctls_high)) || + vmx->nested.msrs.secondary_ctls_low, + vmx->nested.msrs.secondary_ctls_high)) || !vmx_control_verify(vmcs12->pin_based_vm_exec_control, - vmx->nested.nested_vmx_pinbased_ctls_low, - vmx->nested.nested_vmx_pinbased_ctls_high) || + vmx->nested.msrs.pinbased_ctls_low, + vmx->nested.msrs.pinbased_ctls_high) || !vmx_control_verify(vmcs12->vm_exit_controls, - vmx->nested.nested_vmx_exit_ctls_low, - vmx->nested.nested_vmx_exit_ctls_high) || + vmx->nested.msrs.exit_ctls_low, + vmx->nested.msrs.exit_ctls_high) || !vmx_control_verify(vmcs12->vm_entry_controls, - vmx->nested.nested_vmx_entry_ctls_low, - vmx->nested.nested_vmx_entry_ctls_high)) + vmx->nested.msrs.entry_ctls_low, + vmx->nested.msrs.entry_ctls_high)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + + if (nested_vmx_check_nmi_controls(vmcs12)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; if (nested_cpu_has_vmfunc(vmcs12)) { if (vmcs12->vm_function_control & - ~vmx->nested.nested_vmx_vmfunc_controls) + ~vmx->nested.msrs.vmfunc_controls) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; if (nested_cpu_has_eptp_switching(vmcs12)) { @@ -11293,7 +11616,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, } else if (vcpu->arch.nmi_injected) { vmcs12->idt_vectoring_info_field = INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; - } else if (vcpu->arch.interrupt.pending) { + } else if (vcpu->arch.interrupt.injected) { nr = vcpu->arch.interrupt.nr; idt_vectoring = nr | VECTORING_INFO_VALID_MASK; @@ -11941,7 +12264,7 @@ static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) { - if (ple_gap) + if (!kvm_pause_in_guest(vcpu->kvm)) shrink_ple_window(vcpu); } @@ -12259,6 +12582,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) vmx->nested.smm.vmxon = vmx->nested.vmxon; vmx->nested.vmxon = false; + vmx_clear_hlt(vcpu); return 0; } @@ -12300,6 +12624,10 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .cpu_has_accelerated_tpr = report_flexpriority, .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase, + .vm_init = vmx_vm_init, + .vm_alloc = vmx_vm_alloc, + .vm_free = vmx_vm_free, + .vcpu_create = vmx_create_vcpu, .vcpu_free = vmx_free_vcpu, .vcpu_reset = vmx_vcpu_reset, @@ -12367,6 +12695,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .deliver_posted_interrupt = vmx_deliver_posted_interrupt, .set_tss_addr = vmx_set_tss_addr, + .set_identity_map_addr = vmx_set_identity_map_addr, .get_tdp_level = get_ept_level, .get_mt_mask = vmx_get_mt_mask, @@ -12425,7 +12754,38 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { static int __init vmx_init(void) { - int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), + int r; + +#if IS_ENABLED(CONFIG_HYPERV) + /* + * Enlightened VMCS usage should be recommended and the host needs + * to support eVMCS v1 or above. We can also disable eVMCS support + * with module parameter. + */ + if (enlightened_vmcs && + ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED && + (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >= + KVM_EVMCS_VERSION) { + int cpu; + + /* Check that we have assist pages on all online CPUs */ + for_each_online_cpu(cpu) { + if (!hv_get_vp_assist_page(cpu)) { + enlightened_vmcs = false; + break; + } + } + + if (enlightened_vmcs) { + pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n"); + static_branch_enable(&enable_evmcs); + } + } else { + enlightened_vmcs = false; + } +#endif + + r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), __alignof__(struct vcpu_vmx), THIS_MODULE); if (r) return r; @@ -12446,6 +12806,29 @@ static void __exit vmx_exit(void) #endif kvm_exit(); + +#if IS_ENABLED(CONFIG_HYPERV) + if (static_branch_unlikely(&enable_evmcs)) { + int cpu; + struct hv_vp_assist_page *vp_ap; + /* + * Reset everything to support using non-enlightened VMCS + * access later (e.g. when we reload the module with + * enlightened_vmcs=0) + */ + for_each_online_cpu(cpu) { + vp_ap = hv_get_vp_assist_page(cpu); + + if (!vp_ap) + continue; + + vp_ap->current_nested_vmcs = 0; + vp_ap->enlighten_vmentry = 0; + } + + static_branch_disable(&enable_evmcs); + } +#endif } module_init(vmx_init) diff --git a/arch/x86/kvm/vmx_evmcs.h b/arch/x86/kvm/vmx_evmcs.h new file mode 100644 index 000000000000..210a884090ad --- /dev/null +++ b/arch/x86/kvm/vmx_evmcs.h @@ -0,0 +1,324 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __KVM_X86_VMX_EVMCS_H +#define __KVM_X86_VMX_EVMCS_H + +#include <asm/hyperv-tlfs.h> + +#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n))))) +#define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x) +#define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \ + {EVMCS1_OFFSET(name), clean_field} + +struct evmcs_field { + u16 offset; + u16 clean_field; +}; + +static const struct evmcs_field vmcs_field_to_evmcs_1[] = { + /* 64 bit rw */ + EVMCS1_FIELD(GUEST_RIP, guest_rip, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(GUEST_RSP, guest_rsp, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC), + EVMCS1_FIELD(GUEST_RFLAGS, guest_rflags, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC), + EVMCS1_FIELD(HOST_IA32_PAT, host_ia32_pat, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_IA32_EFER, host_ia32_efer, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_CR0, host_cr0, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_CR3, host_cr3, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_CR4, host_cr4, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_IA32_SYSENTER_ESP, host_ia32_sysenter_esp, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_RIP, host_rip, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(IO_BITMAP_A, io_bitmap_a, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP), + EVMCS1_FIELD(IO_BITMAP_B, io_bitmap_b, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP), + EVMCS1_FIELD(MSR_BITMAP, msr_bitmap, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP), + EVMCS1_FIELD(GUEST_ES_BASE, guest_es_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_CS_BASE, guest_cs_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_SS_BASE, guest_ss_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_DS_BASE, guest_ds_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_FS_BASE, guest_fs_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_GS_BASE, guest_gs_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_LDTR_BASE, guest_ldtr_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_TR_BASE, guest_tr_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_GDTR_BASE, guest_gdtr_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_IDTR_BASE, guest_idtr_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(TSC_OFFSET, tsc_offset, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2), + EVMCS1_FIELD(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2), + EVMCS1_FIELD(VMCS_LINK_POINTER, vmcs_link_pointer, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_IA32_PAT, guest_ia32_pat, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_IA32_EFER, guest_ia32_efer, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_PDPTR0, guest_pdptr0, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_PDPTR1, guest_pdptr1, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_PDPTR2, guest_pdptr2, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_PDPTR3, guest_pdptr3, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), + EVMCS1_FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), + EVMCS1_FIELD(CR0_READ_SHADOW, cr0_read_shadow, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), + EVMCS1_FIELD(CR4_READ_SHADOW, cr4_read_shadow, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), + EVMCS1_FIELD(GUEST_CR0, guest_cr0, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), + EVMCS1_FIELD(GUEST_CR3, guest_cr3, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), + EVMCS1_FIELD(GUEST_CR4, guest_cr4, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), + EVMCS1_FIELD(GUEST_DR7, guest_dr7, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), + EVMCS1_FIELD(HOST_FS_BASE, host_fs_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER), + EVMCS1_FIELD(HOST_GS_BASE, host_gs_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER), + EVMCS1_FIELD(HOST_TR_BASE, host_tr_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER), + EVMCS1_FIELD(HOST_GDTR_BASE, host_gdtr_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER), + EVMCS1_FIELD(HOST_IDTR_BASE, host_idtr_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER), + EVMCS1_FIELD(HOST_RSP, host_rsp, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER), + EVMCS1_FIELD(EPT_POINTER, ept_pointer, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT), + EVMCS1_FIELD(GUEST_BNDCFGS, guest_bndcfgs, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(XSS_EXIT_BITMAP, xss_exit_bitmap, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2), + + /* 64 bit read only */ + EVMCS1_FIELD(GUEST_PHYSICAL_ADDRESS, guest_physical_address, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(EXIT_QUALIFICATION, exit_qualification, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + /* + * Not defined in KVM: + * + * EVMCS1_FIELD(0x00006402, exit_io_instruction_ecx, + * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE); + * EVMCS1_FIELD(0x00006404, exit_io_instruction_esi, + * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE); + * EVMCS1_FIELD(0x00006406, exit_io_instruction_esi, + * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE); + * EVMCS1_FIELD(0x00006408, exit_io_instruction_eip, + * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE); + */ + EVMCS1_FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + + /* + * No mask defined in the spec as Hyper-V doesn't currently support + * these. Future proof by resetting the whole clean field mask on + * access. + */ + EVMCS1_FIELD(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(CR3_TARGET_VALUE0, cr3_target_value0, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(CR3_TARGET_VALUE1, cr3_target_value1, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(CR3_TARGET_VALUE2, cr3_target_value2, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(CR3_TARGET_VALUE3, cr3_target_value3, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + + /* 32 bit rw */ + EVMCS1_FIELD(TPR_THRESHOLD, tpr_threshold, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC), + EVMCS1_FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC), + EVMCS1_FIELD(EXCEPTION_BITMAP, exception_bitmap, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN), + EVMCS1_FIELD(VM_ENTRY_CONTROLS, vm_entry_controls, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY), + EVMCS1_FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT), + EVMCS1_FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE, + vm_entry_exception_error_code, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT), + EVMCS1_FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT), + EVMCS1_FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1), + EVMCS1_FIELD(VM_EXIT_CONTROLS, vm_exit_controls, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1), + EVMCS1_FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1), + EVMCS1_FIELD(GUEST_ES_LIMIT, guest_es_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_CS_LIMIT, guest_cs_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_SS_LIMIT, guest_ss_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_DS_LIMIT, guest_ds_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_FS_LIMIT, guest_fs_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_GS_LIMIT, guest_gs_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_TR_LIMIT, guest_tr_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_IDTR_LIMIT, guest_idtr_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_ACTIVITY_STATE, guest_activity_state, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + + /* 32 bit read only */ + EVMCS1_FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(VM_EXIT_REASON, vm_exit_reason, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + + /* No mask defined in the spec (not used) */ + EVMCS1_FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(CR3_TARGET_COUNT, cr3_target_count, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + + /* 16 bit rw */ + EVMCS1_FIELD(HOST_ES_SELECTOR, host_es_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_CS_SELECTOR, host_cs_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_SS_SELECTOR, host_ss_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_DS_SELECTOR, host_ds_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_FS_SELECTOR, host_fs_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_GS_SELECTOR, host_gs_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_TR_SELECTOR, host_tr_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(GUEST_ES_SELECTOR, guest_es_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_CS_SELECTOR, guest_cs_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_SS_SELECTOR, guest_ss_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_DS_SELECTOR, guest_ds_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_FS_SELECTOR, guest_fs_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_GS_SELECTOR, guest_gs_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_TR_SELECTOR, guest_tr_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT), +}; + +static __always_inline int get_evmcs_offset(unsigned long field, + u16 *clean_field) +{ + unsigned int index = ROL16(field, 6); + const struct evmcs_field *evmcs_field; + + if (unlikely(index >= ARRAY_SIZE(vmcs_field_to_evmcs_1))) { + WARN_ONCE(1, "KVM: accessing unsupported EVMCS field %lx\n", + field); + return -ENOENT; + } + + evmcs_field = &vmcs_field_to_evmcs_1[index]; + + if (clean_field) + *clean_field = evmcs_field->clean_field; + + return evmcs_field->offset; +} + +#undef ROL16 + +#endif /* __KVM_X86_VMX_EVMCS_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 18b5ca7a3197..b2ff74b12ec4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -102,6 +102,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); static void enter_smm(struct kvm_vcpu *vcpu); static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); +static void store_regs(struct kvm_vcpu *vcpu); +static int sync_regs(struct kvm_vcpu *vcpu); struct kvm_x86_ops *kvm_x86_ops __read_mostly; EXPORT_SYMBOL_GPL(kvm_x86_ops); @@ -140,6 +142,13 @@ module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); static bool __read_mostly vector_hashing = true; module_param(vector_hashing, bool, S_IRUGO); +bool __read_mostly enable_vmware_backdoor = false; +module_param(enable_vmware_backdoor, bool, S_IRUGO); +EXPORT_SYMBOL_GPL(enable_vmware_backdoor); + +static bool __read_mostly force_emulation_prefix = false; +module_param(force_emulation_prefix, bool, S_IRUGO); + #define KVM_NR_SHARED_MSRS 16 struct kvm_shared_msrs_global { @@ -1032,7 +1041,11 @@ static u32 emulated_msrs[] = { HV_X64_MSR_VP_RUNTIME, HV_X64_MSR_SCONTROL, HV_X64_MSR_STIMER0_CONFIG, - HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, + HV_X64_MSR_VP_ASSIST_PAGE, + HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL, + HV_X64_MSR_TSC_EMULATION_STATUS, + + MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, MSR_KVM_PV_EOI_EN, MSR_IA32_TSC_ADJUST, @@ -1054,6 +1067,25 @@ static unsigned num_emulated_msrs; * can be used by a hypervisor to validate requested CPU features. */ static u32 msr_based_features[] = { + MSR_IA32_VMX_BASIC, + MSR_IA32_VMX_TRUE_PINBASED_CTLS, + MSR_IA32_VMX_PINBASED_CTLS, + MSR_IA32_VMX_TRUE_PROCBASED_CTLS, + MSR_IA32_VMX_PROCBASED_CTLS, + MSR_IA32_VMX_TRUE_EXIT_CTLS, + MSR_IA32_VMX_EXIT_CTLS, + MSR_IA32_VMX_TRUE_ENTRY_CTLS, + MSR_IA32_VMX_ENTRY_CTLS, + MSR_IA32_VMX_MISC, + MSR_IA32_VMX_CR0_FIXED0, + MSR_IA32_VMX_CR0_FIXED1, + MSR_IA32_VMX_CR4_FIXED0, + MSR_IA32_VMX_CR4_FIXED1, + MSR_IA32_VMX_VMCS_ENUM, + MSR_IA32_VMX_PROCBASED_CTLS2, + MSR_IA32_VMX_EPT_VPID_CAP, + MSR_IA32_VMX_VMFUNC, + MSR_F10H_DECFG, MSR_IA32_UCODE_REV, }; @@ -2432,6 +2464,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: + case HV_X64_MSR_REENLIGHTENMENT_CONTROL: + case HV_X64_MSR_TSC_EMULATION_CONTROL: + case HV_X64_MSR_TSC_EMULATION_STATUS: return kvm_hv_set_msr_common(vcpu, msr, data, msr_info->host_initiated); case MSR_IA32_BBL_CR_CTL3: @@ -2558,6 +2593,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_AMD64_DC_CFG: msr_info->data = 0; break; + case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5: case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1: @@ -2661,6 +2697,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: + case HV_X64_MSR_REENLIGHTENMENT_CONTROL: + case HV_X64_MSR_TSC_EMULATION_CONTROL: + case HV_X64_MSR_TSC_EMULATION_STATUS: return kvm_hv_get_msr_common(vcpu, msr_info->index, &msr_info->data); break; @@ -2777,9 +2816,15 @@ out: return r; } +static inline bool kvm_can_mwait_in_guest(void) +{ + return boot_cpu_has(X86_FEATURE_MWAIT) && + !boot_cpu_has_bug(X86_BUG_MONITOR); +} + int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { - int r; + int r = 0; switch (ext) { case KVM_CAP_IRQCHIP: @@ -2809,6 +2854,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV_SYNIC: case KVM_CAP_HYPERV_SYNIC2: case KVM_CAP_HYPERV_VP_INDEX: + case KVM_CAP_HYPERV_EVENTFD: case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: case KVM_CAP_X86_ROBUST_SINGLESTEP: @@ -2828,11 +2874,16 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_GET_MSR_FEATURES: r = 1; break; + case KVM_CAP_SYNC_REGS: + r = KVM_SYNC_X86_VALID_FIELDS; + break; case KVM_CAP_ADJUST_CLOCK: r = KVM_CLOCK_TSC_STABLE; break; - case KVM_CAP_X86_GUEST_MWAIT: - r = kvm_mwait_in_guest(); + case KVM_CAP_X86_DISABLE_EXITS: + r |= KVM_X86_DISABLE_EXITS_HTL | KVM_X86_DISABLE_EXITS_PAUSE; + if(kvm_can_mwait_in_guest()) + r |= KVM_X86_DISABLE_EXITS_MWAIT; break; case KVM_CAP_X86_SMM: /* SMBASE is usually relocated above 1M on modern chipsets, @@ -2873,7 +2924,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_X2APIC_API_VALID_FLAGS; break; default: - r = 0; break; } return r; @@ -3265,7 +3315,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->exception.error_code = vcpu->arch.exception.error_code; events->interrupt.injected = - vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; + vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft; events->interrupt.nr = vcpu->arch.interrupt.nr; events->interrupt.soft = 0; events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu); @@ -3318,7 +3368,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.exception.has_error_code = events->exception.has_error_code; vcpu->arch.exception.error_code = events->exception.error_code; - vcpu->arch.interrupt.pending = events->interrupt.injected; + vcpu->arch.interrupt.injected = events->interrupt.injected; vcpu->arch.interrupt.nr = events->interrupt.nr; vcpu->arch.interrupt.soft = events->interrupt.soft; if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) @@ -3917,8 +3967,7 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) { - kvm->arch.ept_identity_map_addr = ident_addr; - return 0; + return kvm_x86_ops->set_identity_map_addr(kvm, ident_addr); } static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, @@ -4178,6 +4227,20 @@ split_irqchip_unlock: r = 0; break; + case KVM_CAP_X86_DISABLE_EXITS: + r = -EINVAL; + if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS) + break; + + if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && + kvm_can_mwait_in_guest()) + kvm->arch.mwait_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_HTL) + kvm->arch.hlt_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) + kvm->arch.pause_in_guest = true; + r = 0; + break; default: r = -EINVAL; break; @@ -4482,6 +4545,15 @@ set_identity_unlock: r = kvm_x86_ops->mem_enc_unreg_region(kvm, ®ion); break; } + case KVM_HYPERV_EVENTFD: { + struct kvm_hyperv_eventfd hvevfd; + + r = -EFAULT; + if (copy_from_user(&hvevfd, argp, sizeof(hvevfd))) + goto out; + r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd); + break; + } default: r = -ENOTTY; } @@ -4771,6 +4843,30 @@ out: } EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); +int handle_ud(struct kvm_vcpu *vcpu) +{ + int emul_type = EMULTYPE_TRAP_UD; + enum emulation_result er; + char sig[5]; /* ud2; .ascii "kvm" */ + struct x86_exception e; + + if (force_emulation_prefix && + kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, + kvm_get_linear_rip(vcpu), sig, sizeof(sig), &e) == 0 && + memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) { + kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig)); + emul_type = 0; + } + + er = emulate_instruction(vcpu, emul_type); + if (er == EMULATE_USER_EXIT) + return 0; + if (er != EMULATE_DONE) + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; +} +EXPORT_SYMBOL_GPL(handle_ud); + static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva, gpa_t gpa, bool write) { @@ -5612,27 +5708,27 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) kvm_rip_write(vcpu, ctxt->eip); kvm_set_rflags(vcpu, ctxt->eflags); - if (irq == NMI_VECTOR) - vcpu->arch.nmi_pending = 0; - else - vcpu->arch.interrupt.pending = false; - return EMULATE_DONE; } EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); -static int handle_emulation_failure(struct kvm_vcpu *vcpu) +static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) { int r = EMULATE_DONE; ++vcpu->stat.insn_emulation_fail; trace_kvm_emulate_insn_failed(vcpu); + + if (emulation_type & EMULTYPE_NO_UD_ON_FAIL) + return EMULATE_FAIL; + if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; r = EMULATE_USER_EXIT; } + kvm_queue_exception(vcpu, UD_VECTOR); return r; @@ -5876,6 +5972,37 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) return false; } +static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt) +{ + switch (ctxt->opcode_len) { + case 1: + switch (ctxt->b) { + case 0xe4: /* IN */ + case 0xe5: + case 0xec: + case 0xed: + case 0xe6: /* OUT */ + case 0xe7: + case 0xee: + case 0xef: + case 0x6c: /* INS */ + case 0x6d: + case 0x6e: /* OUTS */ + case 0x6f: + return true; + } + break; + case 2: + switch (ctxt->b) { + case 0x33: /* RDPMC */ + return true; + } + break; + } + + return false; +} + int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, int emulation_type, @@ -5928,10 +6055,14 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, return EMULATE_DONE; if (emulation_type & EMULTYPE_SKIP) return EMULATE_FAIL; - return handle_emulation_failure(vcpu); + return handle_emulation_failure(vcpu, emulation_type); } } + if ((emulation_type & EMULTYPE_VMWARE) && + !is_vmware_backdoor_opcode(ctxt)) + return EMULATE_FAIL; + if (emulation_type & EMULTYPE_SKIP) { kvm_rip_write(vcpu, ctxt->_eip); if (ctxt->eflags & X86_EFLAGS_RF) @@ -5963,7 +6094,7 @@ restart: emulation_type)) return EMULATE_DONE; - return handle_emulation_failure(vcpu); + return handle_emulation_failure(vcpu, emulation_type); } if (ctxt->have_exception) { @@ -6016,7 +6147,8 @@ restart: } EXPORT_SYMBOL_GPL(x86_emulate_instruction); -int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) +static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, + unsigned short port) { unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, @@ -6025,7 +6157,6 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) vcpu->arch.pio.count = 0; return ret; } -EXPORT_SYMBOL_GPL(kvm_fast_pio_out); static int complete_fast_pio_in(struct kvm_vcpu *vcpu) { @@ -6049,7 +6180,8 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu) return 1; } -int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port) +static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, + unsigned short port) { unsigned long val; int ret; @@ -6068,7 +6200,21 @@ int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port) return 0; } -EXPORT_SYMBOL_GPL(kvm_fast_pio_in); + +int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in) +{ + int ret = kvm_skip_emulated_instruction(vcpu); + + /* + * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered + * KVM_EXIT_DEBUG here. + */ + if (in) + return kvm_fast_pio_in(vcpu, size, port) && ret; + else + return kvm_fast_pio_out(vcpu, size, port) && ret; +} +EXPORT_SYMBOL_GPL(kvm_fast_pio); static int kvmclock_cpu_down_prep(unsigned int cpu) { @@ -6246,7 +6392,8 @@ static void kvm_timer_init(void) kvmclock_cpu_online, kvmclock_cpu_down_prep); } -static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); +DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); +EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu); int kvm_is_in_guest(void) { @@ -6279,18 +6426,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { .get_guest_ip = kvm_get_guest_ip, }; -void kvm_before_handle_nmi(struct kvm_vcpu *vcpu) -{ - __this_cpu_write(current_vcpu, vcpu); -} -EXPORT_SYMBOL_GPL(kvm_before_handle_nmi); - -void kvm_after_handle_nmi(struct kvm_vcpu *vcpu) -{ - __this_cpu_write(current_vcpu, NULL); -} -EXPORT_SYMBOL_GPL(kvm_after_handle_nmi); - static void kvm_set_mmio_spte_mask(void) { u64 mask; @@ -6644,27 +6779,36 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) int r; /* try to reinject previous events if any */ - if (vcpu->arch.exception.injected) { - kvm_x86_ops->queue_exception(vcpu); - return 0; - } + if (vcpu->arch.exception.injected) + kvm_x86_ops->queue_exception(vcpu); /* - * Exceptions must be injected immediately, or the exception - * frame will have the address of the NMI or interrupt handler. + * Do not inject an NMI or interrupt if there is a pending + * exception. Exceptions and interrupts are recognized at + * instruction boundaries, i.e. the start of an instruction. + * Trap-like exceptions, e.g. #DB, have higher priority than + * NMIs and interrupts, i.e. traps are recognized before an + * NMI/interrupt that's pending on the same instruction. + * Fault-like exceptions, e.g. #GP and #PF, are the lowest + * priority, but are only generated (pended) during instruction + * execution, i.e. a pending fault-like exception means the + * fault occurred on the *previous* instruction and must be + * serviced prior to recognizing any new events in order to + * fully complete the previous instruction. */ - if (!vcpu->arch.exception.pending) { - if (vcpu->arch.nmi_injected) { + else if (!vcpu->arch.exception.pending) { + if (vcpu->arch.nmi_injected) kvm_x86_ops->set_nmi(vcpu); - return 0; - } - - if (vcpu->arch.interrupt.pending) { + else if (vcpu->arch.interrupt.injected) kvm_x86_ops->set_irq(vcpu); - return 0; - } } + /* + * Call check_nested_events() even if we reinjected a previous event + * in order for caller to determine if it should require immediate-exit + * from L2 to L1 due to pending L1 events which require exit + * from L2 to L1. + */ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); if (r != 0) @@ -6677,6 +6821,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code); + WARN_ON_ONCE(vcpu->arch.exception.injected); vcpu->arch.exception.pending = false; vcpu->arch.exception.injected = true; @@ -6691,7 +6836,14 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) } kvm_x86_ops->queue_exception(vcpu); - } else if (vcpu->arch.smi_pending && !is_smm(vcpu) && kvm_x86_ops->smi_allowed(vcpu)) { + } + + /* Don't consider new event if we re-injected an event */ + if (kvm_event_needs_reinjection(vcpu)) + return 0; + + if (vcpu->arch.smi_pending && !is_smm(vcpu) && + kvm_x86_ops->smi_allowed(vcpu)) { vcpu->arch.smi_pending = false; ++vcpu->arch.smi_count; enter_smm(vcpu); @@ -6985,8 +7137,6 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm) static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { - u64 eoi_exit_bitmap[4]; - if (!kvm_apic_hw_enabled(vcpu->arch.apic)) return; @@ -6999,6 +7149,20 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) kvm_x86_ops->sync_pir_to_irr(vcpu); kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } + + if (is_guest_mode(vcpu)) + vcpu->arch.load_eoi_exitmap_pending = true; + else + kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu); +} + +static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) +{ + u64 eoi_exit_bitmap[4]; + + if (!kvm_apic_hw_enabled(vcpu->arch.apic)) + return; + bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, vcpu_to_synic(vcpu)->vec_bitmap, 256); kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); @@ -7113,6 +7277,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) vcpu_scan_ioapic(vcpu); + if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu)) + vcpu_load_eoi_exitmap(vcpu); if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) kvm_vcpu_reload_apic_access_page(vcpu); if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) { @@ -7291,7 +7457,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_put_guest_xcr0(vcpu); + kvm_before_interrupt(vcpu); kvm_x86_ops->handle_external_intr(vcpu); + kvm_after_interrupt(vcpu); ++vcpu->stat.exits; @@ -7500,7 +7668,6 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) return 0; } - int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; @@ -7526,6 +7693,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) goto out; } + if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) { + r = -EINVAL; + goto out; + } + + if (vcpu->run->kvm_dirty_regs) { + r = sync_regs(vcpu); + if (r != 0) + goto out; + } + /* re-sync apic's tpr */ if (!lapic_in_kernel(vcpu)) { if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { @@ -7550,6 +7728,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) out: kvm_put_guest_fpu(vcpu); + if (vcpu->run->kvm_valid_regs) + store_regs(vcpu); post_kvm_run_save(vcpu); kvm_sigset_deactivate(vcpu); @@ -7557,10 +7737,8 @@ out: return r; } -int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { - vcpu_load(vcpu); - if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { /* * We are here if userspace calls get_regs() in the middle of @@ -7593,15 +7771,18 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->rip = kvm_rip_read(vcpu); regs->rflags = kvm_get_rflags(vcpu); +} +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_load(vcpu); + __get_regs(vcpu, regs); vcpu_put(vcpu); return 0; } -int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { - vcpu_load(vcpu); - vcpu->arch.emulate_regs_need_sync_from_vcpu = true; vcpu->arch.emulate_regs_need_sync_to_vcpu = false; @@ -7630,7 +7811,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.exception.pending = false; kvm_make_request(KVM_REQ_EVENT, vcpu); +} +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_load(vcpu); + __set_regs(vcpu, regs); vcpu_put(vcpu); return 0; } @@ -7645,13 +7831,10 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) } EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); -int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) +static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct desc_ptr dt; - vcpu_load(vcpu); - kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); @@ -7679,10 +7862,16 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); - if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft) + if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft) set_bit(vcpu->arch.interrupt.nr, (unsigned long *)sregs->interrupt_bitmap); +} +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + vcpu_load(vcpu); + __get_sregs(vcpu, sregs); vcpu_put(vcpu); return 0; } @@ -7754,7 +7943,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, } EXPORT_SYMBOL_GPL(kvm_task_switch); -int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { /* @@ -7777,8 +7966,7 @@ int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) return 0; } -int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) +static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct msr_data apic_base_msr; int mmu_reset_needed = 0; @@ -7786,8 +7974,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct desc_ptr dt; int ret = -EINVAL; - vcpu_load(vcpu); - if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (sregs->cr4 & X86_CR4_OSXSAVE)) goto out; @@ -7866,6 +8052,16 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, ret = 0; out: + return ret; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + int ret; + + vcpu_load(vcpu); + ret = __set_sregs(vcpu, sregs); vcpu_put(vcpu); return ret; } @@ -7992,6 +8188,45 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return 0; } +static void store_regs(struct kvm_vcpu *vcpu) +{ + BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES); + + if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS) + __get_regs(vcpu, &vcpu->run->s.regs.regs); + + if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS) + __get_sregs(vcpu, &vcpu->run->s.regs.sregs); + + if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS) + kvm_vcpu_ioctl_x86_get_vcpu_events( + vcpu, &vcpu->run->s.regs.events); +} + +static int sync_regs(struct kvm_vcpu *vcpu) +{ + if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS) + return -EINVAL; + + if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) { + __set_regs(vcpu, &vcpu->run->s.regs.regs); + vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS; + } + if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) { + if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs)) + return -EINVAL; + vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS; + } + if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) { + if (kvm_vcpu_ioctl_x86_set_vcpu_events( + vcpu, &vcpu->run->s.regs.events)) + return -EINVAL; + vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS; + } + + return 0; +} + static void fx_init(struct kvm_vcpu *vcpu) { fpstate_init(&vcpu->arch.guest_fpu.state); @@ -8447,7 +8682,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) raw_spin_lock_init(&kvm->arch.tsc_write_lock); mutex_init(&kvm->arch.apic_map_lock); - mutex_init(&kvm->arch.hyperv.hv_lock); spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); @@ -8456,6 +8690,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); + kvm_hv_init_vm(kvm); kvm_page_track_init(kvm); kvm_mmu_init_vm(kvm); @@ -8586,6 +8821,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kvm_mmu_uninit_vm(kvm); kvm_page_track_cleanup(kvm); + kvm_hv_destroy_vm(kvm); } void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index b91215d1fd80..7d35ce672989 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -2,12 +2,48 @@ #ifndef ARCH_X86_KVM_X86_H #define ARCH_X86_KVM_X86_H -#include <asm/processor.h> -#include <asm/mwait.h> #include <linux/kvm_host.h> #include <asm/pvclock.h> #include "kvm_cache_regs.h" +#define KVM_DEFAULT_PLE_GAP 128 +#define KVM_VMX_DEFAULT_PLE_WINDOW 4096 +#define KVM_DEFAULT_PLE_WINDOW_GROW 2 +#define KVM_DEFAULT_PLE_WINDOW_SHRINK 0 +#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX UINT_MAX +#define KVM_SVM_DEFAULT_PLE_WINDOW_MAX USHRT_MAX +#define KVM_SVM_DEFAULT_PLE_WINDOW 3000 + +static inline unsigned int __grow_ple_window(unsigned int val, + unsigned int base, unsigned int modifier, unsigned int max) +{ + u64 ret = val; + + if (modifier < 1) + return base; + + if (modifier < base) + ret *= modifier; + else + ret += modifier; + + return min(ret, (u64)max); +} + +static inline unsigned int __shrink_ple_window(unsigned int val, + unsigned int base, unsigned int modifier, unsigned int min) +{ + if (modifier < 1) + return base; + + if (modifier < base) + val /= modifier; + else + val -= modifier; + + return max(val, min); +} + #define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) @@ -19,19 +55,19 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector, bool soft) { - vcpu->arch.interrupt.pending = true; + vcpu->arch.interrupt.injected = true; vcpu->arch.interrupt.soft = soft; vcpu->arch.interrupt.nr = vector; } static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu) { - vcpu->arch.interrupt.pending = false; + vcpu->arch.interrupt.injected = false; } static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu) { - return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending || + return vcpu->arch.exception.injected || vcpu->arch.interrupt.injected || vcpu->arch.nmi_injected; } @@ -205,8 +241,6 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk) return !(kvm->arch.disabled_quirks & quirk); } -void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); -void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); void kvm_set_pending_timer(struct kvm_vcpu *vcpu); int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); @@ -221,6 +255,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception); +int handle_ud(struct kvm_vcpu *vcpu); + void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu); u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data); @@ -242,6 +278,8 @@ extern unsigned int min_timer_period_us; extern unsigned int lapic_timer_advance_ns; +extern bool enable_vmware_backdoor; + extern struct static_key kvm_no_apic_vcpu; static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) @@ -264,10 +302,38 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) __rem; \ }) -static inline bool kvm_mwait_in_guest(void) +#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) +#define KVM_X86_DISABLE_EXITS_HTL (1 << 1) +#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) +#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ + KVM_X86_DISABLE_EXITS_HTL | \ + KVM_X86_DISABLE_EXITS_PAUSE) + +static inline bool kvm_mwait_in_guest(struct kvm *kvm) +{ + return kvm->arch.mwait_in_guest; +} + +static inline bool kvm_hlt_in_guest(struct kvm *kvm) +{ + return kvm->arch.hlt_in_guest; +} + +static inline bool kvm_pause_in_guest(struct kvm *kvm) +{ + return kvm->arch.pause_in_guest; +} + +DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu); + +static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu) +{ + __this_cpu_write(current_vcpu, vcpu); +} + +static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu) { - return boot_cpu_has(X86_FEATURE_MWAIT) && - !boot_cpu_has_bug(X86_BUG_MONITOR); + __this_cpu_write(current_vcpu, NULL); } #endif diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 396e1f0151ac..8008db2bddb3 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -778,6 +778,7 @@ void __init mem_init(void) free_all_bootmem(); after_bootmem = 1; + x86_init.hyper.init_after_bootmem(); mem_init_print_info(NULL); printk(KERN_INFO "virtual kernel memory layout:\n" diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 45241de66785..66de40e45f58 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1185,6 +1185,7 @@ void __init mem_init(void) /* this will put all memory onto the freelists */ free_all_bootmem(); after_bootmem = 1; + x86_init.hyper.init_after_bootmem(); /* * Must be done after boot memory is put on freelist, because here we @@ -1328,14 +1329,39 @@ int kern_addr_valid(unsigned long addr) return pfn_valid(pte_pfn(*pte)); } +/* + * Block size is the minimum amount of memory which can be hotplugged or + * hotremoved. It must be power of two and must be equal or larger than + * MIN_MEMORY_BLOCK_SIZE. + */ +#define MAX_BLOCK_SIZE (2UL << 30) + +/* Amount of ram needed to start using large blocks */ +#define MEM_SIZE_FOR_LARGE_BLOCK (64UL << 30) + static unsigned long probe_memory_block_size(void) { - unsigned long bz = MIN_MEMORY_BLOCK_SIZE; + unsigned long boot_mem_end = max_pfn << PAGE_SHIFT; + unsigned long bz; - /* if system is UV or has 64GB of RAM or more, use large blocks */ - if (is_uv_system() || ((max_pfn << PAGE_SHIFT) >= (64UL << 30))) - bz = 2UL << 30; /* 2GB */ + /* If this is UV system, always set 2G block size */ + if (is_uv_system()) { + bz = MAX_BLOCK_SIZE; + goto done; + } + /* Use regular block if RAM is smaller than MEM_SIZE_FOR_LARGE_BLOCK */ + if (boot_mem_end < MEM_SIZE_FOR_LARGE_BLOCK) { + bz = MIN_MEMORY_BLOCK_SIZE; + goto done; + } + + /* Find the largest allowed block size that aligns to memory end */ + for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) { + if (IS_ALIGNED(boot_mem_end, bz)) + break; + } +done: pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20); return bz; diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 155ecbac9e28..48c591251600 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -90,9 +90,10 @@ unsigned long arch_mmap_rnd(void) return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits); } -static unsigned long mmap_base(unsigned long rnd, unsigned long task_size) +static unsigned long mmap_base(unsigned long rnd, unsigned long task_size, + struct rlimit *rlim_stack) { - unsigned long gap = rlimit(RLIMIT_STACK); + unsigned long gap = rlim_stack->rlim_cur; unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap; unsigned long gap_min, gap_max; @@ -126,16 +127,17 @@ static unsigned long mmap_legacy_base(unsigned long rnd, * process VM image, sets up which VM layout function to use: */ static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base, - unsigned long random_factor, unsigned long task_size) + unsigned long random_factor, unsigned long task_size, + struct rlimit *rlim_stack) { *legacy_base = mmap_legacy_base(random_factor, task_size); if (mmap_is_legacy()) *base = *legacy_base; else - *base = mmap_base(random_factor, task_size); + *base = mmap_base(random_factor, task_size, rlim_stack); } -void arch_pick_mmap_layout(struct mm_struct *mm) +void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) { if (mmap_is_legacy()) mm->get_unmapped_area = arch_get_unmapped_area; @@ -143,7 +145,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm) mm->get_unmapped_area = arch_get_unmapped_area_topdown; arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base, - arch_rnd(mmap64_rnd_bits), task_size_64bit(0)); + arch_rnd(mmap64_rnd_bits), task_size_64bit(0), + rlim_stack); #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES /* @@ -153,7 +156,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm) * mmap_base, the compat syscall uses mmap_compat_base. */ arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base, - arch_rnd(mmap32_rnd_bits), task_size_32bit()); + arch_rnd(mmap32_rnd_bits), task_size_32bit(), + rlim_stack); #endif } diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c index 1518d2805ae8..27361cbb7ca9 100644 --- a/arch/x86/um/stub_segv.c +++ b/arch/x86/um/stub_segv.c @@ -6,11 +6,12 @@ #include <sysdep/stub.h> #include <sysdep/faultinfo.h> #include <sysdep/mcontext.h> +#include <sys/ucontext.h> void __attribute__ ((__section__ (".__syscall_stub"))) stub_segv_handler(int sig, siginfo_t *info, void *p) { - struct ucontext *uc = p; + ucontext_t *uc = p; GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA), &uc->uc_mcontext); diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index d20763472920..486c0a34d00b 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -116,6 +116,8 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ static phys_addr_t xen_pt_base, xen_pt_size __initdata; +static DEFINE_STATIC_KEY_FALSE(xen_struct_pages_ready); + /* * Just beyond the highest usermode address. STACK_TOP_MAX has a * redzone above it, so round it up to a PGD boundary. @@ -155,11 +157,18 @@ void make_lowmem_page_readwrite(void *vaddr) } +/* + * During early boot all page table pages are pinned, but we do not have struct + * pages, so return true until struct pages are ready. + */ static bool xen_page_pinned(void *ptr) { - struct page *page = virt_to_page(ptr); + if (static_branch_likely(&xen_struct_pages_ready)) { + struct page *page = virt_to_page(ptr); - return PagePinned(page); + return PagePinned(page); + } + return true; } static void xen_extend_mmu_update(const struct mmu_update *update) @@ -836,11 +845,6 @@ void xen_mm_pin_all(void) spin_unlock(&pgd_lock); } -/* - * The init_mm pagetable is really pinned as soon as its created, but - * that's before we have page structures to store the bits. So do all - * the book-keeping now. - */ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, enum pt_level level) { @@ -848,8 +852,18 @@ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, return 0; } -static void __init xen_mark_init_mm_pinned(void) +/* + * The init_mm pagetable is really pinned as soon as its created, but + * that's before we have page structures to store the bits. So do all + * the book-keeping now once struct pages for allocated pages are + * initialized. This happens only after free_all_bootmem() is called. + */ +static void __init xen_after_bootmem(void) { + static_branch_enable(&xen_struct_pages_ready); +#ifdef CONFIG_X86_64 + SetPagePinned(virt_to_page(level3_user_vsyscall)); +#endif xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); } @@ -1623,14 +1637,15 @@ static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot) static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) { - bool pinned = PagePinned(virt_to_page(mm->pgd)); + bool pinned = xen_page_pinned(mm->pgd); trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned); if (pinned) { struct page *page = pfn_to_page(pfn); - SetPagePinned(page); + if (static_branch_likely(&xen_struct_pages_ready)) + SetPagePinned(page); if (!PageHighMem(page)) { xen_mc_batch(); @@ -2364,9 +2379,7 @@ static void __init xen_post_allocator_init(void) #ifdef CONFIG_X86_64 pv_mmu_ops.write_cr3 = &xen_write_cr3; - SetPagePinned(virt_to_page(level3_user_vsyscall)); #endif - xen_mark_init_mm_pinned(); } static void xen_leave_lazy_mmu(void) @@ -2450,6 +2463,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { void __init xen_init_mmu_ops(void) { x86_init.paging.pagetable_init = xen_pagetable_init; + x86_init.hyper.init_after_bootmem = xen_after_bootmem; pv_mmu_ops = xen_mmu_ops; diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index c0c756c76afe..2e20ae2fa2d6 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -425,6 +425,7 @@ static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */ * data back is to call: */ tick_nohz_idle_enter(); + tick_nohz_idle_stop_tick_protected(); cpuhp_online_idle(CPUHP_AP_ONLINE_IDLE); } |