diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 16 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll | 2138 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll | 1210 |
3 files changed, 132 insertions, 3232 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a55770ae2f1..9d9ee212bbf 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -23602,6 +23602,22 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget, MVT SrcVT = Op.getOperand(0).getSimpleValueType(); MVT DstVT = Op.getSimpleValueType(); + // Legalize (v64i1 (bitcast i64 (X))) by splitting the i64, bitcasting each + // half to v32i1 and concatenating the result. + if (SrcVT == MVT::i64 && DstVT == MVT::v64i1) { + assert(!Subtarget.is64Bit() && "Expected 32-bit mode"); + assert(Subtarget.hasBWI() && "Expected BWI target"); + SDValue Op0 = Op->getOperand(0); + SDLoc dl(Op); + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0, + DAG.getIntPtrConstant(0, dl)); + Lo = DAG.getBitcast(MVT::v32i1, Lo); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0, + DAG.getIntPtrConstant(1, dl)); + Hi = DAG.getBitcast(MVT::v32i1, Hi); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi); + } + if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 || SrcVT == MVT::i64) { assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll index 1315fb474bb..904e21d10f3 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll @@ -107,535 +107,11 @@ entry: define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext %__A) { ; X32-LABEL: test_mm512_mask_set1_epi8: ; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebx -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: .cfi_offset %esi, -12 -; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: kmovd %eax, %k0 -; X32-NEXT: kshiftrq $1, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $2, %cl -; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $62, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $2, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $61, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $3, %k0, %k1 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $60, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $4, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $4, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $59, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $5, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $5, %cl -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $58, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $6, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $6, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $57, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $7, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $56, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $8, %k0, %k1 -; X32-NEXT: movb %ah, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $55, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $9, %k0, %k1 -; X32-NEXT: andb $2, %cl -; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $54, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $10, %k0, %k1 -; X32-NEXT: movb %ah, %cl -; X32-NEXT: movl %eax, %esi -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: movzwl %si, %edx -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: shrl $12, %ecx -; X32-NEXT: kmovd %ecx, %k3 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $13, %ecx -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k4 -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: shrl $14, %ecx -; X32-NEXT: kmovd %ecx, %k5 -; X32-NEXT: shrl $16, %eax -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: andb $2, %bl -; X32-NEXT: shrb %bl -; X32-NEXT: kmovd %ebx, %k6 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %ebx -; X32-NEXT: shrb $2, %bl -; X32-NEXT: kmovd %ebx, %k7 -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $53, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $11, %k0, %k1 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $52, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $12, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $51, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $13, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $50, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $14, %k0, %k1 -; X32-NEXT: kxorq %k5, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $49, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $15, %k0, %k1 -; X32-NEXT: shrl $15, %edx -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $48, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $16, %k0, %k1 -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $47, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $17, %k0, %k1 -; X32-NEXT: kxorq %k6, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $46, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $18, %k0, %k1 -; X32-NEXT: kxorq %k7, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $45, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $19, %k0, %k1 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $44, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $20, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $4, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $43, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $21, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $5, %cl -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $42, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $22, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $6, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $41, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $23, %k0, %k1 -; X32-NEXT: shrb $7, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $40, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $24, %k0, %k1 -; X32-NEXT: movl %esi, %edx -; X32-NEXT: shrl $24, %edx -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $39, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $25, %k0, %k1 -; X32-NEXT: movl %edx, %eax -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $38, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $26, %k0, %k1 -; X32-NEXT: andb $15, %dl -; X32-NEXT: movl %edx, %eax -; X32-NEXT: shrb $2, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $37, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $27, %k0, %k1 -; X32-NEXT: shrb $3, %dl -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $36, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $28, %k0, %k1 -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: shrl $28, %eax -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $35, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $29, %k0, %k1 -; X32-NEXT: movl %esi, %eax -; X32-NEXT: shrl $29, %eax -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $34, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $30, %k0, %k1 -; X32-NEXT: movl %esi, %eax -; X32-NEXT: shrl $30, %eax -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $33, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $31, %k0, %k1 -; X32-NEXT: shrl $31, %ecx -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $32, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $32, %k0, %k1 -; X32-NEXT: kmovd %ebx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $31, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $33, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $30, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $34, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: andb $15, %al -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $2, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $29, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $35, %k0, %k1 -; X32-NEXT: shrb $3, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $28, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $36, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrb $4, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $27, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $37, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrb $5, %al -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $26, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $38, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrb $6, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $25, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $39, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrb $7, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $24, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $40, %k0, %k1 -; X32-NEXT: movb %bh, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: movb %bh, %al -; X32-NEXT: andb $15, %al -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $2, %cl -; X32-NEXT: kmovd %ecx, %k3 -; X32-NEXT: shrb $3, %al -; X32-NEXT: kmovd %eax, %k4 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrl $13, %eax -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k5 -; X32-NEXT: movl %ebx, %ecx -; X32-NEXT: shrl $16, %ecx -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k6 -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: andb $15, %dl -; X32-NEXT: movl %edx, %eax -; X32-NEXT: shrb $2, %al -; X32-NEXT: kmovd %eax, %k7 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $23, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $41, %k0, %k1 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $22, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $42, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 -; X32-NEXT: movzwl %bx, %eax -; X32-NEXT: movl %eax, %esi -; X32-NEXT: shrl $12, %esi -; X32-NEXT: kmovd %esi, %k2 -; X32-NEXT: movl %eax, %esi -; X32-NEXT: shrl $14, %esi -; X32-NEXT: kmovd %esi, %k3 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $21, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $43, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 -; X32-NEXT: shrl $15, %eax -; X32-NEXT: kmovd %eax, %k4 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $20, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $44, %k0, %k1 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $19, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $45, %k0, %k1 -; X32-NEXT: kxorq %k5, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $18, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $46, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $17, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $47, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $16, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $48, %k0, %k1 -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $15, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $49, %k0, %k1 -; X32-NEXT: kxorq %k6, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $14, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $50, %k0, %k1 -; X32-NEXT: kxorq %k7, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $13, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $51, %k0, %k1 -; X32-NEXT: shrb $3, %dl -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $12, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k4 -; X32-NEXT: kshiftrq $52, %k4, %k0 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrb $4, %al -; X32-NEXT: kmovd %eax, %k1 -; X32-NEXT: kxorq %k1, %k0, %k5 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrb $5, %al -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k6 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrb $6, %al -; X32-NEXT: kmovd %eax, %k7 -; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: movl %ebx, %ecx -; X32-NEXT: shrl $24, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrb $2, %al -; X32-NEXT: kmovd %eax, %k3 -; X32-NEXT: kshiftlq $63, %k5, %k5 -; X32-NEXT: kshiftrq $11, %k5, %k5 -; X32-NEXT: kxorq %k5, %k4, %k4 -; X32-NEXT: kshiftrq $53, %k4, %k5 -; X32-NEXT: kxorq %k6, %k5, %k5 -; X32-NEXT: kshiftlq $63, %k5, %k5 -; X32-NEXT: kshiftrq $10, %k5, %k5 -; X32-NEXT: kxorq %k5, %k4, %k5 -; X32-NEXT: kshiftrq $54, %k5, %k4 -; X32-NEXT: kxorq %k7, %k4, %k6 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k4 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrl $29, %eax -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k7 -; X32-NEXT: kshiftlq $63, %k6, %k6 -; X32-NEXT: kshiftrq $9, %k6, %k6 -; X32-NEXT: kxorq %k6, %k5, %k5 -; X32-NEXT: kshiftrq $55, %k5, %k6 -; X32-NEXT: kxorq %k0, %k6, %k0 -; X32-NEXT: kshiftlq $63, %k0, %k0 -; X32-NEXT: kshiftrq $8, %k0, %k0 -; X32-NEXT: kxorq %k0, %k5, %k0 -; X32-NEXT: kshiftrq $56, %k0, %k5 -; X32-NEXT: kxorq %k1, %k5, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrl $28, %eax -; X32-NEXT: kmovd %eax, %k5 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrl $30, %eax -; X32-NEXT: kmovd %eax, %k6 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $7, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $57, %k0, %k1 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $6, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $58, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $5, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $59, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $4, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $60, %k0, %k1 -; X32-NEXT: kxorq %k5, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $3, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $61, %k0, %k1 -; X32-NEXT: kxorq %k7, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $2, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $62, %k0, %k1 -; X32-NEXT: kxorq %k6, %k1, %k1 -; X32-NEXT: shrl $31, %ebx -; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $1, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftlq $1, %k0, %k0 -; X32-NEXT: kshiftrq $1, %k0, %k0 -; X32-NEXT: kshiftlq $63, %k2, %k1 -; X32-NEXT: korq %k1, %k0, %k1 +; X32-NEXT: kunpckdq %k1, %k0, %k1 ; X32-NEXT: vpbroadcastb %eax, %zmm0 {%k1} -; X32-NEXT: popl %esi -; X32-NEXT: popl %ebx ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_mask_set1_epi8: @@ -656,535 +132,11 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) { ; X32-LABEL: test_mm512_maskz_set1_epi8: ; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebx -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: .cfi_offset %esi, -12 -; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: kmovd %eax, %k0 -; X32-NEXT: kshiftrq $1, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $2, %cl -; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $62, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $2, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $61, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $3, %k0, %k1 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $60, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $4, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $4, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $59, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $5, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $5, %cl -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $58, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $6, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $6, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $57, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $7, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $56, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $8, %k0, %k1 -; X32-NEXT: movb %ah, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $55, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $9, %k0, %k1 -; X32-NEXT: andb $2, %cl -; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $54, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $10, %k0, %k1 -; X32-NEXT: movb %ah, %cl -; X32-NEXT: movl %eax, %esi -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: movzwl %si, %edx -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: shrl $12, %ecx -; X32-NEXT: kmovd %ecx, %k3 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $13, %ecx -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k4 -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: shrl $14, %ecx -; X32-NEXT: kmovd %ecx, %k5 -; X32-NEXT: shrl $16, %eax -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: andb $2, %bl -; X32-NEXT: shrb %bl -; X32-NEXT: kmovd %ebx, %k6 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %ebx -; X32-NEXT: shrb $2, %bl -; X32-NEXT: kmovd %ebx, %k7 -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $53, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $11, %k0, %k1 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $52, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $12, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $51, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $13, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $50, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $14, %k0, %k1 -; X32-NEXT: kxorq %k5, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $49, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $15, %k0, %k1 -; X32-NEXT: shrl $15, %edx -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $48, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $16, %k0, %k1 -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $47, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $17, %k0, %k1 -; X32-NEXT: kxorq %k6, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $46, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $18, %k0, %k1 -; X32-NEXT: kxorq %k7, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $45, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $19, %k0, %k1 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $44, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $20, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $4, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $43, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $21, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $5, %cl -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $42, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $22, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $6, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $41, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $23, %k0, %k1 -; X32-NEXT: shrb $7, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $40, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $24, %k0, %k1 -; X32-NEXT: movl %esi, %edx -; X32-NEXT: shrl $24, %edx -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $39, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $25, %k0, %k1 -; X32-NEXT: movl %edx, %eax -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $38, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $26, %k0, %k1 -; X32-NEXT: andb $15, %dl -; X32-NEXT: movl %edx, %eax -; X32-NEXT: shrb $2, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $37, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $27, %k0, %k1 -; X32-NEXT: shrb $3, %dl -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $36, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $28, %k0, %k1 -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: shrl $28, %eax -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $35, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $29, %k0, %k1 -; X32-NEXT: movl %esi, %eax -; X32-NEXT: shrl $29, %eax -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $34, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $30, %k0, %k1 -; X32-NEXT: movl %esi, %eax -; X32-NEXT: shrl $30, %eax -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $33, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $31, %k0, %k1 -; X32-NEXT: shrl $31, %ecx -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $32, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $32, %k0, %k1 -; X32-NEXT: kmovd %ebx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $31, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $33, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $30, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $34, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: andb $15, %al -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $2, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $29, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $35, %k0, %k1 -; X32-NEXT: shrb $3, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $28, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $36, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrb $4, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $27, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $37, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrb $5, %al -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $26, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $38, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrb $6, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $25, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $39, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrb $7, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $24, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $40, %k0, %k1 -; X32-NEXT: movb %bh, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: movb %bh, %al -; X32-NEXT: andb $15, %al -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $2, %cl -; X32-NEXT: kmovd %ecx, %k3 -; X32-NEXT: shrb $3, %al -; X32-NEXT: kmovd %eax, %k4 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrl $13, %eax -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k5 -; X32-NEXT: movl %ebx, %ecx -; X32-NEXT: shrl $16, %ecx -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k6 -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: andb $15, %dl -; X32-NEXT: movl %edx, %eax -; X32-NEXT: shrb $2, %al -; X32-NEXT: kmovd %eax, %k7 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $23, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $41, %k0, %k1 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $22, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $42, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 -; X32-NEXT: movzwl %bx, %eax -; X32-NEXT: movl %eax, %esi -; X32-NEXT: shrl $12, %esi -; X32-NEXT: kmovd %esi, %k2 -; X32-NEXT: movl %eax, %esi -; X32-NEXT: shrl $14, %esi -; X32-NEXT: kmovd %esi, %k3 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $21, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $43, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 -; X32-NEXT: shrl $15, %eax -; X32-NEXT: kmovd %eax, %k4 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $20, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $44, %k0, %k1 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $19, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $45, %k0, %k1 -; X32-NEXT: kxorq %k5, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $18, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $46, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $17, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $47, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $16, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $48, %k0, %k1 -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $15, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $49, %k0, %k1 -; X32-NEXT: kxorq %k6, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $14, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $50, %k0, %k1 -; X32-NEXT: kxorq %k7, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $13, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $51, %k0, %k1 -; X32-NEXT: shrb $3, %dl -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $12, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k4 -; X32-NEXT: kshiftrq $52, %k4, %k0 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrb $4, %al -; X32-NEXT: kmovd %eax, %k1 -; X32-NEXT: kxorq %k1, %k0, %k5 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrb $5, %al -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k6 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrb $6, %al -; X32-NEXT: kmovd %eax, %k7 -; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: movl %ebx, %ecx -; X32-NEXT: shrl $24, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrb $2, %al -; X32-NEXT: kmovd %eax, %k3 -; X32-NEXT: kshiftlq $63, %k5, %k5 -; X32-NEXT: kshiftrq $11, %k5, %k5 -; X32-NEXT: kxorq %k5, %k4, %k4 -; X32-NEXT: kshiftrq $53, %k4, %k5 -; X32-NEXT: kxorq %k6, %k5, %k5 -; X32-NEXT: kshiftlq $63, %k5, %k5 -; X32-NEXT: kshiftrq $10, %k5, %k5 -; X32-NEXT: kxorq %k5, %k4, %k5 -; X32-NEXT: kshiftrq $54, %k5, %k4 -; X32-NEXT: kxorq %k7, %k4, %k6 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k4 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrl $29, %eax -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k7 -; X32-NEXT: kshiftlq $63, %k6, %k6 -; X32-NEXT: kshiftrq $9, %k6, %k6 -; X32-NEXT: kxorq %k6, %k5, %k5 -; X32-NEXT: kshiftrq $55, %k5, %k6 -; X32-NEXT: kxorq %k0, %k6, %k0 -; X32-NEXT: kshiftlq $63, %k0, %k0 -; X32-NEXT: kshiftrq $8, %k0, %k0 -; X32-NEXT: kxorq %k0, %k5, %k0 -; X32-NEXT: kshiftrq $56, %k0, %k5 -; X32-NEXT: kxorq %k1, %k5, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrl $28, %eax -; X32-NEXT: kmovd %eax, %k5 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrl $30, %eax -; X32-NEXT: kmovd %eax, %k6 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $7, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $57, %k0, %k1 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $6, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $58, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $5, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $59, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $4, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $60, %k0, %k1 -; X32-NEXT: kxorq %k5, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $3, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $61, %k0, %k1 -; X32-NEXT: kxorq %k7, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $2, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $62, %k0, %k1 -; X32-NEXT: kxorq %k6, %k1, %k1 -; X32-NEXT: shrl $31, %ebx -; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $1, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftlq $1, %k0, %k0 -; X32-NEXT: kshiftrq $1, %k0, %k0 -; X32-NEXT: kshiftlq $63, %k2, %k1 -; X32-NEXT: korq %k1, %k0, %k1 +; X32-NEXT: kunpckdq %k1, %k0, %k1 ; X32-NEXT: vpbroadcastb %eax, %zmm0 {%k1} {z} -; X32-NEXT: popl %esi -; X32-NEXT: popl %ebx ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_maskz_set1_epi8: @@ -1672,537 +624,14 @@ entry: define i64 @test_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) { ; X32-LABEL: test_mm512_mask_test_epi8_mask: ; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebx -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: .cfi_offset %esi, -12 -; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: kmovd %eax, %k0 -; X32-NEXT: kshiftrq $1, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $2, %cl -; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $62, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $2, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $61, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $3, %k0, %k1 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $60, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $4, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $4, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $59, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $5, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $5, %cl -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $58, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $6, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $6, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $57, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $7, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $56, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $8, %k0, %k1 -; X32-NEXT: movb %ah, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $55, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $9, %k0, %k1 -; X32-NEXT: andb $2, %cl -; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $54, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $10, %k0, %k1 -; X32-NEXT: movb %ah, %cl -; X32-NEXT: movl %eax, %esi -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: movzwl %si, %edx -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: shrl $12, %ecx -; X32-NEXT: kmovd %ecx, %k3 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $13, %ecx -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k4 -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: shrl $14, %ecx -; X32-NEXT: kmovd %ecx, %k5 -; X32-NEXT: shrl $16, %eax -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: andb $2, %bl -; X32-NEXT: shrb %bl -; X32-NEXT: kmovd %ebx, %k6 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %ebx -; X32-NEXT: shrb $2, %bl -; X32-NEXT: kmovd %ebx, %k7 -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $53, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $11, %k0, %k1 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $52, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $12, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $51, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $13, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $50, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $14, %k0, %k1 -; X32-NEXT: kxorq %k5, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $49, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $15, %k0, %k1 -; X32-NEXT: shrl $15, %edx -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $48, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $16, %k0, %k1 -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $47, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $17, %k0, %k1 -; X32-NEXT: kxorq %k6, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $46, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $18, %k0, %k1 -; X32-NEXT: kxorq %k7, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $45, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $19, %k0, %k1 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $44, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $20, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $4, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $43, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $21, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $5, %cl -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $42, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $22, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $6, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $41, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $23, %k0, %k1 -; X32-NEXT: shrb $7, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $40, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $24, %k0, %k1 -; X32-NEXT: movl %esi, %edx -; X32-NEXT: shrl $24, %edx -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $39, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $25, %k0, %k1 -; X32-NEXT: movl %edx, %eax -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $38, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $26, %k0, %k1 -; X32-NEXT: andb $15, %dl -; X32-NEXT: movl %edx, %eax -; X32-NEXT: shrb $2, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $37, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $27, %k0, %k1 -; X32-NEXT: shrb $3, %dl -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $36, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $28, %k0, %k1 -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: shrl $28, %eax -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $35, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $29, %k0, %k1 -; X32-NEXT: movl %esi, %eax -; X32-NEXT: shrl $29, %eax -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $34, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $30, %k0, %k1 -; X32-NEXT: movl %esi, %eax -; X32-NEXT: shrl $30, %eax -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $33, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $31, %k0, %k1 -; X32-NEXT: shrl $31, %ecx -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $32, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $32, %k0, %k1 -; X32-NEXT: kmovd %ebx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $31, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $33, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $30, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $34, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: andb $15, %al -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $2, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $29, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $35, %k0, %k1 -; X32-NEXT: shrb $3, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $28, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $36, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrb $4, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $27, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $37, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrb $5, %al -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $26, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $38, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrb $6, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $25, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $39, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrb $7, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $24, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $40, %k0, %k1 -; X32-NEXT: movb %bh, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: movb %bh, %al -; X32-NEXT: andb $15, %al -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $2, %cl -; X32-NEXT: kmovd %ecx, %k3 -; X32-NEXT: shrb $3, %al -; X32-NEXT: kmovd %eax, %k4 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrl $13, %eax -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k5 -; X32-NEXT: movl %ebx, %ecx -; X32-NEXT: shrl $16, %ecx -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k6 -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: andb $15, %dl -; X32-NEXT: movl %edx, %eax -; X32-NEXT: shrb $2, %al -; X32-NEXT: kmovd %eax, %k7 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $23, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $41, %k0, %k1 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $22, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $42, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $21, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $43, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $20, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $44, %k0, %k1 -; X32-NEXT: movzwl %bx, %eax -; X32-NEXT: movl %eax, %esi -; X32-NEXT: shrl $12, %esi -; X32-NEXT: kmovd %esi, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $19, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $45, %k0, %k1 -; X32-NEXT: kxorq %k5, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $18, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $46, %k0, %k1 -; X32-NEXT: movl %eax, %esi -; X32-NEXT: shrl $14, %esi -; X32-NEXT: kmovd %esi, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $17, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $47, %k0, %k1 -; X32-NEXT: shrl $15, %eax -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $16, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $48, %k0, %k1 -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $15, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $49, %k0, %k1 -; X32-NEXT: kxorq %k6, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $14, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $50, %k0, %k1 -; X32-NEXT: kxorq %k7, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $13, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $51, %k0, %k1 -; X32-NEXT: shrb $3, %dl -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $12, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $52, %k0, %k1 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrb $4, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrb $5, %al -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrb $6, %al -; X32-NEXT: kmovd %eax, %k3 -; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k4 -; X32-NEXT: movl %ebx, %ecx -; X32-NEXT: shrl $24, %ecx -; X32-NEXT: kmovd %ecx, %k5 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k6 -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrb $2, %al -; X32-NEXT: kmovd %eax, %k7 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $11, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $53, %k0, %k1 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $10, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $54, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $9, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $55, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $8, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $56, %k0, %k1 -; X32-NEXT: kxorq %k5, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $7, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $57, %k0, %k1 -; X32-NEXT: kxorq %k6, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $6, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $58, %k0, %k1 -; X32-NEXT: kxorq %k7, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $5, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $59, %k0, %k1 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $4, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $60, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrl $28, %eax -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $3, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $61, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrl $29, %eax -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $2, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $62, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrl $30, %eax -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $1, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftlq $1, %k0, %k0 -; X32-NEXT: kshiftrq $1, %k0, %k0 -; X32-NEXT: shrl $31, %ebx -; X32-NEXT: kmovd %ebx, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: korq %k1, %k0, %k1 -; X32-NEXT: vptestmb %zmm0, %zmm1, %k0 {%k1} -; X32-NEXT: kshiftrq $32, %k0, %k1 -; X32-NEXT: kmovd %k0, %eax -; X32-NEXT: kmovd %k1, %edx -; X32-NEXT: popl %esi -; X32-NEXT: popl %ebx +; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; X32-NEXT: vptestmb %zmm0, %zmm1, %k2 +; X32-NEXT: kandd %k1, %k2, %k1 +; X32-NEXT: kmovd %k1, %eax +; X32-NEXT: kshiftrq $32, %k2, %k1 +; X32-NEXT: kandd %k0, %k1, %k0 +; X32-NEXT: kmovd %k0, %edx ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; @@ -2298,537 +727,14 @@ entry: define i64 @test_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) { ; X32-LABEL: test_mm512_mask_testn_epi8_mask: ; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebx -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: .cfi_offset %esi, -12 -; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: kmovd %eax, %k0 -; X32-NEXT: kshiftrq $1, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $2, %cl -; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $62, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $2, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $61, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $3, %k0, %k1 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $60, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $4, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $4, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $59, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $5, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $5, %cl -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $58, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $6, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $6, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $57, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $7, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $56, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $8, %k0, %k1 -; X32-NEXT: movb %ah, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $55, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $9, %k0, %k1 -; X32-NEXT: andb $2, %cl -; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $54, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $10, %k0, %k1 -; X32-NEXT: movb %ah, %cl -; X32-NEXT: movl %eax, %esi -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: movzwl %si, %edx -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: shrl $12, %ecx -; X32-NEXT: kmovd %ecx, %k3 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $13, %ecx -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k4 -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: shrl $14, %ecx -; X32-NEXT: kmovd %ecx, %k5 -; X32-NEXT: shrl $16, %eax -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: andb $2, %bl -; X32-NEXT: shrb %bl -; X32-NEXT: kmovd %ebx, %k6 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %ebx -; X32-NEXT: shrb $2, %bl -; X32-NEXT: kmovd %ebx, %k7 -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $53, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $11, %k0, %k1 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $52, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $12, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $51, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $13, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $50, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $14, %k0, %k1 -; X32-NEXT: kxorq %k5, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $49, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $15, %k0, %k1 -; X32-NEXT: shrl $15, %edx -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $48, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $16, %k0, %k1 -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $47, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $17, %k0, %k1 -; X32-NEXT: kxorq %k6, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $46, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $18, %k0, %k1 -; X32-NEXT: kxorq %k7, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $45, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $19, %k0, %k1 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $44, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $20, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $4, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $43, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $21, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $5, %cl -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $42, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $22, %k0, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $6, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $41, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $23, %k0, %k1 -; X32-NEXT: shrb $7, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $40, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $24, %k0, %k1 -; X32-NEXT: movl %esi, %edx -; X32-NEXT: shrl $24, %edx -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $39, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $25, %k0, %k1 -; X32-NEXT: movl %edx, %eax -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $38, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $26, %k0, %k1 -; X32-NEXT: andb $15, %dl -; X32-NEXT: movl %edx, %eax -; X32-NEXT: shrb $2, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $37, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $27, %k0, %k1 -; X32-NEXT: shrb $3, %dl -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $36, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $28, %k0, %k1 -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: shrl $28, %eax -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $35, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $29, %k0, %k1 -; X32-NEXT: movl %esi, %eax -; X32-NEXT: shrl $29, %eax -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $34, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $30, %k0, %k1 -; X32-NEXT: movl %esi, %eax -; X32-NEXT: shrl $30, %eax -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $33, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $31, %k0, %k1 -; X32-NEXT: shrl $31, %ecx -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $32, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $32, %k0, %k1 -; X32-NEXT: kmovd %ebx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $31, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $33, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $30, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $34, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: andb $15, %al -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $2, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $29, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $35, %k0, %k1 -; X32-NEXT: shrb $3, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $28, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $36, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrb $4, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $27, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $37, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrb $5, %al -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $26, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $38, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrb $6, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $25, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $39, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrb $7, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $24, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $40, %k0, %k1 -; X32-NEXT: movb %bh, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: movb %bh, %al -; X32-NEXT: andb $15, %al -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $2, %cl -; X32-NEXT: kmovd %ecx, %k3 -; X32-NEXT: shrb $3, %al -; X32-NEXT: kmovd %eax, %k4 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrl $13, %eax -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k5 -; X32-NEXT: movl %ebx, %ecx -; X32-NEXT: shrl $16, %ecx -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k6 -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: andb $15, %dl -; X32-NEXT: movl %edx, %eax -; X32-NEXT: shrb $2, %al -; X32-NEXT: kmovd %eax, %k7 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $23, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $41, %k0, %k1 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $22, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $42, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $21, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $43, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $20, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $44, %k0, %k1 -; X32-NEXT: movzwl %bx, %eax -; X32-NEXT: movl %eax, %esi -; X32-NEXT: shrl $12, %esi -; X32-NEXT: kmovd %esi, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $19, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $45, %k0, %k1 -; X32-NEXT: kxorq %k5, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $18, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $46, %k0, %k1 -; X32-NEXT: movl %eax, %esi -; X32-NEXT: shrl $14, %esi -; X32-NEXT: kmovd %esi, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $17, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $47, %k0, %k1 -; X32-NEXT: shrl $15, %eax -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $16, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $48, %k0, %k1 -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $15, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $49, %k0, %k1 -; X32-NEXT: kxorq %k6, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $14, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $50, %k0, %k1 -; X32-NEXT: kxorq %k7, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $13, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $51, %k0, %k1 -; X32-NEXT: shrb $3, %dl -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $12, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $52, %k0, %k1 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrb $4, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrb $5, %al -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrb $6, %al -; X32-NEXT: kmovd %eax, %k3 -; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k4 -; X32-NEXT: movl %ebx, %ecx -; X32-NEXT: shrl $24, %ecx -; X32-NEXT: kmovd %ecx, %k5 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: andb $2, %al -; X32-NEXT: shrb %al -; X32-NEXT: kmovd %eax, %k6 -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrb $2, %al -; X32-NEXT: kmovd %eax, %k7 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $11, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $53, %k0, %k1 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $10, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $54, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $9, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $55, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $8, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $56, %k0, %k1 -; X32-NEXT: kxorq %k5, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $7, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $57, %k0, %k1 -; X32-NEXT: kxorq %k6, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $6, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $58, %k0, %k1 -; X32-NEXT: kxorq %k7, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $5, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $59, %k0, %k1 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $4, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $60, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrl $28, %eax -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $3, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $61, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrl $29, %eax -; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $2, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftrq $62, %k0, %k1 -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: shrl $30, %eax -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $1, %k1, %k1 -; X32-NEXT: kxorq %k1, %k0, %k0 -; X32-NEXT: kshiftlq $1, %k0, %k0 -; X32-NEXT: kshiftrq $1, %k0, %k0 -; X32-NEXT: shrl $31, %ebx -; X32-NEXT: kmovd %ebx, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: korq %k1, %k0, %k1 -; X32-NEXT: vptestnmb %zmm0, %zmm1, %k0 {%k1} -; X32-NEXT: kshiftrq $32, %k0, %k1 -; X32-NEXT: kmovd %k0, %eax -; X32-NEXT: kmovd %k1, %edx -; X32-NEXT: popl %esi -; X32-NEXT: popl %ebx +; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; X32-NEXT: vptestnmb %zmm0, %zmm1, %k2 +; X32-NEXT: kandd %k1, %k2, %k1 +; X32-NEXT: kmovd %k1, %eax +; X32-NEXT: kshiftrq $32, %k2, %k1 +; X32-NEXT: kandd %k0, %k1, %k0 +; X32-NEXT: kmovd %k0, %edx ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll index 8fad90b214b..d22918b459d 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll @@ -1805,569 +1805,58 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: .cfi_offset %edi, -16 ; AVX512F-32-NEXT: .cfi_offset %ebx, -12 ; AVX512F-32-NEXT: .cfi_offset %ebp, -8 +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %esi ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrl $16, %ecx -; AVX512F-32-NEXT: movl %ecx, %esi -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: andb $15, %cl -; AVX512F-32-NEXT: movl %eax, %edx -; AVX512F-32-NEXT: andb $2, %dl -; AVX512F-32-NEXT: shrb %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: movl %ecx, %ebx -; AVX512F-32-NEXT: shrb $2, %cl -; AVX512F-32-NEXT: kmovd %ecx, %k2 -; AVX512F-32-NEXT: movb %ah, %dl -; AVX512F-32-NEXT: andb $15, %dl -; AVX512F-32-NEXT: shrb $3, %bl -; AVX512F-32-NEXT: kmovd %ebx, %k0 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $4, %cl -; AVX512F-32-NEXT: kmovd %ecx, %k3 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $5, %cl -; AVX512F-32-NEXT: andb $1, %cl -; AVX512F-32-NEXT: kmovd %ecx, %k4 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $6, %cl -; AVX512F-32-NEXT: kmovd %ecx, %k6 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $7, %cl -; AVX512F-32-NEXT: kmovd %eax, %k5 -; AVX512F-32-NEXT: kshiftrq $1, %k5, %k7 -; AVX512F-32-NEXT: kxorq %k1, %k7, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $62, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k5, %k7 -; AVX512F-32-NEXT: kshiftrq $2, %k7, %k1 -; AVX512F-32-NEXT: kxorq %k2, %k1, %k2 -; AVX512F-32-NEXT: kmovd %ecx, %k5 -; AVX512F-32-NEXT: movb %ah, %cl -; AVX512F-32-NEXT: kmovd %ecx, %k1 -; AVX512F-32-NEXT: movl %eax, %ebp -; AVX512F-32-NEXT: andb $2, %cl -; AVX512F-32-NEXT: shrb %cl -; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 -; AVX512F-32-NEXT: kshiftrq $61, %k2, %k2 -; AVX512F-32-NEXT: kxorq %k2, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $3, %k7, %k2 -; AVX512F-32-NEXT: kxorq %k0, %k2, %k0 -; AVX512F-32-NEXT: kmovd %ecx, %k2 -; AVX512F-32-NEXT: movl %edx, %ecx -; AVX512F-32-NEXT: shrb $2, %dl -; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $60, %k0, %k0 -; AVX512F-32-NEXT: kxorq %k0, %k7, %k0 -; AVX512F-32-NEXT: kshiftrq $4, %k0, %k7 -; AVX512F-32-NEXT: kxorq %k3, %k7, %k7 -; AVX512F-32-NEXT: kmovd %edx, %k3 -; AVX512F-32-NEXT: movl %esi, %eax -; AVX512F-32-NEXT: movl %eax, %edx -; AVX512F-32-NEXT: andb $15, %dl -; AVX512F-32-NEXT: shrb $3, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $59, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k0, %k7 -; AVX512F-32-NEXT: kshiftrq $5, %k7, %k0 -; AVX512F-32-NEXT: kxorq %k4, %k0, %k4 -; AVX512F-32-NEXT: kmovd %ecx, %k0 -; AVX512F-32-NEXT: movl %ebp, %ecx -; AVX512F-32-NEXT: shrl $13, %ecx -; AVX512F-32-NEXT: andb $1, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 -; AVX512F-32-NEXT: kshiftrq $58, %k4, %k4 -; AVX512F-32-NEXT: kxorq %k4, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $6, %k7, %k4 -; AVX512F-32-NEXT: kxorq %k6, %k4, %k6 -; AVX512F-32-NEXT: kmovd %ecx, %k4 -; AVX512F-32-NEXT: movl %eax, %ebx -; AVX512F-32-NEXT: andb $2, %bl -; AVX512F-32-NEXT: shrb %bl -; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $57, %k6, %k6 -; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 -; AVX512F-32-NEXT: kshiftrq $7, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k5, %k7, %k7 -; AVX512F-32-NEXT: kmovd %ebx, %k5 -; AVX512F-32-NEXT: movl %edx, %ecx -; AVX512F-32-NEXT: shrb $2, %dl -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $56, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k7 -; AVX512F-32-NEXT: kshiftrq $8, %k7, %k6 -; AVX512F-32-NEXT: kxorq %k1, %k6, %k1 -; AVX512F-32-NEXT: kmovd %edx, %k6 -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; AVX512F-32-NEXT: shrb $3, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $55, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $9, %k7, %k1 -; AVX512F-32-NEXT: kxorq %k2, %k1, %k2 -; AVX512F-32-NEXT: kmovd %ecx, %k1 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $4, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 -; AVX512F-32-NEXT: kshiftrq $54, %k2, %k2 -; AVX512F-32-NEXT: kxorq %k2, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $10, %k7, %k2 -; AVX512F-32-NEXT: kxorq %k3, %k2, %k3 -; AVX512F-32-NEXT: kmovd %ecx, %k2 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $5, %cl -; AVX512F-32-NEXT: andb $1, %cl -; AVX512F-32-NEXT: movzwl %bp, %edx -; AVX512F-32-NEXT: movl %edx, %esi -; AVX512F-32-NEXT: movl %edx, %edi -; AVX512F-32-NEXT: shrl $12, %edx -; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $53, %k3, %k3 -; AVX512F-32-NEXT: kxorq %k3, %k7, %k3 -; AVX512F-32-NEXT: kshiftrq $11, %k3, %k7 -; AVX512F-32-NEXT: kxorq %k0, %k7, %k0 -; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $52, %k0, %k0 -; AVX512F-32-NEXT: kxorq %k0, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $12, %k3, %k0 -; AVX512F-32-NEXT: kmovd %edx, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k0, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k0 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $6, %cl -; AVX512F-32-NEXT: shrl $15, %esi -; AVX512F-32-NEXT: shrl $14, %edi -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $51, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $13, %k3, %k7 -; AVX512F-32-NEXT: kxorq %k4, %k7, %k4 -; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 -; AVX512F-32-NEXT: kshiftrq $50, %k4, %k4 -; AVX512F-32-NEXT: kxorq %k4, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $14, %k3, %k4 -; AVX512F-32-NEXT: kmovd %edi, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k4, %k4 -; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 -; AVX512F-32-NEXT: kshiftrq $49, %k4, %k4 -; AVX512F-32-NEXT: kxorq %k4, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $15, %k3, %k4 -; AVX512F-32-NEXT: kmovd %esi, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k4, %k4 -; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 -; AVX512F-32-NEXT: kshiftrq $48, %k4, %k4 -; AVX512F-32-NEXT: kxorq %k4, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $16, %k3, %k4 -; AVX512F-32-NEXT: kmovd %eax, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k4, %k4 -; AVX512F-32-NEXT: kmovd %ecx, %k7 -; AVX512F-32-NEXT: movl %ebp, %ecx -; AVX512F-32-NEXT: shrl $24, %ecx -; AVX512F-32-NEXT: # kill: def $al killed $al killed $eax def $eax -; AVX512F-32-NEXT: shrb $7, %al -; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 -; AVX512F-32-NEXT: kshiftrq $47, %k4, %k4 -; AVX512F-32-NEXT: kxorq %k4, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $17, %k3, %k4 -; AVX512F-32-NEXT: kxorq %k5, %k4, %k4 -; AVX512F-32-NEXT: kmovd %eax, %k5 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 -; AVX512F-32-NEXT: kshiftrq $46, %k4, %k4 -; AVX512F-32-NEXT: kxorq %k4, %k3, %k4 -; AVX512F-32-NEXT: kshiftrq $18, %k4, %k3 -; AVX512F-32-NEXT: kxorq %k6, %k3, %k6 -; AVX512F-32-NEXT: kmovd %ecx, %k3 -; AVX512F-32-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; AVX512F-32-NEXT: andb $15, %cl -; AVX512F-32-NEXT: andb $2, %al -; AVX512F-32-NEXT: shrb %al -; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $45, %k6, %k6 -; AVX512F-32-NEXT: kxorq %k6, %k4, %k6 -; AVX512F-32-NEXT: kshiftrq $19, %k6, %k4 -; AVX512F-32-NEXT: kxorq %k1, %k4, %k1 -; AVX512F-32-NEXT: kmovd %eax, %k4 -; AVX512F-32-NEXT: movl %ecx, %edx -; AVX512F-32-NEXT: shrb $2, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $44, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k6, %k1 -; AVX512F-32-NEXT: kshiftrq $20, %k1, %k6 -; AVX512F-32-NEXT: kxorq %k2, %k6, %k6 -; AVX512F-32-NEXT: kmovd %ecx, %k2 -; AVX512F-32-NEXT: movl %ebx, %eax -; AVX512F-32-NEXT: andb $15, %al -; AVX512F-32-NEXT: shrb $3, %dl -; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $43, %k6, %k6 -; AVX512F-32-NEXT: kxorq %k6, %k1, %k6 -; AVX512F-32-NEXT: kshiftrq $21, %k6, %k1 -; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: movl %ebp, %ecx -; AVX512F-32-NEXT: shrl $29, %ecx -; AVX512F-32-NEXT: andb $1, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $42, %k0, %k0 -; AVX512F-32-NEXT: kxorq %k0, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $22, %k6, %k0 -; AVX512F-32-NEXT: kxorq %k7, %k0, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k0 -; AVX512F-32-NEXT: movl %ebx, %ecx -; AVX512F-32-NEXT: andb $2, %cl -; AVX512F-32-NEXT: shrb %cl -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $41, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $23, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k5, %k7, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k5 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $2, %al -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $40, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k7 -; AVX512F-32-NEXT: kshiftrq $24, %k7, %k6 -; AVX512F-32-NEXT: kxorq %k3, %k6, %k3 -; AVX512F-32-NEXT: kmovd %eax, %k6 -; AVX512F-32-NEXT: movb %bh, %al -; AVX512F-32-NEXT: andb $15, %al -; AVX512F-32-NEXT: shrb $3, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $39, %k3, %k3 -; AVX512F-32-NEXT: kxorq %k3, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $25, %k7, %k3 -; AVX512F-32-NEXT: kxorq %k4, %k3, %k4 -; AVX512F-32-NEXT: kmovd %ecx, %k3 -; AVX512F-32-NEXT: movl %ebx, %ecx -; AVX512F-32-NEXT: shrb $4, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 -; AVX512F-32-NEXT: kshiftrq $38, %k4, %k4 -; AVX512F-32-NEXT: kxorq %k4, %k7, %k4 -; AVX512F-32-NEXT: kshiftrq $26, %k4, %k7 -; AVX512F-32-NEXT: kxorq %k2, %k7, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k2 -; AVX512F-32-NEXT: movl %ebx, %ecx -; AVX512F-32-NEXT: shrb $5, %cl -; AVX512F-32-NEXT: andb $1, %cl -; AVX512F-32-NEXT: movl %ebp, %edx -; AVX512F-32-NEXT: shrl $28, %edx -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $37, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k4, %k4 -; AVX512F-32-NEXT: kshiftrq $27, %k4, %k7 -; AVX512F-32-NEXT: kxorq %k1, %k7, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $36, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k4, %k1 -; AVX512F-32-NEXT: kshiftrq $28, %k1, %k4 -; AVX512F-32-NEXT: kmovd %edx, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k4, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k4 -; AVX512F-32-NEXT: movl %ebx, %ecx -; AVX512F-32-NEXT: shrb $6, %cl -; AVX512F-32-NEXT: movl %ebp, %edx -; AVX512F-32-NEXT: shrl $31, %edx -; AVX512F-32-NEXT: movl %ebp, %esi -; AVX512F-32-NEXT: shrl $30, %esi -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $35, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $29, %k1, %k7 -; AVX512F-32-NEXT: kxorq %k0, %k7, %k0 -; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $34, %k0, %k0 -; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 -; AVX512F-32-NEXT: kshiftrq $30, %k0, %k1 -; AVX512F-32-NEXT: kmovd %esi, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k1, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $33, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $31, %k0, %k1 -; AVX512F-32-NEXT: kmovd %edx, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k1, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $32, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $32, %k0, %k1 -; AVX512F-32-NEXT: kmovd %ebx, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k1, %k1 -; AVX512F-32-NEXT: kmovd %ecx, %k7 -; AVX512F-32-NEXT: movl %ebx, %ecx -; AVX512F-32-NEXT: shrb $7, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $31, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $33, %k0, %k1 -; AVX512F-32-NEXT: kxorq %k5, %k1, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $30, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $34, %k0, %k1 -; AVX512F-32-NEXT: kxorq %k6, %k1, %k5 -; AVX512F-32-NEXT: kmovd %ecx, %k6 -; AVX512F-32-NEXT: movb %bh, %cl -; AVX512F-32-NEXT: kmovd %ecx, %k1 -; AVX512F-32-NEXT: andb $2, %cl -; AVX512F-32-NEXT: shrb %cl -; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5 -; AVX512F-32-NEXT: kshiftrq $29, %k5, %k5 -; AVX512F-32-NEXT: kxorq %k5, %k0, %k5 -; AVX512F-32-NEXT: kshiftrq $35, %k5, %k0 -; AVX512F-32-NEXT: kxorq %k3, %k0, %k3 -; AVX512F-32-NEXT: kmovd %ecx, %k0 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $2, %al -; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $28, %k3, %k3 -; AVX512F-32-NEXT: kxorq %k3, %k5, %k5 -; AVX512F-32-NEXT: kshiftrq $36, %k5, %k3 -; AVX512F-32-NEXT: kxorq %k2, %k3, %k2 -; AVX512F-32-NEXT: kmovd %eax, %k3 -; AVX512F-32-NEXT: movl %ebx, %eax -; AVX512F-32-NEXT: shrl $16, %eax -; AVX512F-32-NEXT: shrb $3, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 -; AVX512F-32-NEXT: kshiftrq $27, %k2, %k2 -; AVX512F-32-NEXT: kxorq %k2, %k5, %k2 -; AVX512F-32-NEXT: kshiftrq $37, %k2, %k5 -; AVX512F-32-NEXT: kxorq %k4, %k5, %k4 -; AVX512F-32-NEXT: kmovd %ecx, %k5 -; AVX512F-32-NEXT: movl %ebx, %ecx -; AVX512F-32-NEXT: shrl $13, %ecx -; AVX512F-32-NEXT: andb $1, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 -; AVX512F-32-NEXT: kshiftrq $26, %k4, %k4 -; AVX512F-32-NEXT: kxorq %k4, %k2, %k2 -; AVX512F-32-NEXT: kshiftrq $38, %k2, %k4 -; AVX512F-32-NEXT: kxorq %k7, %k4, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k4 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: andb $2, %cl -; AVX512F-32-NEXT: shrb %cl -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $25, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k2, %k7 -; AVX512F-32-NEXT: kshiftrq $39, %k7, %k2 -; AVX512F-32-NEXT: kxorq %k6, %k2, %k6 -; AVX512F-32-NEXT: kmovd %ecx, %k2 -; AVX512F-32-NEXT: movl %eax, %edx -; AVX512F-32-NEXT: andb $15, %dl -; AVX512F-32-NEXT: movl %edx, %ecx -; AVX512F-32-NEXT: shrb $2, %dl -; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $24, %k6, %k6 -; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 -; AVX512F-32-NEXT: kshiftrq $40, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k1, %k7, %k7 -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: movzwl %bx, %esi -; AVX512F-32-NEXT: movl %esi, %edx -; AVX512F-32-NEXT: movl %esi, %edi -; AVX512F-32-NEXT: shrl $12, %esi -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $23, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $41, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k0, %k7, %k0 -; AVX512F-32-NEXT: kmovd %esi, %k7 -; AVX512F-32-NEXT: shrl $14, %edi -; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $22, %k0, %k0 -; AVX512F-32-NEXT: kxorq %k0, %k6, %k0 -; AVX512F-32-NEXT: kshiftrq $42, %k0, %k6 -; AVX512F-32-NEXT: kxorq %k3, %k6, %k3 -; AVX512F-32-NEXT: kmovd %edi, %k6 -; AVX512F-32-NEXT: shrb $3, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $21, %k3, %k3 -; AVX512F-32-NEXT: kxorq %k3, %k0, %k3 -; AVX512F-32-NEXT: kshiftrq $43, %k3, %k0 -; AVX512F-32-NEXT: kxorq %k5, %k0, %k5 -; AVX512F-32-NEXT: kmovd %ecx, %k0 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $4, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5 -; AVX512F-32-NEXT: kshiftrq $20, %k5, %k5 -; AVX512F-32-NEXT: kxorq %k5, %k3, %k5 -; AVX512F-32-NEXT: kshiftrq $44, %k5, %k3 -; AVX512F-32-NEXT: kxorq %k7, %k3, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k3 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $5, %cl -; AVX512F-32-NEXT: andb $1, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $19, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k5, %k5 -; AVX512F-32-NEXT: kshiftrq $45, %k5, %k7 -; AVX512F-32-NEXT: kxorq %k4, %k7, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k4 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $6, %cl -; AVX512F-32-NEXT: shrl $15, %edx -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $18, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k5, %k5 -; AVX512F-32-NEXT: kshiftrq $46, %k5, %k7 -; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 -; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $17, %k6, %k6 -; AVX512F-32-NEXT: kxorq %k6, %k5, %k5 -; AVX512F-32-NEXT: kshiftrq $47, %k5, %k6 -; AVX512F-32-NEXT: kmovd %edx, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $16, %k6, %k6 -; AVX512F-32-NEXT: kxorq %k6, %k5, %k6 -; AVX512F-32-NEXT: kshiftrq $48, %k6, %k5 -; AVX512F-32-NEXT: kmovd %eax, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k5, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k5 -; AVX512F-32-NEXT: movl %ebx, %ecx -; AVX512F-32-NEXT: shrl $24, %ecx -; AVX512F-32-NEXT: # kill: def $al killed $al killed $eax def $eax -; AVX512F-32-NEXT: shrb $7, %al -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $15, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $49, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k2, %k7, %k7 -; AVX512F-32-NEXT: kmovd %eax, %k2 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $14, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $50, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k1, %k7, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k1 -; AVX512F-32-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; AVX512F-32-NEXT: andb $15, %cl -; AVX512F-32-NEXT: andb $2, %al -; AVX512F-32-NEXT: shrb %al -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $13, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $51, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k0, %k7, %k7 -; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrb $2, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $12, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $52, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k3, %k7, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k3 -; AVX512F-32-NEXT: shrb $3, %al -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $11, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $53, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k4, %k7, %k7 -; AVX512F-32-NEXT: kmovd %eax, %k4 -; AVX512F-32-NEXT: movl %ebx, %eax -; AVX512F-32-NEXT: shrl $29, %eax -; AVX512F-32-NEXT: andb $1, %al -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $10, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $54, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k5, %k7, %k7 -; AVX512F-32-NEXT: kmovd %eax, %k5 -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $9, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $55, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k2, %k7, %k2 -; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 -; AVX512F-32-NEXT: kshiftrq $8, %k2, %k2 -; AVX512F-32-NEXT: kxorq %k2, %k6, %k2 -; AVX512F-32-NEXT: kshiftrq $56, %k2, %k6 -; AVX512F-32-NEXT: kxorq %k1, %k6, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $7, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k2, %k1 -; AVX512F-32-NEXT: kshiftrq $57, %k1, %k2 -; AVX512F-32-NEXT: kxorq %k0, %k2, %k0 -; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $6, %k0, %k0 -; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 -; AVX512F-32-NEXT: kshiftrq $58, %k0, %k1 -; AVX512F-32-NEXT: kxorq %k3, %k1, %k1 -; AVX512F-32-NEXT: movl %ebx, %eax -; AVX512F-32-NEXT: shrl $28, %eax -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $5, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $59, %k0, %k1 -; AVX512F-32-NEXT: kxorq %k4, %k1, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $4, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $60, %k0, %k1 -; AVX512F-32-NEXT: kmovd %eax, %k2 -; AVX512F-32-NEXT: kxorq %k2, %k1, %k1 -; AVX512F-32-NEXT: movl %ebx, %eax -; AVX512F-32-NEXT: shrl $31, %eax -; AVX512F-32-NEXT: movl %ebx, %ecx -; AVX512F-32-NEXT: shrl $30, %ecx -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $3, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $61, %k0, %k1 -; AVX512F-32-NEXT: kxorq %k5, %k1, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $2, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $62, %k0, %k1 -; AVX512F-32-NEXT: kmovd %ecx, %k2 -; AVX512F-32-NEXT: kxorq %k2, %k1, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $1, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k0, %k0 -; AVX512F-32-NEXT: kshiftlq $1, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $1, %k0, %k0 +; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k2 +; AVX512F-32-NEXT: kmovd %esi, %k0 ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: korq %k1, %k0, %k1 -; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: kshiftrq $32, %k0, %k2 -; AVX512F-32-NEXT: kmovd %k2, %eax -; AVX512F-32-NEXT: kmovd %k0, %ecx -; AVX512F-32-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 {%k1} -; AVX512F-32-NEXT: kshiftrq $32, %k0, %k2 -; AVX512F-32-NEXT: kmovd %k0, %edx -; AVX512F-32-NEXT: addl %ecx, %edx -; AVX512F-32-NEXT: kmovd %k2, %ecx -; AVX512F-32-NEXT: adcl %eax, %ecx -; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: kshiftrq $32, %k0, %k2 -; AVX512F-32-NEXT: kmovd %k0, %eax -; AVX512F-32-NEXT: addl %edx, %eax +; AVX512F-32-NEXT: kshiftrq $32, %k2, %k3 +; AVX512F-32-NEXT: kandd %k1, %k3, %k3 +; AVX512F-32-NEXT: kmovd %k3, %eax +; AVX512F-32-NEXT: kandd %k0, %k2, %k2 ; AVX512F-32-NEXT: kmovd %k2, %edx -; AVX512F-32-NEXT: adcl %ecx, %edx -; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: kshiftrq $32, %k0, %k2 -; AVX512F-32-NEXT: kmovd %k0, %ecx -; AVX512F-32-NEXT: addl %eax, %ecx -; AVX512F-32-NEXT: kmovd %k2, %eax -; AVX512F-32-NEXT: adcl %edx, %eax -; AVX512F-32-NEXT: vpcmpleb %zmm0, %zmm1, %k0 {%k1} -; AVX512F-32-NEXT: kshiftrq $32, %k0, %k2 -; AVX512F-32-NEXT: kmovd %k0, %edx -; AVX512F-32-NEXT: addl %ecx, %edx +; AVX512F-32-NEXT: vpcmpgtb %zmm0, %zmm1, %k2 +; AVX512F-32-NEXT: kshiftrq $32, %k2, %k3 +; AVX512F-32-NEXT: kandd %k1, %k3, %k3 +; AVX512F-32-NEXT: kmovd %k3, %edi +; AVX512F-32-NEXT: kandd %k0, %k2, %k2 +; AVX512F-32-NEXT: kmovd %k2, %ebx +; AVX512F-32-NEXT: addl %edx, %ebx +; AVX512F-32-NEXT: adcl %eax, %edi +; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k2 +; AVX512F-32-NEXT: kshiftrq $32, %k2, %k3 +; AVX512F-32-NEXT: kandd %k1, %k3, %k3 +; AVX512F-32-NEXT: kmovd %k3, %eax +; AVX512F-32-NEXT: kandd %k0, %k2, %k2 +; AVX512F-32-NEXT: kmovd %k2, %edx +; AVX512F-32-NEXT: addl %ebx, %edx +; AVX512F-32-NEXT: adcl %edi, %eax +; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k2 +; AVX512F-32-NEXT: kshiftrq $32, %k2, %k3 +; AVX512F-32-NEXT: kandd %k1, %k3, %k3 +; AVX512F-32-NEXT: kmovd %k3, %edi +; AVX512F-32-NEXT: kandd %k0, %k2, %k2 +; AVX512F-32-NEXT: kmovd %k2, %ebx +; AVX512F-32-NEXT: addl %edx, %ebx +; AVX512F-32-NEXT: adcl %eax, %edi +; AVX512F-32-NEXT: vpcmpleb %zmm0, %zmm1, %k2 +; AVX512F-32-NEXT: kshiftrq $32, %k2, %k3 +; AVX512F-32-NEXT: kandd %k1, %k3, %k3 +; AVX512F-32-NEXT: kmovd %k3, %ebp +; AVX512F-32-NEXT: kandd %k0, %k2, %k2 ; AVX512F-32-NEXT: kmovd %k2, %ecx -; AVX512F-32-NEXT: adcl %eax, %ecx -; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: kshiftrq $32, %k0, %k1 -; AVX512F-32-NEXT: kmovd %k0, %eax -; AVX512F-32-NEXT: addl %edx, %eax +; AVX512F-32-NEXT: addl %ebx, %ecx +; AVX512F-32-NEXT: adcl %edi, %ebp +; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k2 +; AVX512F-32-NEXT: kshiftrq $32, %k2, %k3 +; AVX512F-32-NEXT: kandd %k1, %k3, %k1 ; AVX512F-32-NEXT: kmovd %k1, %edx -; AVX512F-32-NEXT: adcl %ecx, %edx -; AVX512F-32-NEXT: addl %ebp, %eax -; AVX512F-32-NEXT: adcl %ebx, %edx +; AVX512F-32-NEXT: kandd %k0, %k2, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: addl %ecx, %eax +; AVX512F-32-NEXT: adcl %ebp, %edx +; AVX512F-32-NEXT: addl %esi, %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: popl %esi ; AVX512F-32-NEXT: popl %edi ; AVX512F-32-NEXT: popl %ebx @@ -2527,569 +2016,58 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: .cfi_offset %edi, -16 ; AVX512F-32-NEXT: .cfi_offset %ebx, -12 ; AVX512F-32-NEXT: .cfi_offset %ebp, -8 +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %esi ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrl $16, %ecx -; AVX512F-32-NEXT: movl %ecx, %esi -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: andb $15, %cl -; AVX512F-32-NEXT: movl %eax, %edx -; AVX512F-32-NEXT: andb $2, %dl -; AVX512F-32-NEXT: shrb %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: movl %ecx, %ebx -; AVX512F-32-NEXT: shrb $2, %cl -; AVX512F-32-NEXT: kmovd %ecx, %k2 -; AVX512F-32-NEXT: movb %ah, %dl -; AVX512F-32-NEXT: andb $15, %dl -; AVX512F-32-NEXT: shrb $3, %bl -; AVX512F-32-NEXT: kmovd %ebx, %k0 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $4, %cl -; AVX512F-32-NEXT: kmovd %ecx, %k3 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $5, %cl -; AVX512F-32-NEXT: andb $1, %cl -; AVX512F-32-NEXT: kmovd %ecx, %k4 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $6, %cl -; AVX512F-32-NEXT: kmovd %ecx, %k6 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $7, %cl -; AVX512F-32-NEXT: kmovd %eax, %k5 -; AVX512F-32-NEXT: kshiftrq $1, %k5, %k7 -; AVX512F-32-NEXT: kxorq %k1, %k7, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $62, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k5, %k7 -; AVX512F-32-NEXT: kshiftrq $2, %k7, %k1 -; AVX512F-32-NEXT: kxorq %k2, %k1, %k2 -; AVX512F-32-NEXT: kmovd %ecx, %k5 -; AVX512F-32-NEXT: movb %ah, %cl -; AVX512F-32-NEXT: kmovd %ecx, %k1 -; AVX512F-32-NEXT: movl %eax, %ebp -; AVX512F-32-NEXT: andb $2, %cl -; AVX512F-32-NEXT: shrb %cl -; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 -; AVX512F-32-NEXT: kshiftrq $61, %k2, %k2 -; AVX512F-32-NEXT: kxorq %k2, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $3, %k7, %k2 -; AVX512F-32-NEXT: kxorq %k0, %k2, %k0 -; AVX512F-32-NEXT: kmovd %ecx, %k2 -; AVX512F-32-NEXT: movl %edx, %ecx -; AVX512F-32-NEXT: shrb $2, %dl -; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $60, %k0, %k0 -; AVX512F-32-NEXT: kxorq %k0, %k7, %k0 -; AVX512F-32-NEXT: kshiftrq $4, %k0, %k7 -; AVX512F-32-NEXT: kxorq %k3, %k7, %k7 -; AVX512F-32-NEXT: kmovd %edx, %k3 -; AVX512F-32-NEXT: movl %esi, %eax -; AVX512F-32-NEXT: movl %eax, %edx -; AVX512F-32-NEXT: andb $15, %dl -; AVX512F-32-NEXT: shrb $3, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $59, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k0, %k7 -; AVX512F-32-NEXT: kshiftrq $5, %k7, %k0 -; AVX512F-32-NEXT: kxorq %k4, %k0, %k4 -; AVX512F-32-NEXT: kmovd %ecx, %k0 -; AVX512F-32-NEXT: movl %ebp, %ecx -; AVX512F-32-NEXT: shrl $13, %ecx -; AVX512F-32-NEXT: andb $1, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 -; AVX512F-32-NEXT: kshiftrq $58, %k4, %k4 -; AVX512F-32-NEXT: kxorq %k4, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $6, %k7, %k4 -; AVX512F-32-NEXT: kxorq %k6, %k4, %k6 -; AVX512F-32-NEXT: kmovd %ecx, %k4 -; AVX512F-32-NEXT: movl %eax, %ebx -; AVX512F-32-NEXT: andb $2, %bl -; AVX512F-32-NEXT: shrb %bl -; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $57, %k6, %k6 -; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 -; AVX512F-32-NEXT: kshiftrq $7, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k5, %k7, %k7 -; AVX512F-32-NEXT: kmovd %ebx, %k5 -; AVX512F-32-NEXT: movl %edx, %ecx -; AVX512F-32-NEXT: shrb $2, %dl -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $56, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k7 -; AVX512F-32-NEXT: kshiftrq $8, %k7, %k6 -; AVX512F-32-NEXT: kxorq %k1, %k6, %k1 -; AVX512F-32-NEXT: kmovd %edx, %k6 -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; AVX512F-32-NEXT: shrb $3, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $55, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $9, %k7, %k1 -; AVX512F-32-NEXT: kxorq %k2, %k1, %k2 -; AVX512F-32-NEXT: kmovd %ecx, %k1 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $4, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 -; AVX512F-32-NEXT: kshiftrq $54, %k2, %k2 -; AVX512F-32-NEXT: kxorq %k2, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $10, %k7, %k2 -; AVX512F-32-NEXT: kxorq %k3, %k2, %k3 -; AVX512F-32-NEXT: kmovd %ecx, %k2 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $5, %cl -; AVX512F-32-NEXT: andb $1, %cl -; AVX512F-32-NEXT: movzwl %bp, %edx -; AVX512F-32-NEXT: movl %edx, %esi -; AVX512F-32-NEXT: movl %edx, %edi -; AVX512F-32-NEXT: shrl $12, %edx -; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $53, %k3, %k3 -; AVX512F-32-NEXT: kxorq %k3, %k7, %k3 -; AVX512F-32-NEXT: kshiftrq $11, %k3, %k7 -; AVX512F-32-NEXT: kxorq %k0, %k7, %k0 -; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $52, %k0, %k0 -; AVX512F-32-NEXT: kxorq %k0, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $12, %k3, %k0 -; AVX512F-32-NEXT: kmovd %edx, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k0, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k0 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $6, %cl -; AVX512F-32-NEXT: shrl $15, %esi -; AVX512F-32-NEXT: shrl $14, %edi -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $51, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $13, %k3, %k7 -; AVX512F-32-NEXT: kxorq %k4, %k7, %k4 -; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 -; AVX512F-32-NEXT: kshiftrq $50, %k4, %k4 -; AVX512F-32-NEXT: kxorq %k4, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $14, %k3, %k4 -; AVX512F-32-NEXT: kmovd %edi, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k4, %k4 -; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 -; AVX512F-32-NEXT: kshiftrq $49, %k4, %k4 -; AVX512F-32-NEXT: kxorq %k4, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $15, %k3, %k4 -; AVX512F-32-NEXT: kmovd %esi, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k4, %k4 -; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 -; AVX512F-32-NEXT: kshiftrq $48, %k4, %k4 -; AVX512F-32-NEXT: kxorq %k4, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $16, %k3, %k4 -; AVX512F-32-NEXT: kmovd %eax, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k4, %k4 -; AVX512F-32-NEXT: kmovd %ecx, %k7 -; AVX512F-32-NEXT: movl %ebp, %ecx -; AVX512F-32-NEXT: shrl $24, %ecx -; AVX512F-32-NEXT: # kill: def $al killed $al killed $eax def $eax -; AVX512F-32-NEXT: shrb $7, %al -; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 -; AVX512F-32-NEXT: kshiftrq $47, %k4, %k4 -; AVX512F-32-NEXT: kxorq %k4, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $17, %k3, %k4 -; AVX512F-32-NEXT: kxorq %k5, %k4, %k4 -; AVX512F-32-NEXT: kmovd %eax, %k5 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 -; AVX512F-32-NEXT: kshiftrq $46, %k4, %k4 -; AVX512F-32-NEXT: kxorq %k4, %k3, %k4 -; AVX512F-32-NEXT: kshiftrq $18, %k4, %k3 -; AVX512F-32-NEXT: kxorq %k6, %k3, %k6 -; AVX512F-32-NEXT: kmovd %ecx, %k3 -; AVX512F-32-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; AVX512F-32-NEXT: andb $15, %cl -; AVX512F-32-NEXT: andb $2, %al -; AVX512F-32-NEXT: shrb %al -; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $45, %k6, %k6 -; AVX512F-32-NEXT: kxorq %k6, %k4, %k6 -; AVX512F-32-NEXT: kshiftrq $19, %k6, %k4 -; AVX512F-32-NEXT: kxorq %k1, %k4, %k1 -; AVX512F-32-NEXT: kmovd %eax, %k4 -; AVX512F-32-NEXT: movl %ecx, %edx -; AVX512F-32-NEXT: shrb $2, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $44, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k6, %k1 -; AVX512F-32-NEXT: kshiftrq $20, %k1, %k6 -; AVX512F-32-NEXT: kxorq %k2, %k6, %k6 -; AVX512F-32-NEXT: kmovd %ecx, %k2 -; AVX512F-32-NEXT: movl %ebx, %eax -; AVX512F-32-NEXT: andb $15, %al -; AVX512F-32-NEXT: shrb $3, %dl -; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $43, %k6, %k6 -; AVX512F-32-NEXT: kxorq %k6, %k1, %k6 -; AVX512F-32-NEXT: kshiftrq $21, %k6, %k1 -; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: movl %ebp, %ecx -; AVX512F-32-NEXT: shrl $29, %ecx -; AVX512F-32-NEXT: andb $1, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $42, %k0, %k0 -; AVX512F-32-NEXT: kxorq %k0, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $22, %k6, %k0 -; AVX512F-32-NEXT: kxorq %k7, %k0, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k0 -; AVX512F-32-NEXT: movl %ebx, %ecx -; AVX512F-32-NEXT: andb $2, %cl -; AVX512F-32-NEXT: shrb %cl -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $41, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $23, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k5, %k7, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k5 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $2, %al -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $40, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k7 -; AVX512F-32-NEXT: kshiftrq $24, %k7, %k6 -; AVX512F-32-NEXT: kxorq %k3, %k6, %k3 -; AVX512F-32-NEXT: kmovd %eax, %k6 -; AVX512F-32-NEXT: movb %bh, %al -; AVX512F-32-NEXT: andb $15, %al -; AVX512F-32-NEXT: shrb $3, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $39, %k3, %k3 -; AVX512F-32-NEXT: kxorq %k3, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $25, %k7, %k3 -; AVX512F-32-NEXT: kxorq %k4, %k3, %k4 -; AVX512F-32-NEXT: kmovd %ecx, %k3 -; AVX512F-32-NEXT: movl %ebx, %ecx -; AVX512F-32-NEXT: shrb $4, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 -; AVX512F-32-NEXT: kshiftrq $38, %k4, %k4 -; AVX512F-32-NEXT: kxorq %k4, %k7, %k4 -; AVX512F-32-NEXT: kshiftrq $26, %k4, %k7 -; AVX512F-32-NEXT: kxorq %k2, %k7, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k2 -; AVX512F-32-NEXT: movl %ebx, %ecx -; AVX512F-32-NEXT: shrb $5, %cl -; AVX512F-32-NEXT: andb $1, %cl -; AVX512F-32-NEXT: movl %ebp, %edx -; AVX512F-32-NEXT: shrl $28, %edx -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $37, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k4, %k4 -; AVX512F-32-NEXT: kshiftrq $27, %k4, %k7 -; AVX512F-32-NEXT: kxorq %k1, %k7, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $36, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k4, %k1 -; AVX512F-32-NEXT: kshiftrq $28, %k1, %k4 -; AVX512F-32-NEXT: kmovd %edx, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k4, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k4 -; AVX512F-32-NEXT: movl %ebx, %ecx -; AVX512F-32-NEXT: shrb $6, %cl -; AVX512F-32-NEXT: movl %ebp, %edx -; AVX512F-32-NEXT: shrl $31, %edx -; AVX512F-32-NEXT: movl %ebp, %esi -; AVX512F-32-NEXT: shrl $30, %esi -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $35, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $29, %k1, %k7 -; AVX512F-32-NEXT: kxorq %k0, %k7, %k0 -; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $34, %k0, %k0 -; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 -; AVX512F-32-NEXT: kshiftrq $30, %k0, %k1 -; AVX512F-32-NEXT: kmovd %esi, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k1, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $33, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $31, %k0, %k1 -; AVX512F-32-NEXT: kmovd %edx, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k1, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $32, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $32, %k0, %k1 -; AVX512F-32-NEXT: kmovd %ebx, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k1, %k1 -; AVX512F-32-NEXT: kmovd %ecx, %k7 -; AVX512F-32-NEXT: movl %ebx, %ecx -; AVX512F-32-NEXT: shrb $7, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $31, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $33, %k0, %k1 -; AVX512F-32-NEXT: kxorq %k5, %k1, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $30, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $34, %k0, %k1 -; AVX512F-32-NEXT: kxorq %k6, %k1, %k5 -; AVX512F-32-NEXT: kmovd %ecx, %k6 -; AVX512F-32-NEXT: movb %bh, %cl -; AVX512F-32-NEXT: kmovd %ecx, %k1 -; AVX512F-32-NEXT: andb $2, %cl -; AVX512F-32-NEXT: shrb %cl -; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5 -; AVX512F-32-NEXT: kshiftrq $29, %k5, %k5 -; AVX512F-32-NEXT: kxorq %k5, %k0, %k5 -; AVX512F-32-NEXT: kshiftrq $35, %k5, %k0 -; AVX512F-32-NEXT: kxorq %k3, %k0, %k3 -; AVX512F-32-NEXT: kmovd %ecx, %k0 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $2, %al -; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $28, %k3, %k3 -; AVX512F-32-NEXT: kxorq %k3, %k5, %k5 -; AVX512F-32-NEXT: kshiftrq $36, %k5, %k3 -; AVX512F-32-NEXT: kxorq %k2, %k3, %k2 -; AVX512F-32-NEXT: kmovd %eax, %k3 -; AVX512F-32-NEXT: movl %ebx, %eax -; AVX512F-32-NEXT: shrl $16, %eax -; AVX512F-32-NEXT: shrb $3, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 -; AVX512F-32-NEXT: kshiftrq $27, %k2, %k2 -; AVX512F-32-NEXT: kxorq %k2, %k5, %k2 -; AVX512F-32-NEXT: kshiftrq $37, %k2, %k5 -; AVX512F-32-NEXT: kxorq %k4, %k5, %k4 -; AVX512F-32-NEXT: kmovd %ecx, %k5 -; AVX512F-32-NEXT: movl %ebx, %ecx -; AVX512F-32-NEXT: shrl $13, %ecx -; AVX512F-32-NEXT: andb $1, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 -; AVX512F-32-NEXT: kshiftrq $26, %k4, %k4 -; AVX512F-32-NEXT: kxorq %k4, %k2, %k2 -; AVX512F-32-NEXT: kshiftrq $38, %k2, %k4 -; AVX512F-32-NEXT: kxorq %k7, %k4, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k4 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: andb $2, %cl -; AVX512F-32-NEXT: shrb %cl -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $25, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k2, %k7 -; AVX512F-32-NEXT: kshiftrq $39, %k7, %k2 -; AVX512F-32-NEXT: kxorq %k6, %k2, %k6 -; AVX512F-32-NEXT: kmovd %ecx, %k2 -; AVX512F-32-NEXT: movl %eax, %edx -; AVX512F-32-NEXT: andb $15, %dl -; AVX512F-32-NEXT: movl %edx, %ecx -; AVX512F-32-NEXT: shrb $2, %dl -; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $24, %k6, %k6 -; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 -; AVX512F-32-NEXT: kshiftrq $40, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k1, %k7, %k7 -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: movzwl %bx, %esi -; AVX512F-32-NEXT: movl %esi, %edx -; AVX512F-32-NEXT: movl %esi, %edi -; AVX512F-32-NEXT: shrl $12, %esi -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $23, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $41, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k0, %k7, %k0 -; AVX512F-32-NEXT: kmovd %esi, %k7 -; AVX512F-32-NEXT: shrl $14, %edi -; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $22, %k0, %k0 -; AVX512F-32-NEXT: kxorq %k0, %k6, %k0 -; AVX512F-32-NEXT: kshiftrq $42, %k0, %k6 -; AVX512F-32-NEXT: kxorq %k3, %k6, %k3 -; AVX512F-32-NEXT: kmovd %edi, %k6 -; AVX512F-32-NEXT: shrb $3, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3 -; AVX512F-32-NEXT: kshiftrq $21, %k3, %k3 -; AVX512F-32-NEXT: kxorq %k3, %k0, %k3 -; AVX512F-32-NEXT: kshiftrq $43, %k3, %k0 -; AVX512F-32-NEXT: kxorq %k5, %k0, %k5 -; AVX512F-32-NEXT: kmovd %ecx, %k0 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $4, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5 -; AVX512F-32-NEXT: kshiftrq $20, %k5, %k5 -; AVX512F-32-NEXT: kxorq %k5, %k3, %k5 -; AVX512F-32-NEXT: kshiftrq $44, %k5, %k3 -; AVX512F-32-NEXT: kxorq %k7, %k3, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k3 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $5, %cl -; AVX512F-32-NEXT: andb $1, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $19, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k5, %k5 -; AVX512F-32-NEXT: kshiftrq $45, %k5, %k7 -; AVX512F-32-NEXT: kxorq %k4, %k7, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k4 -; AVX512F-32-NEXT: movl %eax, %ecx -; AVX512F-32-NEXT: shrb $6, %cl -; AVX512F-32-NEXT: shrl $15, %edx -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $18, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k5, %k5 -; AVX512F-32-NEXT: kshiftrq $46, %k5, %k7 -; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 -; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $17, %k6, %k6 -; AVX512F-32-NEXT: kxorq %k6, %k5, %k5 -; AVX512F-32-NEXT: kshiftrq $47, %k5, %k6 -; AVX512F-32-NEXT: kmovd %edx, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $16, %k6, %k6 -; AVX512F-32-NEXT: kxorq %k6, %k5, %k6 -; AVX512F-32-NEXT: kshiftrq $48, %k6, %k5 -; AVX512F-32-NEXT: kmovd %eax, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k5, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k5 -; AVX512F-32-NEXT: movl %ebx, %ecx -; AVX512F-32-NEXT: shrl $24, %ecx -; AVX512F-32-NEXT: # kill: def $al killed $al killed $eax def $eax -; AVX512F-32-NEXT: shrb $7, %al -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $15, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $49, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k2, %k7, %k7 -; AVX512F-32-NEXT: kmovd %eax, %k2 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $14, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $50, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k1, %k7, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k1 -; AVX512F-32-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; AVX512F-32-NEXT: andb $15, %cl -; AVX512F-32-NEXT: andb $2, %al -; AVX512F-32-NEXT: shrb %al -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $13, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $51, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k0, %k7, %k7 -; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrb $2, %cl -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $12, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $52, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k3, %k7, %k7 -; AVX512F-32-NEXT: kmovd %ecx, %k3 -; AVX512F-32-NEXT: shrb $3, %al -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $11, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $53, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k4, %k7, %k7 -; AVX512F-32-NEXT: kmovd %eax, %k4 -; AVX512F-32-NEXT: movl %ebx, %eax -; AVX512F-32-NEXT: shrl $29, %eax -; AVX512F-32-NEXT: andb $1, %al -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $10, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $54, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k5, %k7, %k7 -; AVX512F-32-NEXT: kmovd %eax, %k5 -; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 -; AVX512F-32-NEXT: kshiftrq $9, %k7, %k7 -; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 -; AVX512F-32-NEXT: kshiftrq $55, %k6, %k7 -; AVX512F-32-NEXT: kxorq %k2, %k7, %k2 -; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 -; AVX512F-32-NEXT: kshiftrq $8, %k2, %k2 -; AVX512F-32-NEXT: kxorq %k2, %k6, %k2 -; AVX512F-32-NEXT: kshiftrq $56, %k2, %k6 -; AVX512F-32-NEXT: kxorq %k1, %k6, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $7, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k2, %k1 -; AVX512F-32-NEXT: kshiftrq $57, %k1, %k2 -; AVX512F-32-NEXT: kxorq %k0, %k2, %k0 -; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $6, %k0, %k0 -; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 -; AVX512F-32-NEXT: kshiftrq $58, %k0, %k1 -; AVX512F-32-NEXT: kxorq %k3, %k1, %k1 -; AVX512F-32-NEXT: movl %ebx, %eax -; AVX512F-32-NEXT: shrl $28, %eax -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $5, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $59, %k0, %k1 -; AVX512F-32-NEXT: kxorq %k4, %k1, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $4, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $60, %k0, %k1 -; AVX512F-32-NEXT: kmovd %eax, %k2 -; AVX512F-32-NEXT: kxorq %k2, %k1, %k1 -; AVX512F-32-NEXT: movl %ebx, %eax -; AVX512F-32-NEXT: shrl $31, %eax -; AVX512F-32-NEXT: movl %ebx, %ecx -; AVX512F-32-NEXT: shrl $30, %ecx -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $3, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $61, %k0, %k1 -; AVX512F-32-NEXT: kxorq %k5, %k1, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $2, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $62, %k0, %k1 -; AVX512F-32-NEXT: kmovd %ecx, %k2 -; AVX512F-32-NEXT: kxorq %k2, %k1, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: kshiftrq $1, %k1, %k1 -; AVX512F-32-NEXT: kxorq %k1, %k0, %k0 -; AVX512F-32-NEXT: kshiftlq $1, %k0, %k0 -; AVX512F-32-NEXT: kshiftrq $1, %k0, %k0 +; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k2 +; AVX512F-32-NEXT: kmovd %esi, %k0 ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 -; AVX512F-32-NEXT: korq %k1, %k0, %k1 -; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: kshiftrq $32, %k0, %k2 -; AVX512F-32-NEXT: kmovd %k2, %eax -; AVX512F-32-NEXT: kmovd %k0, %ecx -; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: kshiftrq $32, %k0, %k2 -; AVX512F-32-NEXT: kmovd %k0, %edx -; AVX512F-32-NEXT: addl %ecx, %edx -; AVX512F-32-NEXT: kmovd %k2, %ecx -; AVX512F-32-NEXT: adcl %eax, %ecx -; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: kshiftrq $32, %k0, %k2 -; AVX512F-32-NEXT: kmovd %k0, %eax -; AVX512F-32-NEXT: addl %edx, %eax +; AVX512F-32-NEXT: kshiftrq $32, %k2, %k3 +; AVX512F-32-NEXT: kandd %k1, %k3, %k3 +; AVX512F-32-NEXT: kmovd %k3, %eax +; AVX512F-32-NEXT: kandd %k0, %k2, %k2 ; AVX512F-32-NEXT: kmovd %k2, %edx -; AVX512F-32-NEXT: adcl %ecx, %edx -; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: kshiftrq $32, %k0, %k2 -; AVX512F-32-NEXT: kmovd %k0, %ecx -; AVX512F-32-NEXT: addl %eax, %ecx -; AVX512F-32-NEXT: kmovd %k2, %eax -; AVX512F-32-NEXT: adcl %edx, %eax -; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: kshiftrq $32, %k0, %k2 -; AVX512F-32-NEXT: kmovd %k0, %edx -; AVX512F-32-NEXT: addl %ecx, %edx +; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k2 +; AVX512F-32-NEXT: kshiftrq $32, %k2, %k3 +; AVX512F-32-NEXT: kandd %k1, %k3, %k3 +; AVX512F-32-NEXT: kmovd %k3, %edi +; AVX512F-32-NEXT: kandd %k0, %k2, %k2 +; AVX512F-32-NEXT: kmovd %k2, %ebx +; AVX512F-32-NEXT: addl %edx, %ebx +; AVX512F-32-NEXT: adcl %eax, %edi +; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k2 +; AVX512F-32-NEXT: kshiftrq $32, %k2, %k3 +; AVX512F-32-NEXT: kandd %k1, %k3, %k3 +; AVX512F-32-NEXT: kmovd %k3, %eax +; AVX512F-32-NEXT: kandd %k0, %k2, %k2 +; AVX512F-32-NEXT: kmovd %k2, %edx +; AVX512F-32-NEXT: addl %ebx, %edx +; AVX512F-32-NEXT: adcl %edi, %eax +; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k2 +; AVX512F-32-NEXT: kshiftrq $32, %k2, %k3 +; AVX512F-32-NEXT: kandd %k1, %k3, %k3 +; AVX512F-32-NEXT: kmovd %k3, %edi +; AVX512F-32-NEXT: kandd %k0, %k2, %k2 +; AVX512F-32-NEXT: kmovd %k2, %ebx +; AVX512F-32-NEXT: addl %edx, %ebx +; AVX512F-32-NEXT: adcl %eax, %edi +; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k2 +; AVX512F-32-NEXT: kshiftrq $32, %k2, %k3 +; AVX512F-32-NEXT: kandd %k1, %k3, %k3 +; AVX512F-32-NEXT: kmovd %k3, %ebp +; AVX512F-32-NEXT: kandd %k0, %k2, %k2 ; AVX512F-32-NEXT: kmovd %k2, %ecx -; AVX512F-32-NEXT: adcl %eax, %ecx -; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: kshiftrq $32, %k0, %k1 -; AVX512F-32-NEXT: kmovd %k0, %eax -; AVX512F-32-NEXT: addl %edx, %eax +; AVX512F-32-NEXT: addl %ebx, %ecx +; AVX512F-32-NEXT: adcl %edi, %ebp +; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k2 +; AVX512F-32-NEXT: kshiftrq $32, %k2, %k3 +; AVX512F-32-NEXT: kandd %k1, %k3, %k1 ; AVX512F-32-NEXT: kmovd %k1, %edx -; AVX512F-32-NEXT: adcl %ecx, %edx -; AVX512F-32-NEXT: addl %ebp, %eax -; AVX512F-32-NEXT: adcl %ebx, %edx +; AVX512F-32-NEXT: kandd %k0, %k2, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: addl %ecx, %eax +; AVX512F-32-NEXT: adcl %ebp, %edx +; AVX512F-32-NEXT: addl %esi, %eax +; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: popl %esi ; AVX512F-32-NEXT: popl %edi ; AVX512F-32-NEXT: popl %ebx |