diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 13 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-ext.ll | 291 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-pmovxrm.ll | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-zext.ll | 14 | 
5 files changed, 25 insertions, 301 deletions
| diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index d7b4b4291a1..5a82ff1e3ab 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4084,6 +4084,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,        if (VT.getSimpleVT() == N1.getSimpleValueType())          return N1; +      // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF. +      if (N1.isUndef()) +        return getUNDEF(VT); + +      // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of +      // the concat have the same type as the extract. +      if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS && +          N1.getNumOperands() > 0 && +          VT == N1.getOperand(0).getValueType()) { +        unsigned Factor = VT.getVectorNumElements(); +        return N1.getOperand(N2C->getZExtValue() / Factor); +      } +        // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created        // during shuffle legalization.        if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) && diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll index 03d6127ae5d..f1f98411060 100644 --- a/llvm/test/CodeGen/X86/avx512-ext.ll +++ b/llvm/test/CodeGen/X86/avx512-ext.ll @@ -1502,301 +1502,22 @@ define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {  define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {  ; KNL-LABEL: test21:  ; KNL:       ## BB#0: -; KNL-NEXT:    pushq %rbp -; KNL-NEXT:    pushq %r15 -; KNL-NEXT:    pushq %r14 -; KNL-NEXT:    pushq %r13 -; KNL-NEXT:    pushq %r12 -; KNL-NEXT:    pushq %rbx -; KNL-NEXT:    vpmovsxbd %xmm7, %zmm7 -; KNL-NEXT:    vpslld $31, %zmm7, %zmm7 -; KNL-NEXT:    vpmovsxbd %xmm6, %zmm6 -; KNL-NEXT:    vpslld $31, %zmm6, %zmm6 -; KNL-NEXT:    vpmovsxbd %xmm5, %zmm5 -; KNL-NEXT:    vpslld $31, %zmm5, %zmm5 -; KNL-NEXT:    vpmovsxbd %xmm4, %zmm4 -; KNL-NEXT:    vpslld $31, %zmm4, %zmm4 -; KNL-NEXT:    vptestmd %zmm4, %zmm4, %k0 -; KNL-NEXT:    kshiftlw $14, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    kmovw %k1, %eax -; KNL-NEXT:    kshiftlw $15, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    kmovw %k1, %r13d -; KNL-NEXT:    kshiftlw $13, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    kmovw %k1, %ecx -; KNL-NEXT:    kshiftlw $12, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    kmovw %k1, %r11d -; KNL-NEXT:    kshiftlw $11, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    kmovw %k1, %r8d -; KNL-NEXT:    kshiftlw $10, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    kmovw %k1, %edi -; KNL-NEXT:    kshiftlw $9, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    kmovw %k1, %edx -; KNL-NEXT:    kshiftlw $8, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    kmovw %k1, %esi -; KNL-NEXT:    kshiftlw $7, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    kmovw %k1, %ebx -; KNL-NEXT:    kshiftlw $6, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    kmovw %k1, %ebp -; KNL-NEXT:    kshiftlw $5, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    kmovw %k1, %r14d -; KNL-NEXT:    kshiftlw $4, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    kmovw %k1, %r15d -; KNL-NEXT:    kshiftlw $3, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    kmovw %k1, %r12d -; KNL-NEXT:    kshiftlw $2, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    kmovw %k1, %r10d -; KNL-NEXT:    kshiftlw $1, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    kmovw %k1, %r9d -; KNL-NEXT:    vptestmd %zmm5, %zmm5, %k1 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vmovd %r13d, %xmm4 -; KNL-NEXT:    kmovw %k0, %r13d -; KNL-NEXT:    kshiftlw $14, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $1, %eax, %xmm4, %xmm4 -; KNL-NEXT:    kmovw %k0, %eax -; KNL-NEXT:    kshiftlw $15, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $2, %ecx, %xmm4, %xmm4 -; KNL-NEXT:    kmovw %k0, %ecx -; KNL-NEXT:    kshiftlw $13, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $3, %r11d, %xmm4, %xmm4 -; KNL-NEXT:    kmovw %k0, %r11d -; KNL-NEXT:    kshiftlw $12, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $4, %r8d, %xmm4, %xmm4 -; KNL-NEXT:    kmovw %k0, %r8d -; KNL-NEXT:    kshiftlw $11, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $5, %edi, %xmm4, %xmm4 -; KNL-NEXT:    kmovw %k0, %edi -; KNL-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) ## 4-byte Spill -; KNL-NEXT:    kshiftlw $10, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $6, %edx, %xmm4, %xmm4 -; KNL-NEXT:    kmovw %k0, %edx -; KNL-NEXT:    kshiftlw $9, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $7, %esi, %xmm4, %xmm4 -; KNL-NEXT:    kmovw %k0, %esi -; KNL-NEXT:    kshiftlw $8, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $8, %ebx, %xmm4, %xmm4 -; KNL-NEXT:    kmovw %k0, %ebx -; KNL-NEXT:    kshiftlw $7, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $9, %ebp, %xmm4, %xmm4 -; KNL-NEXT:    kmovw %k0, %ebp -; KNL-NEXT:    kshiftlw $6, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $10, %r14d, %xmm4, %xmm4 -; KNL-NEXT:    kmovw %k0, %r14d -; KNL-NEXT:    kshiftlw $5, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $11, %r15d, %xmm4, %xmm4 -; KNL-NEXT:    kmovw %k0, %r15d -; KNL-NEXT:    kshiftlw $4, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $12, %r12d, %xmm4, %xmm4 -; KNL-NEXT:    kmovw %k0, %edi -; KNL-NEXT:    kshiftlw $3, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $13, %r10d, %xmm4, %xmm4 -; KNL-NEXT:    kmovw %k0, %r10d -; KNL-NEXT:    kshiftlw $2, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $14, %r9d, %xmm4, %xmm4 -; KNL-NEXT:    kmovw %k0, %r9d -; KNL-NEXT:    kshiftlw $1, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $15, %r13d, %xmm4, %xmm4 -; KNL-NEXT:    kmovw %k0, %r12d -; KNL-NEXT:    vptestmd %zmm6, %zmm6, %k0 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    vmovd %ecx, %xmm5 -; KNL-NEXT:    kmovw %k1, %r13d -; KNL-NEXT:    kshiftlw $14, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    vpinsrb $1, %eax, %xmm5, %xmm5 -; KNL-NEXT:    kmovw %k1, %eax -; KNL-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill -; KNL-NEXT:    kshiftlw $15, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    vpinsrb $2, %r11d, %xmm5, %xmm5 -; KNL-NEXT:    kmovw %k1, %eax -; KNL-NEXT:    kshiftlw $13, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    vpinsrb $3, %r8d, %xmm5, %xmm5 -; KNL-NEXT:    kmovw %k1, %ecx -; KNL-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill -; KNL-NEXT:    kshiftlw $12, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload -; KNL-NEXT:    kmovw %k1, %ecx -; KNL-NEXT:    kshiftlw $11, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    vpinsrb $5, %edx, %xmm5, %xmm5 -; KNL-NEXT:    kmovw %k1, %r8d -; KNL-NEXT:    kshiftlw $10, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    vpinsrb $6, %esi, %xmm5, %xmm5 -; KNL-NEXT:    kmovw %k1, %edx -; KNL-NEXT:    kshiftlw $9, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    vpinsrb $7, %ebx, %xmm5, %xmm5 -; KNL-NEXT:    kmovw %k1, %esi -; KNL-NEXT:    kshiftlw $8, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    vpinsrb $8, %ebp, %xmm5, %xmm5 -; KNL-NEXT:    kmovw %k1, %ebp -; KNL-NEXT:    kshiftlw $7, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    vpinsrb $9, %r14d, %xmm5, %xmm5 -; KNL-NEXT:    kmovw %k1, %ebx -; KNL-NEXT:    kshiftlw $6, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    vpinsrb $10, %r15d, %xmm5, %xmm5 -; KNL-NEXT:    kmovw %k1, %r11d -; KNL-NEXT:    kshiftlw $5, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    vpinsrb $11, %edi, %xmm5, %xmm5 -; KNL-NEXT:    kmovw %k1, %edi -; KNL-NEXT:    kshiftlw $4, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    vpinsrb $12, %r10d, %xmm5, %xmm5 -; KNL-NEXT:    kmovw %k1, %r10d -; KNL-NEXT:    kshiftlw $3, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    vpinsrb $13, %r9d, %xmm5, %xmm5 -; KNL-NEXT:    kmovw %k1, %r9d -; KNL-NEXT:    kshiftlw $2, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    vpinsrb $14, %r12d, %xmm5, %xmm5 -; KNL-NEXT:    kmovw %k1, %r14d -; KNL-NEXT:    kshiftlw $1, %k0, %k1 -; KNL-NEXT:    kshiftrw $15, %k1, %k1 -; KNL-NEXT:    vpinsrb $15, %r13d, %xmm5, %xmm5 -; KNL-NEXT:    kmovw %k1, %r15d -; KNL-NEXT:    vptestmd %zmm7, %zmm7, %k1 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vmovd %eax, %xmm6 -; KNL-NEXT:    kmovw %k0, %r12d -; KNL-NEXT:    kshiftlw $14, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload -; KNL-NEXT:    kmovw %k0, %r13d -; KNL-NEXT:    kshiftlw $15, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload -; KNL-NEXT:    kmovw %k0, %eax -; KNL-NEXT:    kshiftlw $13, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $3, %ecx, %xmm6, %xmm6 -; KNL-NEXT:    kmovw %k0, %ecx -; KNL-NEXT:    kshiftlw $12, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $4, %r8d, %xmm6, %xmm6 -; KNL-NEXT:    kmovw %k0, %r8d -; KNL-NEXT:    kshiftlw $11, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $5, %edx, %xmm6, %xmm6 -; KNL-NEXT:    kmovw %k0, %edx -; KNL-NEXT:    kshiftlw $10, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $6, %esi, %xmm6, %xmm6 -; KNL-NEXT:    kmovw %k0, %esi -; KNL-NEXT:    kshiftlw $9, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $7, %ebp, %xmm6, %xmm6 -; KNL-NEXT:    kmovw %k0, %ebp -; KNL-NEXT:    kshiftlw $8, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $8, %ebx, %xmm6, %xmm6 -; KNL-NEXT:    kmovw %k0, %ebx -; KNL-NEXT:    kshiftlw $7, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $9, %r11d, %xmm6, %xmm6 -; KNL-NEXT:    kmovw %k0, %r11d -; KNL-NEXT:    kshiftlw $6, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $10, %edi, %xmm6, %xmm6 -; KNL-NEXT:    kmovw %k0, %edi -; KNL-NEXT:    kshiftlw $5, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $11, %r10d, %xmm6, %xmm6 -; KNL-NEXT:    kmovw %k0, %r10d -; KNL-NEXT:    kshiftlw $4, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $12, %r9d, %xmm6, %xmm6 -; KNL-NEXT:    kmovw %k0, %r9d -; KNL-NEXT:    kshiftlw $3, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $13, %r14d, %xmm6, %xmm6 -; KNL-NEXT:    kmovw %k0, %r14d -; KNL-NEXT:    kshiftlw $2, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $14, %r15d, %xmm6, %xmm6 -; KNL-NEXT:    kmovw %k0, %r15d -; KNL-NEXT:    kshiftlw $1, %k1, %k0 -; KNL-NEXT:    kshiftrw $15, %k0, %k0 -; KNL-NEXT:    vpinsrb $15, %r12d, %xmm6, %xmm6 -; KNL-NEXT:    kmovw %k0, %r12d -; KNL-NEXT:    kshiftrw $15, %k1, %k0 -; KNL-NEXT:    vmovd %eax, %xmm7 -; KNL-NEXT:    kmovw %k0, %eax -; KNL-NEXT:    vpinsrb $1, %r13d, %xmm7, %xmm7 -; KNL-NEXT:    vpinsrb $2, %ecx, %xmm7, %xmm7 -; KNL-NEXT:    vpinsrb $3, %r8d, %xmm7, %xmm7 -; KNL-NEXT:    vpinsrb $4, %edx, %xmm7, %xmm7 -; KNL-NEXT:    vpinsrb $5, %esi, %xmm7, %xmm7 -; KNL-NEXT:    vpinsrb $6, %ebp, %xmm7, %xmm7 -; KNL-NEXT:    vpinsrb $7, %ebx, %xmm7, %xmm7 -; KNL-NEXT:    vpinsrb $8, %r11d, %xmm7, %xmm7 -; KNL-NEXT:    vpinsrb $9, %edi, %xmm7, %xmm7 -; KNL-NEXT:    vpinsrb $10, %r10d, %xmm7, %xmm7 -; KNL-NEXT:    vpinsrb $11, %r9d, %xmm7, %xmm7 -; KNL-NEXT:    vpinsrb $12, %r14d, %xmm7, %xmm7 -; KNL-NEXT:    vpinsrb $13, %r15d, %xmm7, %xmm7 +; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero,xmm7[8],zero,xmm7[9],zero,xmm7[10],zero,xmm7[11],zero,xmm7[12],zero,xmm7[13],zero,xmm7[14],zero,xmm7[15],zero +; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero +; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero  ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero  ; KNL-NEXT:    vpsllw $15, %ymm4, %ymm4  ; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4  ; KNL-NEXT:    vpand %ymm0, %ymm4, %ymm0 -; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero -; KNL-NEXT:    vpsllw $15, %ymm4, %ymm4 +; KNL-NEXT:    vpsllw $15, %ymm5, %ymm4  ; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4  ; KNL-NEXT:    vpand %ymm1, %ymm4, %ymm1 -; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero -; KNL-NEXT:    vpsllw $15, %ymm4, %ymm4 +; KNL-NEXT:    vpsllw $15, %ymm6, %ymm4  ; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4  ; KNL-NEXT:    vpand %ymm2, %ymm4, %ymm2 -; KNL-NEXT:    vpinsrb $14, %r12d, %xmm7, %xmm4 -; KNL-NEXT:    vpinsrb $15, %eax, %xmm4, %xmm4 -; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero -; KNL-NEXT:    vpsllw $15, %ymm4, %ymm4 +; KNL-NEXT:    vpsllw $15, %ymm7, %ymm4  ; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4  ; KNL-NEXT:    vpand %ymm3, %ymm4, %ymm3 -; KNL-NEXT:    popq %rbx -; KNL-NEXT:    popq %r12 -; KNL-NEXT:    popq %r13 -; KNL-NEXT:    popq %r14 -; KNL-NEXT:    popq %r15 -; KNL-NEXT:    popq %rbp  ; KNL-NEXT:    retq  ;  ; SKX-LABEL: test21: diff --git a/llvm/test/CodeGen/X86/avx512-pmovxrm.ll b/llvm/test/CodeGen/X86/avx512-pmovxrm.ll index 7c3965e0863..ab3f32091fc 100644 --- a/llvm/test/CodeGen/X86/avx512-pmovxrm.ll +++ b/llvm/test/CodeGen/X86/avx512-pmovxrm.ll @@ -135,14 +135,12 @@ define <8 x i64> @test_llvm_x86_avx512_pmovzxbq(<16 x i8>* %a) {  ; X32-LABEL: test_llvm_x86_avx512_pmovzxbq:  ; X32:       ## BB#0:  ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax -; X32-NEXT:    vmovdqu (%eax), %xmm0 -; X32-NEXT:    vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero +; X32-NEXT:    vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero  ; X32-NEXT:    retl  ;  ; X64-LABEL: test_llvm_x86_avx512_pmovzxbq:  ; X64:       ## BB#0: -; X64-NEXT:    vmovdqu (%rdi), %xmm0 -; X64-NEXT:    vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero +; X64-NEXT:    vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero  ; X64-NEXT:    retq    %1 = load <16 x i8>, <16 x i8>* %a, align 1    %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll index f4cf22c5ed3..f25acb60375 100644 --- a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll @@ -78,7 +78,6 @@ declare <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double>, <2 x do  define <8 x double>@test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <2 x double> %x1,<8 x double> %x3, i8 %x4) {  ; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_512:  ; CHECK:       ## BB#0: -; CHECK-NEXT:    ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>  ; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm3  ; CHECK-NEXT:    kmovb %edi, %k1  ; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1} @@ -119,7 +118,6 @@ declare <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64>, <2 x i64>, i3  define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i64> %x1, <8 x i64> %x3, i8 %x4) {  ; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_512:  ; CHECK:       ## BB#0: -; CHECK-NEXT:    ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>  ; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm3  ; CHECK-NEXT:    kmovb %edi, %k1  ; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1} diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll index 1febf559bde..e1c3d8f7d08 100644 --- a/llvm/test/CodeGen/X86/vector-zext.ll +++ b/llvm/test/CodeGen/X86/vector-zext.ll @@ -458,16 +458,10 @@ define <8 x i64> @zext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp  ; AVX2-NEXT:    vmovdqa %ymm2, %ymm0  ; AVX2-NEXT:    retq  ; -; AVX512F-LABEL: zext_16i8_to_8i64: -; AVX512F:       # BB#0: # %entry -; AVX512F-NEXT:    vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero -; AVX512F-NEXT:    retq -; -; AVX512BW-LABEL: zext_16i8_to_8i64: -; AVX512BW:       # BB#0: # %entry -; AVX512BW-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> -; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero -; AVX512BW-NEXT:    retq +; AVX512-LABEL: zext_16i8_to_8i64: +; AVX512:       # BB#0: # %entry +; AVX512-NEXT:    vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT:    retq  entry:    %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>    %C = zext <8 x i8> %B to <8 x i64> | 

