path: root/llvm/test
author      Elena Demikhovsky <elena.demikhovsky@intel.com>  2016-07-06 09:11:49 +0000
committer   Elena Demikhovsky <elena.demikhovsky@intel.com>  2016-07-06 09:11:49 +0000
commit      02ced295aa8213155517fa1e1c2d33f61d90bf6e (patch)
tree        6d8e53f8fa8e670155f3023491c3eb9a30e6f391 /llvm/test
parent      932ec01328798f575e044e20394eccf562261caa (diff)
download    bcm5719-llvm-02ced295aa8213155517fa1e1c2d33f61d90bf6e.tar.gz
            bcm5719-llvm-02ced295aa8213155517fa1e1c2d33f61d90bf6e.zip
Reverted 274613 due to compilation failure.
llvm-svn: 274615
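
The CHECK updates restored throughout this diff revolve around one lowering pattern: an i1 produced by a vector compare is moved from a mask register to a GPR with kmovw, and the revert brings the explicit 'andl $1' zero extension back into the expected output. A minimal sketch of that pattern, mirroring zext_test1 from the avx512-mask-op.ll hunk below (the IR is taken from this diff; only the comments are added):

define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
  ; <16 x i1> compare result lives in a k-register on AVX-512
  %cmp_res = icmp ugt <16 x i32> %a, %b
  ; extracting bit 5 round-trips through kshiftl/kshiftr + kmovw into a GPR
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  ; after the revert, this zext is again expected to emit 'andl $1, %eax'
  %res = zext i1 %cmp_res.i1 to i32
  ret i32 %res
}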
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/X86/avx512-cmp.ll            |   2
-rw-r--r-- llvm/test/CodeGen/X86/avx512-ext.ll            | 325
-rw-r--r-- llvm/test/CodeGen/X86/avx512-insert-extract.ll |   1
-rw-r--r-- llvm/test/CodeGen/X86/avx512-intrinsics.ll     |  12
-rw-r--r-- llvm/test/CodeGen/X86/avx512-mask-op.ll        |  60
-rw-r--r-- llvm/test/CodeGen/X86/avx512dq-intrinsics.ll   |   4
-rw-r--r-- llvm/test/CodeGen/X86/masked_gather_scatter.ll | 102
-rw-r--r-- llvm/test/CodeGen/X86/pr27591.ll               |  49
-rw-r--r-- llvm/test/CodeGen/X86/pr28173.ll               |  20
-rw-r--r-- llvm/test/CodeGen/X86/xaluo.ll                 |   2
10 files changed, 287 insertions, 290 deletions
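
All ten files are llc + FileCheck tests; each carries its own RUN line, which this diff does not reproduce. A typical header for the KNL-prefixed checks below would look like the following (an assumption for illustration, not the files' verbatim RUN lines):

; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL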
diff --git a/llvm/test/CodeGen/X86/avx512-cmp.ll b/llvm/test/CodeGen/X86/avx512-cmp.ll
index 52caa0ed5d6..2c0c0a5b8c7 100644
--- a/llvm/test/CodeGen/X86/avx512-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-cmp.ll
@@ -163,10 +163,12 @@ define i32 @test10(i64 %b, i64 %c, i1 %d) {
; ALL-NEXT: kmovw %edx, %k0
; ALL-NEXT: cmpq %rsi, %rdi
; ALL-NEXT: sete %al
+; ALL-NEXT: andl $1, %eax
; ALL-NEXT: kmovw %eax, %k1
; ALL-NEXT: korw %k1, %k0, %k1
; ALL-NEXT: kxorw %k1, %k0, %k0
; ALL-NEXT: kmovw %k0, %eax
+; ALL-NEXT: andl $1, %eax
; ALL-NEXT: testb %al, %al
; ALL-NEXT: je LBB8_1
; ALL-NEXT: ## BB#2: ## %if.end.i
diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll
index cb74c598a1a..f5631af34d2 100644
--- a/llvm/test/CodeGen/X86/avx512-ext.ll
+++ b/llvm/test/CodeGen/X86/avx512-ext.ll
@@ -1513,264 +1513,265 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r15d
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r12d
+; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: kmovw %k1, %edi
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r13d
+; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; KNL-NEXT: kmovw %k1, %r13d
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %esi
+; KNL-NEXT: kmovw %k1, %r8d
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edi
+; KNL-NEXT: kmovw %k1, %r10d
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r8d
+; KNL-NEXT: kmovw %k1, %r11d
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r9d
+; KNL-NEXT: kmovw %k1, %ebx
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r10d
+; KNL-NEXT: kmovw %k1, %ebp
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r11d
+; KNL-NEXT: kmovw %k1, %r14d
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ebx
+; KNL-NEXT: kmovw %k1, %r15d
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ebp
+; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r14d
-; KNL-NEXT: vptestmd %zmm5, %zmm5, %k2
+; KNL-NEXT: kmovw %k1, %r12d
+; KNL-NEXT: vptestmd %zmm5, %zmm5, %k1
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vmovd %r15d, %xmm4
-; KNL-NEXT: kmovw %k0, %r15d
-; KNL-NEXT: kshiftlw $14, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $1, %ecx, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: kshiftlw $15, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $2, %r12d, %xmm4, %xmm4
+; KNL-NEXT: vmovd %eax, %xmm4
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: kshiftlw $13, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $3, %edx, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r12d
-; KNL-NEXT: kshiftlw $12, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $4, %r13d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: kshiftlw $11, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
-; KNL-NEXT: kmovw %k0, %r13d
-; KNL-NEXT: kshiftlw $10, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $6, %esi, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %esi
-; KNL-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; KNL-NEXT: kshiftlw $9, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $7, %edi, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %esi
-; KNL-NEXT: kshiftlw $8, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $8, %r8d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %edi
-; KNL-NEXT: kshiftlw $7, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $9, %r9d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r8d
-; KNL-NEXT: kshiftlw $6, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $10, %r10d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r9d
-; KNL-NEXT: kshiftlw $5, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $11, %r11d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r10d
-; KNL-NEXT: kshiftlw $4, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $12, %ebx, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %ebx
-; KNL-NEXT: kshiftlw $3, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $13, %ebp, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %ebp
-; KNL-NEXT: kshiftlw $2, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $14, %r14d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r11d
-; KNL-NEXT: kshiftlw $1, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $15, %r15d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r14d
-; KNL-NEXT: vptestmd %zmm6, %zmm6, %k1
-; KNL-NEXT: kshiftlw $0, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vmovd %eax, %xmm5
-; KNL-NEXT: kmovw %k0, %r15d
; KNL-NEXT: kshiftlw $14, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $1, %ecx, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: vpinsrb $1, %edx, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; KNL-NEXT: kshiftlw $15, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $2, %r12d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: vpinsrb $2, %ecx, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $13, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $3, %edx, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k0, %r12d
+; KNL-NEXT: vpinsrb $3, %edi, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %edi
; KNL-NEXT: kshiftlw $12, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $4, %r13d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: vpinsrb $4, %esi, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: kshiftlw $11, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; KNL-NEXT: vpinsrb $5, %r13d, %xmm4, %xmm4
; KNL-NEXT: kmovw %k0, %r13d
; KNL-NEXT: kshiftlw $10, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $6, %esi, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k0, %esi
-; KNL-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; KNL-NEXT: vpinsrb $6, %r8d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r8d
; KNL-NEXT: kshiftlw $9, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $7, %edi, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k0, %esi
+; KNL-NEXT: vpinsrb $7, %r10d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r10d
; KNL-NEXT: kshiftlw $8, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $8, %r8d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k0, %edi
+; KNL-NEXT: vpinsrb $8, %r11d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r11d
; KNL-NEXT: kshiftlw $7, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $9, %r9d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k0, %r8d
+; KNL-NEXT: vpinsrb $9, %ebx, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %ebx
; KNL-NEXT: kshiftlw $6, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $10, %r10d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k0, %r9d
+; KNL-NEXT: vpinsrb $10, %ebp, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %ebp
; KNL-NEXT: kshiftlw $5, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $11, %ebx, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k0, %ebx
+; KNL-NEXT: vpinsrb $11, %r14d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r14d
; KNL-NEXT: kshiftlw $4, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $12, %ebp, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k0, %ebp
+; KNL-NEXT: vpinsrb $12, %r15d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r15d
; KNL-NEXT: kshiftlw $3, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $13, %r11d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k0, %r10d
+; KNL-NEXT: vpinsrb $13, %r9d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; KNL-NEXT: kshiftlw $2, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $14, %r14d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k0, %r11d
+; KNL-NEXT: vpinsrb $14, %r12d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r12d
; KNL-NEXT: kshiftlw $1, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $15, %r15d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k0, %r14d
-; KNL-NEXT: vptestmd %zmm7, %zmm7, %k0
+; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r9d
+; KNL-NEXT: vptestmd %zmm6, %zmm6, %k0
; KNL-NEXT: kshiftlw $0, %k1, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vmovd %eax, %xmm6
-; KNL-NEXT: kmovw %k1, %r15d
+; KNL-NEXT: vmovd %ecx, %xmm5
+; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $1, %ecx, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $2, %r12d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k1, %r12d
+; KNL-NEXT: vpinsrb $2, %edi, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $3, %edx, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: vpinsrb $3, %esi, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %edi
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $4, %r13d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k1, %r13d
+; KNL-NEXT: vpinsrb $4, %r13d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
-; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: vpinsrb $5, %r8d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r8d
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $6, %esi, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k1, %esi
+; KNL-NEXT: vpinsrb $6, %r10d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r13d
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $7, %edi, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k1, %edi
+; KNL-NEXT: vpinsrb $7, %r11d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %esi
+; KNL-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $8, %r8d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k1, %r8d
+; KNL-NEXT: vpinsrb $8, %ebx, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %ebx
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $9, %r9d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k1, %r9d
+; KNL-NEXT: vpinsrb $9, %ebp, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %ebp
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $10, %ebx, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k1, %ebx
+; KNL-NEXT: vpinsrb $10, %r14d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r10d
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $11, %ebp, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k1, %ebp
+; KNL-NEXT: vpinsrb $11, %r15d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r11d
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $12, %r10d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k1, %r10d
+; KNL-NEXT: vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $13, %r11d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k1, %r11d
+; KNL-NEXT: vpinsrb $13, %r12d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r14d
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $14, %r14d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k1, %r14d
+; KNL-NEXT: vpinsrb $14, %r9d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $15, %r15d, %xmm6, %xmm6
+; KNL-NEXT: vpinsrb $15, %edx, %xmm5, %xmm5
; KNL-NEXT: kmovw %k1, %r15d
+; KNL-NEXT: vptestmd %zmm7, %zmm7, %k1
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vmovd %r12d, %xmm7
+; KNL-NEXT: vmovd %eax, %xmm6
+; KNL-NEXT: kmovw %k0, %r12d
+; KNL-NEXT: kshiftlw $14, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: kshiftlw $15, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $2, %edi, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: kshiftlw $13, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $3, %ecx, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: kshiftlw $12, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $4, %r8d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r8d
+; KNL-NEXT: kshiftlw $11, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $5, %r13d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r13d
+; KNL-NEXT: kshiftlw $10, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
+; KNL-NEXT: kmovw %k0, %edi
+; KNL-NEXT: kshiftlw $9, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $7, %ebx, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %ebx
+; KNL-NEXT: kshiftlw $8, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $8, %ebp, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %ebp
+; KNL-NEXT: kshiftlw $7, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $9, %r10d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r10d
+; KNL-NEXT: kshiftlw $6, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $10, %r11d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r11d
+; KNL-NEXT: kshiftlw $5, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $11, %esi, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %esi
+; KNL-NEXT: kshiftlw $4, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $12, %r14d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r14d
+; KNL-NEXT: kshiftlw $3, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $13, %r9d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r9d
+; KNL-NEXT: kshiftlw $2, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $14, %r15d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r15d
+; KNL-NEXT: kshiftlw $1, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $15, %r12d, %xmm6, %xmm6
; KNL-NEXT: kmovw %k0, %r12d
-; KNL-NEXT: vpinsrb $1, %ecx, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $2, %edx, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $3, %r13d, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $4, %eax, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $5, %esi, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $6, %edi, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $7, %r8d, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $8, %r9d, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $9, %ebx, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $10, %ebp, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $11, %r10d, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $12, %r11d, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $13, %r14d, %xmm7, %xmm7
+; KNL-NEXT: kshiftlw $0, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vmovd %edx, %xmm7
+; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: vpinsrb $1, %eax, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $2, %ecx, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $3, %r8d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $4, %r13d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $5, %edi, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $6, %ebx, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $7, %ebp, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $8, %r10d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $9, %r11d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $10, %esi, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $11, %r14d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $12, %r9d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $13, %r15d, %xmm7, %xmm7
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
@@ -1783,8 +1784,8 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
; KNL-NEXT: vpand %ymm2, %ymm4, %ymm2
-; KNL-NEXT: vpinsrb $14, %r15d, %xmm7, %xmm4
-; KNL-NEXT: vpinsrb $15, %r12d, %xmm4, %xmm4
+; KNL-NEXT: vpinsrb $14, %r12d, %xmm7, %xmm4
+; KNL-NEXT: vpinsrb $15, %edx, %xmm4, %xmm4
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 44ecad01407..308673bc395 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -159,6 +159,7 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
;CHECK-LABEL: test13
;CHECK: cmpl %esi, %edi
;CHECK: setb %al
+;CHECK: andl $1, %eax
;CHECK: kmovw %eax, %k0
;CHECK: movw $-4
;CHECK: korw
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index d1961fc96e6..cc8be256732 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -9,7 +9,9 @@ define i32 @test_kortestz(i16 %a0, i16 %a1) {
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: kortestw %k0, %k1
; CHECK-NEXT: sete %al
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
ret i32 %res
@@ -5089,6 +5091,7 @@ define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
%res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
@@ -5109,6 +5112,7 @@ define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1
; CHECK-NEXT: kandw %k2, %k1, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
%res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
@@ -5131,6 +5135,7 @@ define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
%res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4)
@@ -5148,8 +5153,9 @@ define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1,
; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k2 {%k1}
; CHECK-NEXT: kmovw %k2, %ecx
; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k1 {%k1}
-; CHECK-NEXT: kmovw %k1, %eax
-; CHECK-NEXT: kmovw %k0, %edx
+; CHECK-NEXT: kmovw %k1, %edx
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: andb %cl, %al
; CHECK-NEXT: andb %dl, %al
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 939c338ac9a..f935270d767 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -173,35 +173,18 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
; CHECK-NEXT: kshiftlw $10, %k0, %k0
; CHECK-NEXT: kshiftrw $15, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
%cmp_res = icmp ugt <16 x i32> %a, %b
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
%res = zext i1 %cmp_res.i1 to i32
ret i32 %res
-}
-
-define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
-; CHECK-LABEL: zext_test2:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
-; CHECK-NEXT: kshiftlw $10, %k0, %k0
-; CHECK-NEXT: kshiftrw $15, %k0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+}define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
%cmp_res = icmp ugt <16 x i32> %a, %b
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
%res = zext i1 %cmp_res.i1 to i16
ret i16 %res
-}
-
-define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
-; CHECK-LABEL: zext_test3:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
-; CHECK-NEXT: kshiftlw $10, %k0, %k0
-; CHECK-NEXT: kshiftrw $15, %k0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+}define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
%cmp_res = icmp ugt <16 x i32> %a, %b
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
%res = zext i1 %cmp_res.i1 to i8
@@ -596,6 +579,7 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
; SKX-NEXT: kmovq %rdi, %k0
; SKX-NEXT: cmpl %edx, %esi
; SKX-NEXT: setg %al
+; SKX-NEXT: andl $1, %eax
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: kshiftlq $5, %k1, %k1
; SKX-NEXT: korq %k1, %k0, %k0
@@ -1639,10 +1623,10 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: kmovw %k1, %r8d
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r9d
+; KNL-NEXT: kmovw %k1, %r10d
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r10d
+; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r11d
@@ -1669,22 +1653,22 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vmovd %r9d, %xmm3
-; KNL-NEXT: kmovw %k1, %r9d
+; KNL-NEXT: vmovd %r10d, %xmm3
+; KNL-NEXT: kmovw %k1, %r10d
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2
-; KNL-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $2, %r9d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2
@@ -1693,10 +1677,10 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $9, %ebp, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $11, %edx, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $14, %r10d, %xmm2, %xmm2
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
@@ -1729,7 +1713,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: kmovw %k0, %r13d
; KNL-NEXT: kshiftlw $7, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $6, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %esi
@@ -1744,7 +1728,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $2, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $1, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vmovd %r10d, %xmm2
@@ -1759,12 +1743,12 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $8, %edx, %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
@@ -1798,7 +1782,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: kmovw %k0, %r13d
; KNL-NEXT: kshiftlw $7, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $6, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %esi
@@ -1813,7 +1797,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $2, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $1, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vmovd %r10d, %xmm1
@@ -1828,12 +1812,12 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll
index af14b6b0d93..434495b9f72 100644
--- a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll
@@ -490,6 +490,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) {
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfpclasssd $2, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je LBB28_2
; CHECK-NEXT: ## BB#1:
@@ -497,6 +498,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) {
; CHECK-NEXT: LBB28_2:
; CHECK-NEXT: vfpclasssd $4, %xmm0, %k0
; CHECK-NEXT: kmovw %k0, %ecx
+; CHECK-NEXT: andl $1, %ecx
; CHECK-NEXT: testb %cl, %cl
; CHECK-NEXT: je LBB28_4
; CHECK-NEXT: ## BB#3:
@@ -519,6 +521,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) {
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfpclassss $4, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je LBB29_2
; CHECK-NEXT: ## BB#1:
@@ -526,6 +529,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) {
; CHECK-NEXT: LBB29_2:
; CHECK-NEXT: vfpclassss $4, %xmm0, %k0
; CHECK-NEXT: kmovw %k0, %ecx
+; CHECK-NEXT: andl $1, %ecx
; CHECK-NEXT: testb %cl, %cl
; CHECK-NEXT: je LBB29_4
; CHECK-NEXT: ## BB#3:
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index a8cf982323f..416ccdc68c7 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -1367,9 +1367,12 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; KNL_64-LABEL: test30:
; KNL_64: # BB#0:
; KNL_64-NEXT: andl $1, %edx
+; KNL_64-NEXT: kmovw %edx, %k1
; KNL_64-NEXT: andl $1, %esi
+; KNL_64-NEXT: kmovw %esi, %k2
; KNL_64-NEXT: movl %edi, %eax
; KNL_64-NEXT: andl $1, %eax
+; KNL_64-NEXT: kmovw %eax, %k0
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1
; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm1
@@ -1377,76 +1380,81 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; KNL_64-NEXT: testb $1, %dil
; KNL_64-NEXT: je .LBB29_2
; KNL_64-NEXT: # BB#1: # %cond.load
-; KNL_64-NEXT: vmovq %xmm1, %rcx
-; KNL_64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; KNL_64-NEXT: vmovq %xmm1, %rax
+; KNL_64-NEXT: vmovd (%rax), %xmm0
; KNL_64-NEXT: .LBB29_2: # %else
-; KNL_64-NEXT: testb %sil, %sil
+; KNL_64-NEXT: kmovw %k2, %eax
+; KNL_64-NEXT: movl %eax, %ecx
+; KNL_64-NEXT: andl $1, %ecx
+; KNL_64-NEXT: testb %cl, %cl
; KNL_64-NEXT: je .LBB29_4
; KNL_64-NEXT: # BB#3: # %cond.load1
; KNL_64-NEXT: vpextrq $1, %xmm1, %rcx
; KNL_64-NEXT: vpinsrd $1, (%rcx), %xmm0, %xmm0
; KNL_64-NEXT: .LBB29_4: # %else2
+; KNL_64-NEXT: kmovw %k1, %ecx
+; KNL_64-NEXT: movl %ecx, %edx
+; KNL_64-NEXT: andl $1, %edx
; KNL_64-NEXT: testb %dl, %dl
; KNL_64-NEXT: je .LBB29_6
; KNL_64-NEXT: # BB#5: # %cond.load4
; KNL_64-NEXT: vextracti128 $1, %ymm1, %xmm1
-; KNL_64-NEXT: vmovq %xmm1, %rcx
-; KNL_64-NEXT: vpinsrd $2, (%rcx), %xmm0, %xmm0
+; KNL_64-NEXT: vmovq %xmm1, %rdx
+; KNL_64-NEXT: vpinsrd $2, (%rdx), %xmm0, %xmm0
; KNL_64-NEXT: .LBB29_6: # %else5
-; KNL_64-NEXT: vmovd %eax, %xmm1
-; KNL_64-NEXT: vpinsrd $1, %esi, %xmm1, %xmm1
-; KNL_64-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1
+; KNL_64-NEXT: kmovw %k0, %edx
+; KNL_64-NEXT: vmovd %edx, %xmm1
+; KNL_64-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; KNL_64-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test30:
; KNL_32: # BB#0:
-; KNL_32-NEXT: pushl %ebx
-; KNL_32-NEXT: .Ltmp0:
-; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: pushl %esi
-; KNL_32-NEXT: .Ltmp1:
-; KNL_32-NEXT: .cfi_def_cfa_offset 12
-; KNL_32-NEXT: .Ltmp2:
-; KNL_32-NEXT: .cfi_offset %esi, -12
-; KNL_32-NEXT: .Ltmp3:
-; KNL_32-NEXT: .cfi_offset %ebx, -8
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: andl $1, %eax
-; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; KNL_32-NEXT: kmovw %eax, %k1
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: andl $1, %eax
+; KNL_32-NEXT: kmovw %eax, %k2
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: movl %eax, %ecx
; KNL_32-NEXT: andl $1, %ecx
-; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; KNL_32-NEXT: movl %ebx, %edx
-; KNL_32-NEXT: andl $1, %edx
+; KNL_32-NEXT: kmovw %ecx, %k0
; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1
; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; KNL_32-NEXT: # implicit-def: %XMM0
-; KNL_32-NEXT: testb $1, %bl
+; KNL_32-NEXT: testb $1, %al
; KNL_32-NEXT: je .LBB29_2
; KNL_32-NEXT: # BB#1: # %cond.load
-; KNL_32-NEXT: vmovd %xmm1, %esi
-; KNL_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; KNL_32-NEXT: vmovd %xmm1, %eax
+; KNL_32-NEXT: vmovd (%eax), %xmm0
; KNL_32-NEXT: .LBB29_2: # %else
+; KNL_32-NEXT: kmovw %k2, %eax
+; KNL_32-NEXT: movl %eax, %ecx
+; KNL_32-NEXT: andl $1, %ecx
; KNL_32-NEXT: testb %cl, %cl
; KNL_32-NEXT: je .LBB29_4
; KNL_32-NEXT: # BB#3: # %cond.load1
-; KNL_32-NEXT: vpextrd $1, %xmm1, %esi
-; KNL_32-NEXT: vpinsrd $1, (%esi), %xmm0, %xmm0
+; KNL_32-NEXT: vpextrd $1, %xmm1, %ecx
+; KNL_32-NEXT: vpinsrd $1, (%ecx), %xmm0, %xmm0
; KNL_32-NEXT: .LBB29_4: # %else2
-; KNL_32-NEXT: testb %al, %al
+; KNL_32-NEXT: kmovw %k1, %ecx
+; KNL_32-NEXT: movl %ecx, %edx
+; KNL_32-NEXT: andl $1, %edx
+; KNL_32-NEXT: testb %dl, %dl
; KNL_32-NEXT: je .LBB29_6
; KNL_32-NEXT: # BB#5: # %cond.load4
-; KNL_32-NEXT: vpextrd $2, %xmm1, %esi
-; KNL_32-NEXT: vpinsrd $2, (%esi), %xmm0, %xmm0
+; KNL_32-NEXT: vpextrd $2, %xmm1, %edx
+; KNL_32-NEXT: vpinsrd $2, (%edx), %xmm0, %xmm0
; KNL_32-NEXT: .LBB29_6: # %else5
+; KNL_32-NEXT: kmovw %k0, %edx
; KNL_32-NEXT: vmovd %edx, %xmm1
-; KNL_32-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; KNL_32-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
+; KNL_32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; KNL_32-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
-; KNL_32-NEXT: popl %esi
-; KNL_32-NEXT: popl %ebx
; KNL_32-NEXT: retl
;
; SKX-LABEL: test30:
@@ -1463,7 +1471,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; SKX-NEXT: je .LBB29_2
; SKX-NEXT: # BB#1: # %cond.load
; SKX-NEXT: vmovq %xmm1, %rax
-; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SKX-NEXT: vmovd (%rax), %xmm0
; SKX-NEXT: .LBB29_2: # %else
; SKX-NEXT: kmovb %k1, -{{[0-9]+}}(%rsp)
; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
@@ -1637,12 +1645,12 @@ define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i
; KNL_32-LABEL: test_gather_16i64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
-; KNL_32-NEXT: .Ltmp4:
+; KNL_32-NEXT: .Ltmp0:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: .Ltmp5:
+; KNL_32-NEXT: .Ltmp1:
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
-; KNL_32-NEXT: .Ltmp6:
+; KNL_32-NEXT: .Ltmp2:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
@@ -1760,12 +1768,12 @@ define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <
; KNL_32-LABEL: test_gather_16f64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
-; KNL_32-NEXT: .Ltmp7:
+; KNL_32-NEXT: .Ltmp3:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: .Ltmp8:
+; KNL_32-NEXT: .Ltmp4:
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
-; KNL_32-NEXT: .Ltmp9:
+; KNL_32-NEXT: .Ltmp5:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
@@ -1877,12 +1885,12 @@ define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %
; KNL_32-LABEL: test_scatter_16i64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
-; KNL_32-NEXT: .Ltmp10:
+; KNL_32-NEXT: .Ltmp6:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: .Ltmp11:
+; KNL_32-NEXT: .Ltmp7:
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
-; KNL_32-NEXT: .Ltmp12:
+; KNL_32-NEXT: .Ltmp8:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
@@ -1991,12 +1999,12 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou
; KNL_32-LABEL: test_scatter_16f64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
-; KNL_32-NEXT: .Ltmp13:
+; KNL_32-NEXT: .Ltmp9:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: .Ltmp14:
+; KNL_32-NEXT: .Ltmp10:
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
-; KNL_32-NEXT: .Ltmp15:
+; KNL_32-NEXT: .Ltmp11:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
diff --git a/llvm/test/CodeGen/X86/pr27591.ll b/llvm/test/CodeGen/X86/pr27591.ll
index 11f5de4956a..bbafe5960d9 100644
--- a/llvm/test/CodeGen/X86/pr27591.ll
+++ b/llvm/test/CodeGen/X86/pr27591.ll
@@ -3,48 +3,39 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @test1(i32 %x) #0 {
-; CHECK-LABEL: test1:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: movb %al, %cl
-; CHECK-NEXT: kmovw %ecx, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: andb $1, %al
-; CHECK-NEXT: movzbl %al, %edi
-; CHECK-NEXT: callq callee1
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: retq
entry:
%tobool = icmp ne i32 %x, 0
call void @callee1(i1 zeroext %tobool)
ret void
}
+; CHECK-LABEL: test1:
+; CHECK: cmpl $0, %edi
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: andb $1, %al
+; CHECK-NEXT: movzbl %al, %edi
+; CHECK-NEXT: callq callee1
+
define void @test2(i32 %x) #0 {
-; CHECK-LABEL: test2:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: movb %al, %cl
-; CHECK-NEXT: kmovw %ecx, %k0
-; CHECK-NEXT: kmovw %k0, %ecx
-; CHECK-NEXT: movb %cl, %al
-; CHECK-NEXT: xorl %edi, %edi
-; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: movl $-1, %edx
-; CHECK-NEXT: cmovnel %edx, %edi
-; CHECK-NEXT: callq callee2
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: retq
entry:
%tobool = icmp ne i32 %x, 0
call void @callee2(i1 signext %tobool)
ret void
}
+; CHECK-LABEL: test2:
+; CHECK: cmpl $0, %edi
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: kmovb %eax, %k0
+; CHECK-NEXT: kmovw %k0, %edi
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: movb %dil, %al
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: movl $-1, %ecx
+; CHECK-NEXT: cmovnel %ecx, %edi
+; CHECK-NEXT: callq callee2
+
declare void @callee1(i1 zeroext)
declare void @callee2(i1 signext)
diff --git a/llvm/test/CodeGen/X86/pr28173.ll b/llvm/test/CodeGen/X86/pr28173.ll
index 7c20d0857d9..81c10bb3757 100644
--- a/llvm/test/CodeGen/X86/pr28173.ll
+++ b/llvm/test/CodeGen/X86/pr28173.ll
@@ -5,12 +5,12 @@ target triple = "x86_64-unknown-linux-gnu"
; Note that the kmovs should really *not* appear in the output, this is an
; artifact of the current poor lowering. This is tracked by PR28175.
+; CHECK-LABEL: @foo64
+; CHECK: kmov
+; CHECK: kmov
+; CHECK: orq $-2, %rax
+; CHECK: ret
define i64 @foo64(i1 zeroext %i, i32 %j) #0 {
-; CHECK-LABEL: foo64:
-; CHECK: # BB#0:
-; CHECK-NEXT: orq $-2, %rdi
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: retq
br label %bb
bb:
@@ -22,12 +22,12 @@ end:
ret i64 %v
}
+; CHECK-LABEL: @foo16
+; CHECK: kmov
+; CHECK: kmov
+; CHECK: orl $65534, %eax
+; CHECK: retq
define i16 @foo16(i1 zeroext %i, i32 %j) #0 {
-; CHECK-LABEL: foo16:
-; CHECK: # BB#0:
-; CHECK-NEXT: orl $65534, %edi # imm = 0xFFFE
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: retq
br label %bb
bb:
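
For reference, a hedged reconstruction of the foo64 body that the hunks above truncate; the %z and %v names and the or-with--2 are inferred from the surrounding context lines and the 'orq $-2' CHECK pattern, not shown verbatim here. It illustrates why the kmovs flagged in the note above are an artifact (PR28175): the i1 argument already arrives zero-extended per the ABI, so the round trip through a mask register adds nothing.

define i64 @foo64(i1 zeroext %i, i32 %j) {
  br label %bb

bb:
  ; %i is already zero-extended in %edi; the kmovw in/out pair the
  ; reverted lowering emits around this zext is redundant
  %z = zext i1 %i to i64
  %v = or i64 %z, -2
  br label %end

end:
  ret i64 %v
}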
diff --git a/llvm/test/CodeGen/X86/xaluo.ll b/llvm/test/CodeGen/X86/xaluo.ll
index 31e18989144..76e00a0993d 100644
--- a/llvm/test/CodeGen/X86/xaluo.ll
+++ b/llvm/test/CodeGen/X86/xaluo.ll
@@ -738,10 +738,10 @@ define i1 @bug27873(i64 %c1, i1 %c2) {
; KNL-LABEL: bug27873:
; KNL: ## BB#0:
; KNL-NEXT: andl $1, %esi
-; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: movl $160, %ecx
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: mulq %rcx
+; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: seto %al
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: korw %k1, %k0, %k0