diff options
| author | Benjamin Kramer <benny.kra@googlemail.com> | 2019-05-28 12:52:57 +0000 |
|---|---|---|
| committer | Benjamin Kramer <benny.kra@googlemail.com> | 2019-05-28 12:52:57 +0000 |
| commit | 57e267a2e92a7744df043c740cb946952c05ede8 (patch) | |
| tree | 1edd5fee3b9af9256b739fada22571be37c3560c /llvm/test/CodeGen | |
| parent | 19e91253c0a5e021697f9271c299d6816cbab642 (diff) | |
| download | bcm5719-llvm-57e267a2e92a7744df043c740cb946952c05ede8.tar.gz bcm5719-llvm-57e267a2e92a7744df043c740cb946952c05ede8.zip | |
[X86] Custom lower CONCAT_VECTORS of v2i1
The generic legalizer cannot handle this. Add an assert instead of
silently miscompiling vectors with elements smaller than 8 bits.
llvm-svn: 361814
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-insert-extract.ll | 104 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_saddo.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_smulo.ll | 46 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_ssubo.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_uaddo.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_umulo.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_usubo.ll | 8 |
7 files changed, 151 insertions, 39 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll index b81c829052c..8bdd7dc2c1d 100644 --- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll +++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll @@ -2252,3 +2252,107 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index %t4 = bitcast <128 x i1> %t3 to i128 ret i128 %t4 } + +define void @test_concat_v2i1(<2 x half>* %arg, <2 x half>* %arg1, <2 x half>* %arg2) { +; KNL-LABEL: test_concat_v2i1: +; KNL: ## %bb.0: +; KNL-NEXT: movswl (%rdi), %eax +; KNL-NEXT: vmovd %eax, %xmm0 +; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 +; KNL-NEXT: movswl 2(%rdi), %eax +; KNL-NEXT: vmovd %eax, %xmm1 +; KNL-NEXT: vcvtph2ps %xmm1, %xmm1 +; KNL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; KNL-NEXT: vucomiss %xmm2, %xmm1 +; KNL-NEXT: setb %al +; KNL-NEXT: kmovw %eax, %k0 +; KNL-NEXT: kshiftlw $1, %k0, %k0 +; KNL-NEXT: vucomiss %xmm2, %xmm0 +; KNL-NEXT: setb %al +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: korw %k0, %k1, %k0 +; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; KNL-NEXT: vucomiss %xmm2, %xmm1 +; KNL-NEXT: seta %al +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kshiftlw $1, %k1, %k1 +; KNL-NEXT: vucomiss %xmm2, %xmm0 +; KNL-NEXT: seta %al +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: kmovw %eax, %k2 +; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kandw %k1, %k0, %k1 +; KNL-NEXT: kshiftrw $1, %k1, %k2 +; KNL-NEXT: movswl (%rsi), %eax +; KNL-NEXT: vmovd %eax, %xmm0 +; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 +; KNL-NEXT: movswl 2(%rsi), %eax +; KNL-NEXT: vmovd %eax, %xmm1 +; KNL-NEXT: vcvtph2ps %xmm1, %xmm1 +; KNL-NEXT: vmovss %xmm1, %xmm0, %xmm1 {%k2} {z} +; KNL-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} +; KNL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; KNL-NEXT: vmovd %xmm0, %eax +; KNL-NEXT: movw %ax, (%rdx) +; KNL-NEXT: vcvtps2ph $4, %xmm1, %xmm0 +; KNL-NEXT: vmovd %xmm0, %eax +; KNL-NEXT: movw %ax, 2(%rdx) +; KNL-NEXT: retq +; +; SKX-LABEL: test_concat_v2i1: +; SKX: ## %bb.0: +; SKX-NEXT: movswl (%rdi), %eax +; SKX-NEXT: vmovd %eax, %xmm0 +; SKX-NEXT: vcvtph2ps %xmm0, %xmm0 +; SKX-NEXT: movswl 2(%rdi), %eax +; SKX-NEXT: vmovd %eax, %xmm1 +; SKX-NEXT: vcvtph2ps %xmm1, %xmm1 +; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SKX-NEXT: vucomiss %xmm2, %xmm1 +; SKX-NEXT: setb %al +; SKX-NEXT: kmovd %eax, %k0 +; SKX-NEXT: kshiftlb $1, %k0, %k0 +; SKX-NEXT: vucomiss %xmm2, %xmm0 +; SKX-NEXT: setb %al +; SKX-NEXT: kmovd %eax, %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $7, %k1, %k1 +; SKX-NEXT: korw %k0, %k1, %k0 +; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vucomiss %xmm2, %xmm1 +; SKX-NEXT: seta %al +; SKX-NEXT: kmovd %eax, %k1 +; SKX-NEXT: kshiftlb $1, %k1, %k1 +; SKX-NEXT: vucomiss %xmm2, %xmm0 +; SKX-NEXT: seta %al +; SKX-NEXT: kmovd %eax, %k2 +; SKX-NEXT: kshiftlb $7, %k2, %k2 +; SKX-NEXT: kshiftrb $7, %k2, %k2 +; SKX-NEXT: korw %k1, %k2, %k1 +; SKX-NEXT: kandw %k1, %k0, %k1 +; SKX-NEXT: kshiftrb $1, %k1, %k2 +; SKX-NEXT: movswl (%rsi), %eax +; SKX-NEXT: vmovd %eax, %xmm0 +; SKX-NEXT: vcvtph2ps %xmm0, %xmm0 +; SKX-NEXT: movswl 2(%rsi), %eax +; SKX-NEXT: vmovd %eax, %xmm1 +; SKX-NEXT: vcvtph2ps %xmm1, %xmm1 +; SKX-NEXT: vmovss %xmm1, %xmm0, %xmm1 {%k2} {z} +; SKX-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} +; SKX-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; SKX-NEXT: vmovd %xmm0, %eax +; SKX-NEXT: movw %ax, (%rdx) +; SKX-NEXT: vcvtps2ph $4, %xmm1, %xmm0 +; SKX-NEXT: vmovd %xmm0, %eax +; SKX-NEXT: movw %ax, 2(%rdx) +; SKX-NEXT: retq + %tmp = load <2 x half>, <2 x half>* %arg, align 8 + %tmp3 = fcmp fast olt <2 x half> %tmp, <half 0xH4600, half 0xH4600> + %tmp4 = fcmp fast ogt <2 x half> %tmp, zeroinitializer + %tmp5 = and <2 x i1> %tmp3, %tmp4 + %tmp6 = load <2 x half>, <2 x half>* %arg1, align 8 + %tmp7 = select <2 x i1> %tmp5, <2 x half> %tmp6, <2 x half> zeroinitializer + store <2 x half> %tmp7, <2 x half>* %arg2, align 8 + ret void +} diff --git a/llvm/test/CodeGen/X86/vec_saddo.ll b/llvm/test/CodeGen/X86/vec_saddo.ll index a9e5697d932..aeb1951fbef 100644 --- a/llvm/test/CodeGen/X86/vec_saddo.ll +++ b/llvm/test/CodeGen/X86/vec_saddo.ll @@ -1871,7 +1871,8 @@ define <2 x i32> @saddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) ; AVX512-NEXT: cmpb %al, %cl ; AVX512-NEXT: sete %al ; AVX512-NEXT: andb %bl, %al -; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: kmovd %eax, %k0 +; AVX512-NEXT: kshiftlw $1, %k0, %k0 ; AVX512-NEXT: testq %r9, %r9 ; AVX512-NEXT: setns %al ; AVX512-NEXT: testq %rsi, %rsi @@ -1884,8 +1885,9 @@ define <2 x i32> @saddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) ; AVX512-NEXT: cmpb %bl, %cl ; AVX512-NEXT: setne %cl ; AVX512-NEXT: andb %al, %cl -; AVX512-NEXT: movb %cl, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 +; AVX512-NEXT: andl $1, %ecx +; AVX512-NEXT: kmovw %ecx, %k1 +; AVX512-NEXT: korw %k0, %k1, %k1 ; AVX512-NEXT: movq %rdx, 16(%r10) ; AVX512-NEXT: movq %rdi, (%r10) ; AVX512-NEXT: movq %r14, 24(%r10) diff --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll index 8b716b7f1ec..ab97c51df41 100644 --- a/llvm/test/CodeGen/X86/vec_smulo.ll +++ b/llvm/test/CodeGen/X86/vec_smulo.ll @@ -2706,44 +2706,42 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) ; AVX512-NEXT: pushq %r13 ; AVX512-NEXT: pushq %r12 ; AVX512-NEXT: pushq %rbx -; AVX512-NEXT: subq $40, %rsp -; AVX512-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512-NEXT: movq %r8, %r15 -; AVX512-NEXT: movq %rdx, %rax -; AVX512-NEXT: movq %rsi, %r12 -; AVX512-NEXT: movq %rdi, %rbx -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r14 -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r9 +; AVX512-NEXT: subq $24, %rsp +; AVX512-NEXT: movq %r8, %rax +; AVX512-NEXT: movq %rcx, %r14 +; AVX512-NEXT: movq %rdx, %rbx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12 ; AVX512-NEXT: movq $0, {{[0-9]+}}(%rsp) ; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %r8 -; AVX512-NEXT: movq %rax, %rdi -; AVX512-NEXT: movq %rcx, %rsi +; AVX512-NEXT: movq %rax, %rdx ; AVX512-NEXT: movq %r9, %rcx ; AVX512-NEXT: callq __muloti4 ; AVX512-NEXT: movq %rax, %r13 ; AVX512-NEXT: movq %rdx, %rbp -; AVX512-NEXT: cmpq $0, {{[0-9]+}}(%rsp) -; AVX512-NEXT: setne %al -; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movq $0, {{[0-9]+}}(%rsp) ; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; AVX512-NEXT: movq %rbx, %rdi -; AVX512-NEXT: movq %r12, %rsi -; AVX512-NEXT: movq %r15, %rdx -; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX512-NEXT: movq %r14, %rsi +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; AVX512-NEXT: movq %r12, %rcx ; AVX512-NEXT: callq __muloti4 ; AVX512-NEXT: cmpq $0, {{[0-9]+}}(%rsp) ; AVX512-NEXT: setne %cl -; AVX512-NEXT: movb %cl, {{[0-9]+}}(%rsp) -; AVX512-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; AVX512-NEXT: movq %rbp, 24(%r14) -; AVX512-NEXT: movq %r13, 16(%r14) -; AVX512-NEXT: movq %rdx, 8(%r14) -; AVX512-NEXT: movq %rax, (%r14) +; AVX512-NEXT: kmovd %ecx, %k0 +; AVX512-NEXT: kshiftlw $1, %k0, %k0 +; AVX512-NEXT: cmpq $0, {{[0-9]+}}(%rsp) +; AVX512-NEXT: setne %cl +; AVX512-NEXT: andl $1, %ecx +; AVX512-NEXT: kmovw %ecx, %k1 +; AVX512-NEXT: korw %k0, %k1, %k1 +; AVX512-NEXT: movq %rdx, 24(%r15) +; AVX512-NEXT: movq %rax, 16(%r15) +; AVX512-NEXT: movq %rbp, 8(%r15) +; AVX512-NEXT: movq %r13, (%r15) ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} -; AVX512-NEXT: addq $40, %rsp +; AVX512-NEXT: addq $24, %rsp ; AVX512-NEXT: popq %rbx ; AVX512-NEXT: popq %r12 ; AVX512-NEXT: popq %r13 diff --git a/llvm/test/CodeGen/X86/vec_ssubo.ll b/llvm/test/CodeGen/X86/vec_ssubo.ll index 51192ed45d4..15c0531d67a 100644 --- a/llvm/test/CodeGen/X86/vec_ssubo.ll +++ b/llvm/test/CodeGen/X86/vec_ssubo.ll @@ -1910,7 +1910,8 @@ define <2 x i32> @ssubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) ; AVX512-NEXT: cmpb %al, %cl ; AVX512-NEXT: setne %al ; AVX512-NEXT: andb %bl, %al -; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: kmovd %eax, %k0 +; AVX512-NEXT: kshiftlw $1, %k0, %k0 ; AVX512-NEXT: testq %r9, %r9 ; AVX512-NEXT: setns %al ; AVX512-NEXT: testq %rsi, %rsi @@ -1923,8 +1924,9 @@ define <2 x i32> @ssubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) ; AVX512-NEXT: cmpb %bl, %cl ; AVX512-NEXT: setne %cl ; AVX512-NEXT: andb %al, %cl -; AVX512-NEXT: movb %cl, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 +; AVX512-NEXT: andl $1, %ecx +; AVX512-NEXT: kmovw %ecx, %k1 +; AVX512-NEXT: korw %k0, %k1, %k1 ; AVX512-NEXT: movq %rdx, 16(%r10) ; AVX512-NEXT: movq %rdi, (%r10) ; AVX512-NEXT: movq %r14, 24(%r10) diff --git a/llvm/test/CodeGen/X86/vec_uaddo.ll b/llvm/test/CodeGen/X86/vec_uaddo.ll index 36dc9311731..41a0e258e3d 100644 --- a/llvm/test/CodeGen/X86/vec_uaddo.ll +++ b/llvm/test/CodeGen/X86/vec_uaddo.ll @@ -1336,12 +1336,14 @@ define <2 x i32> @uaddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) ; AVX512-NEXT: addq {{[0-9]+}}(%rsp), %rdx ; AVX512-NEXT: adcq {{[0-9]+}}(%rsp), %rcx ; AVX512-NEXT: setb %al -; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: kmovd %eax, %k0 +; AVX512-NEXT: kshiftlw $1, %k0, %k0 ; AVX512-NEXT: addq %r8, %rdi ; AVX512-NEXT: adcq %r9, %rsi ; AVX512-NEXT: setb %al -; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 +; AVX512-NEXT: andl $1, %eax +; AVX512-NEXT: kmovw %eax, %k1 +; AVX512-NEXT: korw %k0, %k1, %k1 ; AVX512-NEXT: movq %rdx, 16(%r10) ; AVX512-NEXT: movq %rdi, (%r10) ; AVX512-NEXT: movq %rcx, 24(%r10) diff --git a/llvm/test/CodeGen/X86/vec_umulo.ll b/llvm/test/CodeGen/X86/vec_umulo.ll index 6f37183c053..0c95b73853e 100644 --- a/llvm/test/CodeGen/X86/vec_umulo.ll +++ b/llvm/test/CodeGen/X86/vec_umulo.ll @@ -2575,7 +2575,8 @@ define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) ; AVX512-NEXT: setb %al ; AVX512-NEXT: orb %cl, %al ; AVX512-NEXT: orb %r13b, %al -; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: kmovd %eax, %k0 +; AVX512-NEXT: kshiftlw $1, %k0, %k0 ; AVX512-NEXT: testq %r9, %r9 ; AVX512-NEXT: setne %al ; AVX512-NEXT: testq %rsi, %rsi @@ -2597,8 +2598,9 @@ define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) ; AVX512-NEXT: setb %sil ; AVX512-NEXT: orb %bl, %sil ; AVX512-NEXT: orb %cl, %sil -; AVX512-NEXT: movb %sil, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 +; AVX512-NEXT: andl $1, %esi +; AVX512-NEXT: kmovw %esi, %k1 +; AVX512-NEXT: korw %k0, %k1, %k1 ; AVX512-NEXT: movq %r10, 16(%r14) ; AVX512-NEXT: movq %rax, (%r14) ; AVX512-NEXT: movq %r15, 24(%r14) diff --git a/llvm/test/CodeGen/X86/vec_usubo.ll b/llvm/test/CodeGen/X86/vec_usubo.ll index 5c843dc504c..b662ac45caf 100644 --- a/llvm/test/CodeGen/X86/vec_usubo.ll +++ b/llvm/test/CodeGen/X86/vec_usubo.ll @@ -1378,12 +1378,14 @@ define <2 x i32> @usubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) ; AVX512-NEXT: subq {{[0-9]+}}(%rsp), %rdx ; AVX512-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx ; AVX512-NEXT: setb %al -; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: kmovd %eax, %k0 +; AVX512-NEXT: kshiftlw $1, %k0, %k0 ; AVX512-NEXT: subq %r8, %rdi ; AVX512-NEXT: sbbq %r9, %rsi ; AVX512-NEXT: setb %al -; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1 +; AVX512-NEXT: andl $1, %eax +; AVX512-NEXT: kmovw %eax, %k1 +; AVX512-NEXT: korw %k0, %k1, %k1 ; AVX512-NEXT: movq %rdx, 16(%r10) ; AVX512-NEXT: movq %rdi, (%r10) ; AVX512-NEXT: movq %rcx, 24(%r10) |

