Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r--  llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll | 97 +
-rw-r--r--  llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll   | 45 +
-rw-r--r--  llvm/test/CodeGen/X86/avx512-intrinsics.ll           | 28 -
3 files changed, 142 insertions, 28 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
index e5dce2d7248..9b0e30103f4 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -55,6 +55,103 @@ entry:
   ret i16 %13
 }
 
+define i32 @test_mm512_kortestc(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D) {
+; X32-LABEL: test_mm512_kortestc:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    andl $-64, %esp
+; X32-NEXT:    subl $64, %esp
+; X32-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
+; X32-NEXT:    vpcmpneqd 8(%ebp), %zmm2, %k1
+; X32-NEXT:    korw %k0, %k1, %k0
+; X32-NEXT:    kmovw %k0, %eax
+; X32-NEXT:    cmpw $-1, %ax
+; X32-NEXT:    sete %al
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    movzbl %al, %eax
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    vzeroupper
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_kortestc:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
+; X64-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
+; X64-NEXT:    korw %k0, %k1, %k0
+; X64-NEXT:    kmovw %k0, %eax
+; X64-NEXT:    cmpw $-1, %ax
+; X64-NEXT:    sete %al
+; X64-NEXT:    andb $1, %al
+; X64-NEXT:    movzbl %al, %eax
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <8 x i64> %__A to <16 x i32>
+  %1 = bitcast <8 x i64> %__B to <16 x i32>
+  %2 = icmp ne <16 x i32> %0, %1
+  %3 = bitcast <8 x i64> %__C to <16 x i32>
+  %4 = bitcast <8 x i64> %__D to <16 x i32>
+  %5 = icmp ne <16 x i32> %3, %4
+  %6 = or <16 x i1> %5, %2 %7 = bitcast <16 x i1> %6 to i16
+  %8 = icmp eq i16 %7, -1
+  %9 = zext i1 %8 to i32
+  ret i32 %9
+}
+
+define i32 @test_mm512_kortestz(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D) {
+; X32-LABEL: test_mm512_kortestz:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    .cfi_def_cfa_register %ebp
+; X32-NEXT:    andl $-64, %esp
+; X32-NEXT:    subl $64, %esp
+; X32-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
+; X32-NEXT:    vpcmpneqd 8(%ebp), %zmm2, %k1
+; X32-NEXT:    korw %k0, %k1, %k0
+; X32-NEXT:    kmovw %k0, %eax
+; X32-NEXT:    cmpw $0, %ax
+; X32-NEXT:    sete %al
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    movzbl %al, %eax
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    vzeroupper
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_kortestz:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
+; X64-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
+; X64-NEXT:    korw %k0, %k1, %k0
+; X64-NEXT:    kmovw %k0, %eax
+; X64-NEXT:    cmpw $0, %ax
+; X64-NEXT:    sete %al
+; X64-NEXT:    andb $1, %al
+; X64-NEXT:    movzbl %al, %eax
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <8 x i64> %__A to <16 x i32>
+  %1 = bitcast <8 x i64> %__B to <16 x i32>
+  %2 = icmp ne <16 x i32> %0, %1
+  %3 = bitcast <8 x i64> %__C to <16 x i32>
+  %4 = bitcast <8 x i64> %__D to <16 x i32>
+  %5 = icmp ne <16 x i32> %3, %4
+  %6 = or <16 x i1> %5, %2
+  %7 = bitcast <16 x i1> %6 to i16
+  %8 = icmp eq i16 %7, 0
+  %9 = zext i1 %8 to i32
+  ret i32 %9
+}
+
 define <16 x float> @test_mm512_shuffle_f32x4(<16 x float> %__A, <16 x float> %__B) {
 ; X32-LABEL: test_mm512_shuffle_f32x4:
 ; X32:       # %bb.0: # %entry
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
index 307691061bf..642c82728cf 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -3832,3 +3832,48 @@ define i16 @test_kxor(i16 %a0, i16 %a1) {
   ret i16 %t2
 }
 
+declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
+define i32 @test_kortestz(<8 x i64> %A, <8 x i64> %B, <8 x i64> %C, <8 x i64> %D) {
+; CHECK-LABEL: test_kortestz:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
+; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    kortestw %k1, %k0
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
+entry:
+  %0 = bitcast <8 x i64> %A to <16 x i32>
+  %1 = bitcast <8 x i64> %B to <16 x i32>
+  %2 = icmp ne <16 x i32> %0, %1
+  %3 = bitcast <8 x i64> %C to <16 x i32>
+  %4 = bitcast <8 x i64> %D to <16 x i32>
+  %5 = icmp ne <16 x i32> %3, %4
+  %6 = bitcast <16 x i1> %2 to i16
+  %7 = bitcast <16 x i1> %5 to i16
+  %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %6, i16 %7)
+  ret i32 %res
+}
+
+declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
+define i32 @test_kortestc(<8 x i64> %A, <8 x i64> %B, <8 x i64> %C, <8 x i64> %D) {
+; CHECK-LABEL: test_kortestc:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
+; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    kortestw %k1, %k0
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
+entry:
+  %0 = bitcast <8 x i64> %A to <16 x i32>
+  %1 = bitcast <8 x i64> %B to <16 x i32>
+  %2 = icmp ne <16 x i32> %0, %1
+  %3 = bitcast <8 x i64> %C to <16 x i32>
+  %4 = bitcast <8 x i64> %D to <16 x i32>
+  %5 = icmp ne <16 x i32> %3, %4
+  %6 = bitcast <16 x i1> %2 to i16
+  %7 = bitcast <16 x i1> %5 to i16
+  %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %6, i16 %7)
+  ret i32 %res
+}
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 6c608ecddf3..74e91c38fc9 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -1,34 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
 
-declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
-define i32 @test_kortestz(i16 %a0, i16 %a1) {
-; CHECK-LABEL: test_kortestz:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    kmovw %esi, %k0
-; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    kortestw %k0, %k1
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    retq
-  %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
-  ret i32 %res
-}
-
-declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
-define i32 @test_kortestc(i16 %a0, i16 %a1) {
-; CHECK-LABEL: test_kortestc:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    kmovw %esi, %k0
-; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    kortestw %k0, %k1
-; CHECK-NEXT:    setb %al
-; CHECK-NEXT:    retq
-  %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
-  ret i32 %res
-}
-
 define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
 ; CHECK-LABEL: test_rcp_ps_512:
 ; CHECK:       ## %bb.0:
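For reference, the IR patterns exercised by the new fast-isel tests correspond to what Clang typically emits for C source along the following lines. This is an illustrative sketch, not part of the commit: the function names kortestc_example and kortestz_example are hypothetical, while _mm512_cmpneq_epi32_mask, _mm512_kortestc, and _mm512_kortestz are the standard AVX-512 intrinsics from <immintrin.h>.

#include <immintrin.h>

/* Mirrors test_mm512_kortestc: returns 1 when the OR of the two
   comparison masks is all ones, 0 otherwise. */
int kortestc_example(__m512i A, __m512i B, __m512i C, __m512i D) {
  __mmask16 k1 = _mm512_cmpneq_epi32_mask(A, B);
  __mmask16 k2 = _mm512_cmpneq_epi32_mask(C, D);
  return _mm512_kortestc(k1, k2);
}

/* Mirrors test_mm512_kortestz: returns 1 when the OR of the two
   comparison masks is all zeros, 0 otherwise. */
int kortestz_example(__m512i A, __m512i B, __m512i C, __m512i D) {
  __mmask16 k1 = _mm512_cmpneq_epi32_mask(A, B);
  __mmask16 k2 = _mm512_cmpneq_epi32_mask(C, D);
  return _mm512_kortestz(k1, k2);
}

_mm512_kortestc reports the carry condition of kortestw (the OR of the two 16-bit masks is all ones) and _mm512_kortestz reports the zero condition (the OR is all zeros), which is what the cmpw $-1 / cmpw $0 sequences in the fast-isel checks and the kortestw-based sequences in the other tests verify.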

