diff options
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--  llvm/test/CodeGen/X86/xop-intrinsics-fast-isel.ll       | 14
-rw-r--r--  llvm/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll  | 39
-rw-r--r--  llvm/test/CodeGen/X86/xop-intrinsics-x86_64.ll          | 32
3 files changed, 72 insertions, 13 deletions
diff --git a/llvm/test/CodeGen/X86/xop-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/xop-intrinsics-fast-isel.ll
index a9287e7d8c9..a100a1425dd 100644
--- a/llvm/test/CodeGen/X86/xop-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/xop-intrinsics-fast-isel.ll
@@ -499,12 +499,22 @@ declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind
 define <4 x i64> @test_mm256_cmov_si256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
 ; X32-LABEL: test_mm256_cmov_si256:
 ; X32: # BB#0:
-; X32-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; X32-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3
+; X32-NEXT: vxorps %ymm3, %ymm2, %ymm3
+; X32-NEXT: vandps %ymm2, %ymm0, %ymm0
+; X32-NEXT: vandps %ymm3, %ymm1, %ymm1
+; X32-NEXT: vorps %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_cmov_si256:
 ; X64: # BB#0:
-; X64-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; X64-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; X64-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3
+; X64-NEXT: vxorps %ymm3, %ymm2, %ymm3
+; X64-NEXT: vandps %ymm2, %ymm0, %ymm0
+; X64-NEXT: vandps %ymm3, %ymm1, %ymm1
+; X64-NEXT: vorps %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
   %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2)
   ret <4 x i64> %res
diff --git a/llvm/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll b/llvm/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll
index 6fba72f2681..2369beffb6b 100644
--- a/llvm/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll
+++ b/llvm/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll
@@ -725,3 +725,42 @@ define <8 x i16> @test_int_x86_xop_vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) {
   ret <8 x i16> %res
 }
 declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
+; CHECK-LABEL: test_int_x86_xop_vpcmov:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+  %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
+
+define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
+; CHECK-LABEL: test_int_x86_xop_vpcmov_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) ;
+  ret <4 x i64> %res
+}
+define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1, <4 x i64> %a2) {
+; CHECK-LABEL: test_int_x86_xop_vpcmov_256_mr:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %ymm1, (%rdi), %ymm0, %ymm0
+; CHECK-NEXT: retq
+  %vec = load <4 x i64>, <4 x i64>* %a1
+  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %vec, <4 x i64> %a2) ;
+  ret <4 x i64> %res
+}
+define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64>* %a2) {
+; CHECK-LABEL: test_int_x86_xop_vpcmov_256_rm:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+  %vec = load <4 x i64>, <4 x i64>* %a2
+  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %vec) ;
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
+
diff --git a/llvm/test/CodeGen/X86/xop-intrinsics-x86_64.ll b/llvm/test/CodeGen/X86/xop-intrinsics-x86_64.ll
index bb6ef50cdc6..76286a26ffa 100644
--- a/llvm/test/CodeGen/X86/xop-intrinsics-x86_64.ll
+++ b/llvm/test/CodeGen/X86/xop-intrinsics-x86_64.ll
@@ -82,18 +82,23 @@ define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64
 ; CHECK: # BB#0:
 ; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT: retq
-  %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ;
-  ret <2 x i64> %res
+  %1 = xor <2 x i64> %a2, <i64 -1, i64 -1>
+  %2 = and <2 x i64> %a0, %a2
+  %3 = and <2 x i64> %a1, %1
+  %4 = or <2 x i64> %2, %3
+  ret <2 x i64> %4
 }
-declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
 
 define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
 ; CHECK-LABEL: test_int_x86_xop_vpcmov_256:
 ; CHECK: # BB#0:
 ; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT: retq
-  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) ;
-  ret <4 x i64> %res
+  %1 = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %2 = and <4 x i64> %a0, %a2
+  %3 = and <4 x i64> %a1, %1
+  %4 = or <4 x i64> %2, %3
+  ret <4 x i64> %4
 }
 define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1, <4 x i64> %a2) {
 ; CHECK-LABEL: test_int_x86_xop_vpcmov_256_mr:
@@ -101,19 +106,24 @@ define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1,
 ; CHECK-NEXT: vpcmov %ymm1, (%rdi), %ymm0, %ymm0
 ; CHECK-NEXT: retq
   %vec = load <4 x i64>, <4 x i64>* %a1
-  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %vec, <4 x i64> %a2) ;
-  ret <4 x i64> %res
+  %1 = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %2 = and <4 x i64> %a0, %a2
+  %3 = and <4 x i64> %vec, %1
+  %4 = or <4 x i64> %2, %3
+  ret <4 x i64> %4
 }
 define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64>* %a2) {
 ; CHECK-LABEL: test_int_x86_xop_vpcmov_256_rm:
 ; CHECK: # BB#0:
 ; CHECK-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT: retq
-  %vec = load <4 x i64>, <4 x i64>* %a2
-  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %vec) ;
-  ret <4 x i64> %res
+  %vec = load <4 x i64>, <4 x i64>* %a2
+  %1 = xor <4 x i64> %vec, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %2 = and <4 x i64> %a0, %vec
+  %3 = and <4 x i64> %a1, %1
+  %4 = or <4 x i64> %2, %3
+  ret <4 x i64> %4
 }
-declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
 
 define <4 x i32> @test_int_x86_xop_vphaddbd(<16 x i8> %a0) {
 ; CHECK-LABEL: test_int_x86_xop_vphaddbd:

