Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r--  llvm/test/CodeGen/X86/avx512-intrinsics.ll     |  83
-rw-r--r--  llvm/test/CodeGen/X86/avx512bw-intrinsics.ll   |  49
-rw-r--r--  llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll |  56
-rw-r--r--  llvm/test/CodeGen/X86/avx512vl-intrinsics.ll   | 224
4 files changed, 400 insertions, 12 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 014d8a2ae5e..4860ea571d7 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -858,49 +858,57 @@ define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
 }
 declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
 
-define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
+define void @test_store1(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
 ; CHECK-LABEL: test_store1:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovw %edx, %k1
 ; CHECK-NEXT: vmovups %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovups %zmm0, (%rsi)
 ; CHECK-NEXT: retq
   call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
+  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr2, <16 x float> %data, i16 -1)
   ret void
 }
 
 declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )
 
-define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
+define void @test_store2(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) {
 ; CHECK-LABEL: test_store2:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovw %edx, %k1
 ; CHECK-NEXT: vmovupd %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovupd %zmm0, (%rsi)
 ; CHECK-NEXT: retq
   call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
+  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr2, <8 x double> %data, i8 -1)
   ret void
 }
 
 declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)
 
-define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
+define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
 ; CHECK-LABEL: test_mask_store_aligned_ps:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovw %edx, %k1
 ; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovaps %zmm0, (%rsi)
 ; CHECK-NEXT: retq
   call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
+  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr2, <16 x float> %data, i16 -1)
   ret void
 }
 
 declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )
 
-define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
+define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) {
 ; CHECK-LABEL: test_mask_store_aligned_pd:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovw %edx, %k1
 ; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovapd %zmm0, (%rsi)
 ; CHECK-NEXT: retq
   call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
+  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr2, <8 x double> %data, i8 -1)
   ret void
 }
 
@@ -922,6 +930,62 @@ define <16 x float> @test_mask_load_aligned_ps(<16 x float> %data, i8* %ptr, i16
   ret <16 x float> %res4
 }
 
+declare void @llvm.x86.avx512.mask.storeu.q.512(i8*, <8 x i64>, i8)
+
+define void@test_int_x86_avx512_mask_storeu_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovdqu64 %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovdqu64 %zmm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.d.512(i8*, <16 x i32>, i16)
+
+define void@test_int_x86_avx512_mask_storeu_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovdqu32 %zmm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
+  call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.q.512(i8*, <8 x i64>, i8)
+
+define void@test_int_x86_avx512_mask_store_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovdqa64 %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.d.512(i8*, <16 x i32>, i16)
+
+define void@test_int_x86_avx512_mask_store_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovdqa32 %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovdqa32 %zmm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
+  call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
+  ret void
+}
+
 declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
 
 define <16 x float> @test_mask_load_unaligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
@@ -6897,8 +6961,6 @@ define <8 x i64>@test_int_x86_avx512_mask_permvar_di_512(<8 x i64> %x0, <8 x i64
   ret <8 x i64> %res4
 }
 
-
-
 declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16)
 
 define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
@@ -6938,4 +7000,3 @@ define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x
   %res4 = add <16 x i32> %res3, %res2
   ret <16 x i32> %res4
 }
-
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
index 58ab0840166..cf5fd539a3a 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -3107,7 +3107,6 @@ define <32 x i16>@test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i1
   ret <32 x i16> %res4
 }
 
-
 declare <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8>, <32 x i16>, i32)
 
 define <32 x i16>@test_int_x86_avx512_mask_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
@@ -3147,3 +3146,51 @@ define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x
   %res4 = add <32 x i16> %res3, %res2
   ret <32 x i16> %res4
 }
+
+declare void @llvm.x86.avx512.mask.storeu.b.512(i8*, <64 x i8>, i64)
+
+define void@test_int_x86_avx512_mask_storeu_b_512(i8* %ptr1, i8* %ptr2, <64 x i8> %x1, i64 %x2) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_storeu_b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdx, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rsi)
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_storeu_b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vmovdqu8 %zmm0, (%ecx) {%k1}
+; AVX512F-32-NEXT: vmovdqu8 %zmm0, (%eax)
+; AVX512F-32-NEXT: retl
+  call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr1, <64 x i8> %x1, i64 %x2)
+  call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr2, <64 x i8> %x1, i64 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.w.512(i8*, <32 x i16>, i32)
+
+define void@test_int_x86_avx512_mask_storeu_w_512(i8* %ptr1, i8* %ptr2, <32 x i16> %x1, i32 %x2) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_storeu_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edx, %k1
+; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rsi)
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_storeu_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; AVX512F-32-NEXT: vmovdqu16 %zmm0, (%ecx) {%k1}
+; AVX512F-32-NEXT: vmovdqu16 %zmm0, (%eax)
+; AVX512F-32-NEXT: retl
+  call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr1, <32 x i16> %x1, i32 %x2)
+  call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr2, <32 x i16> %x1, i32 -1)
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index e33ae5968e3..c284bb36ea5 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -5107,3 +5107,59 @@ define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x
   %res4 = add <16 x i16> %res3, %res2
   ret <16 x i16> %res4
 }
+
+declare void @llvm.x86.avx512.mask.storeu.b.128(i8*, <16 x i8>, i16)
+
+define void@test_int_x86_avx512_mask_storeu_b_128(i8* %ptr1, i8* %ptr2, <16 x i8> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_b_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovdqu8 %xmm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovdqu8 %xmm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr1, <16 x i8> %x1, i16 %x2)
+  call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr2, <16 x i8> %x1, i16 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.b.256(i8*, <32 x i8>, i32)
+
+define void@test_int_x86_avx512_mask_storeu_b_256(i8* %ptr1, i8* %ptr2, <32 x i8> %x1, i32 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_b_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %edx, %k1
+; CHECK-NEXT: vmovdqu8 %ymm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovdqu8 %ymm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr1, <32 x i8> %x1, i32 %x2)
+  call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr2, <32 x i8> %x1, i32 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.w.128(i8*, <8 x i16>, i8)
+
+define void@test_int_x86_avx512_mask_storeu_w_128(i8* %ptr1, i8* %ptr2, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_w_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovdqu16 %xmm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovdqu16 %xmm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr1, <8 x i16> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr2, <8 x i16> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.w.256(i8*, <16 x i16>, i16)
+
+define void@test_int_x86_avx512_mask_storeu_w_256(i8* %ptr1, i8* %ptr2, <16 x i16> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_w_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovdqu16 %ymm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovdqu16 %ymm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr1, <16 x i16> %x1, i16 %x2)
+  call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr2, <16 x i16> %x1, i16 -1)
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index 7d5e8736c16..b2735a78a4a 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -7466,3 +7466,227 @@ define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32
   %res4 = add <8 x i32> %res3, %res2
   ret <8 x i32> %res4
 }
+
+declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8)
+
+define void@test_int_x86_avx512_mask_store_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovapd %xmm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovapd %xmm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.pd.256(i8*, <4 x double>, i8)
+
+define void@test_int_x86_avx512_mask_store_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovapd %ymm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovapd %ymm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.pd.128(i8*, <2 x double>, i8)
+
+define void@test_int_x86_avx512_mask_storeu_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovupd %xmm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovupd %xmm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.pd.256(i8*, <4 x double>, i8)
+
+define void@test_int_x86_avx512_mask_storeu_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovupd %ymm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovupd %ymm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.ps.128(i8*, <4 x float>, i8)
+
+define void@test_int_x86_avx512_mask_store_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovaps %xmm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovaps %xmm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.ps.256(i8*, <8 x float>, i8)
+
+define void@test_int_x86_avx512_mask_store_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovaps %ymm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovaps %ymm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.ps.128(i8*, <4 x float>, i8)
+
+define void@test_int_x86_avx512_mask_storeu_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovups %xmm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovups %xmm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.ps.256(i8*, <8 x float>, i8)
+
+define void@test_int_x86_avx512_mask_storeu_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovups %ymm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovups %ymm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.q.128(i8*, <2 x i64>, i8)
+
+define void@test_int_x86_avx512_mask_storeu_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovdqu64 %xmm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovdqu64 %xmm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.q.256(i8*, <4 x i64>, i8)
+
+define void@test_int_x86_avx512_mask_storeu_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovdqu64 %ymm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovdqu64 %ymm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.d.128(i8*, <4 x i32>, i8)
+
+define void@test_int_x86_avx512_mask_storeu_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovdqu32 %xmm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovdqu32 %xmm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.d.256(i8*, <8 x i32>, i8)
+
+define void@test_int_x86_avx512_mask_storeu_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovdqu32 %ymm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovdqu32 %ymm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.q.128(i8*, <2 x i64>, i8)
+
+define void@test_int_x86_avx512_mask_store_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovdqa64 %xmm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovdqa64 %xmm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.q.256(i8*, <4 x i64>, i8)
+
+define void@test_int_x86_avx512_mask_store_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovdqa64 %ymm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovdqa64 %ymm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.d.128(i8*, <4 x i32>, i8)
+
+define void@test_int_x86_avx512_mask_store_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovdqa32 %xmm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovdqa32 %xmm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.d.256(i8*, <8 x i32>, i8)
+
+define void@test_int_x86_avx512_mask_store_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edx, %k1
+; CHECK-NEXT: vmovdqa32 %ymm0, (%rdi) {%k1}
+; CHECK-NEXT: vmovdqa32 %ymm0, (%rsi)
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
+  ret void
+}