| author | Craig Topper <craig.topper@gmail.com> | 2016-05-31 01:50:02 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@gmail.com> | 2016-05-31 01:50:02 +0000 |
| commit | 50f85c22c511da7f4a01a40cd21f447dc80b4a90 (patch) | |
| tree | 3a98dc0165afe4dcd4a4e7d6dfd49116bfb163d1 | |
| parent | d2f705ddf942467911b2038d64bc04c7fddfdcdd (diff) | |
[AVX512] Remove masked store intrinsics. Clang now emits the generic masked store intrinsics instead.
Calls to the removed intrinsics are auto-upgraded to the same generic masked stores.
llvm-svn: 271245
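For reference, a minimal sketch of what the auto-upgrade does, in hypothetical IR (the value names %ptr, %data, and %mask are illustrative, and the exact mangled suffix of @llvm.masked.store depends on the LLVM version): a call to one of the removed target-specific intrinsics is rewritten into the target-independent masked store, with the pointer bitcast to a vector pointer and the integer mask bitcast to a vector of i1.

```llvm
; Before: target-specific masked store with an integer mask (removed here).
call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)

; After auto-upgrade: the generic masked store. The unaligned "storeu" form
; gets alignment 1; the aligned "store" form gets the vector size (64 bytes).
%vptr = bitcast i8* %ptr to <16 x float>*
%m = bitcast i16 %mask to <16 x i1>
call void @llvm.masked.store.v16f32(<16 x float> %data, <16 x float>* %vptr, i32 1, <16 x i1> %m)
```

When the mask is an all-ones constant, the upgrade instead emits a plain vector store, which is why the `-1` mask calls in the tests below compile to unmasked vmovups/vmovaps/vmovdqu instructions.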
| -rw-r--r-- | llvm/include/llvm/IR/IntrinsicsX86.td | 130 |
| -rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 66 |
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 30 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll | 114 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics.ll | 112 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll | 49 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bw-intrinsics.ll | 48 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll | 58 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll | 56 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics.ll | 224 |
11 files changed, 513 insertions, 600 deletions
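One subtlety in the AutoUpgrade code below: for vectors with fewer than eight elements the incoming mask is still an i8, so after the bitcast to <8 x i1> only the low lanes are meaningful, and the upgrade extracts them with a shufflevector (the "extract" in UpgradeMaskedStore). A minimal sketch in hypothetical IR, assuming a <4 x i32> store with an i8 mask:

```llvm
; i8 covers 8 mask bits, but a <4 x i32> store needs only 4 mask lanes.
%m8 = bitcast i8 %mask to <8 x i1>
; Keep lanes 0..3; both shuffle operands are the same mask, as in the C++ code.
%m4 = shufflevector <8 x i1> %m8, <8 x i1> %m8, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%vptr = bitcast i8* %ptr to <4 x i32>*
call void @llvm.masked.store.v4i32(<4 x i32> %data, <4 x i32>* %vptr, i32 1, <4 x i1> %m4)
```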
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 92843097cae..f3f11ef9b6d 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -2098,58 +2098,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty], [IntrArgMemOnly]>; - def int_x86_avx512_mask_storeu_ps_128 : - GCCBuiltin<"__builtin_ia32_storeups128_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_storeu_ps_256 : - GCCBuiltin<"__builtin_ia32_storeups256_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_storeu_ps_512 : - GCCBuiltin<"__builtin_ia32_storeups512_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], - [IntrArgMemOnly]>; - - def int_x86_avx512_mask_storeu_pd_128 : - GCCBuiltin<"__builtin_ia32_storeupd128_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_storeu_pd_256 : - GCCBuiltin<"__builtin_ia32_storeupd256_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_storeu_pd_512 : - GCCBuiltin<"__builtin_ia32_storeupd512_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - - def int_x86_avx512_mask_store_ps_128 : - GCCBuiltin<"__builtin_ia32_storeaps128_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_store_ps_256 : - GCCBuiltin<"__builtin_ia32_storeaps256_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_store_ps_512 : - GCCBuiltin<"__builtin_ia32_storeaps512_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], - [IntrArgMemOnly]>; - - def int_x86_avx512_mask_store_pd_128 : - GCCBuiltin<"__builtin_ia32_storeapd128_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_store_pd_256 : - GCCBuiltin<"__builtin_ia32_storeapd256_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_store_pd_512 : - GCCBuiltin<"__builtin_ia32_storeapd512_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_store_ss : GCCBuiltin<"__builtin_ia32_storess_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty], @@ -3131,84 +3079,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
GCCBuiltin<"__builtin_ia32_maskstoreq256">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrArgMemOnly]>; - - def int_x86_avx512_mask_storeu_b_128 : - GCCBuiltin<"__builtin_ia32_storedquqi128_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_storeu_b_256 : - GCCBuiltin<"__builtin_ia32_storedquqi256_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v32i8_ty, llvm_i32_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_storeu_b_512 : - GCCBuiltin<"__builtin_ia32_storedquqi512_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v64i8_ty, llvm_i64_ty], - [IntrArgMemOnly]>; - - def int_x86_avx512_mask_storeu_w_128 : - GCCBuiltin<"__builtin_ia32_storedquhi128_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_storeu_w_256 : - GCCBuiltin<"__builtin_ia32_storedquhi256_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_storeu_w_512 : - GCCBuiltin<"__builtin_ia32_storedquhi512_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], - [IntrArgMemOnly]>; - - def int_x86_avx512_mask_storeu_d_128 : - GCCBuiltin<"__builtin_ia32_storedqusi128_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_storeu_d_256 : - GCCBuiltin<"__builtin_ia32_storedqusi256_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_storeu_d_512 : - GCCBuiltin<"__builtin_ia32_storedqusi512_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrArgMemOnly]>; - - def int_x86_avx512_mask_storeu_q_128 : - GCCBuiltin<"__builtin_ia32_storedqudi128_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_storeu_q_256 : - GCCBuiltin<"__builtin_ia32_storedqudi256_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_storeu_q_512 : - GCCBuiltin<"__builtin_ia32_storedqudi512_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - - def int_x86_avx512_mask_store_d_128 : - GCCBuiltin<"__builtin_ia32_movdqa32store128_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_store_d_256 : - GCCBuiltin<"__builtin_ia32_movdqa32store256_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_store_d_512 : - GCCBuiltin<"__builtin_ia32_movdqa32store512_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrArgMemOnly]>; - - def int_x86_avx512_mask_store_q_128 : - GCCBuiltin<"__builtin_ia32_movdqa64store128_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_store_q_256 : - GCCBuiltin<"__builtin_ia32_movdqa64store256_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; - def int_x86_avx512_mask_store_q_512 : - GCCBuiltin<"__builtin_ia32_movdqa64store512_mask">, - Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; } // Variable bit shift ops diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index ce0b10d8a77..52dd90fd9b1 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -194,6 +194,16 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { 
Name.startswith("x86.sse.storeu.") || Name.startswith("x86.sse2.storeu.") || Name.startswith("x86.avx.storeu.") || + Name.startswith("x86.avx512.mask.storeu.p") || + Name.startswith("x86.avx512.mask.storeu.b.") || + Name.startswith("x86.avx512.mask.storeu.w.") || + Name.startswith("x86.avx512.mask.storeu.d.") || + Name.startswith("x86.avx512.mask.storeu.q.") || + Name.startswith("x86.avx512.mask.store.p") || + Name.startswith("x86.avx512.mask.store.b.") || + Name.startswith("x86.avx512.mask.store.w.") || + Name.startswith("x86.avx512.mask.store.d.") || + Name.startswith("x86.avx512.mask.store.q.") || Name == "x86.sse42.crc32.64.8" || Name.startswith("x86.avx.vbroadcast.s") || Name.startswith("x86.sse2.psll.dq") || @@ -358,6 +368,40 @@ static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C, return Builder.CreateBitCast(Res, ResultTy, "cast"); } +static Value *UpgradeMaskedStore(IRBuilder<> &Builder, LLVMContext &C, + Value *Ptr, Value *Data, Value *Mask, + bool Aligned) { + // Cast the pointer to the right type. + Ptr = Builder.CreateBitCast(Ptr, + llvm::PointerType::getUnqual(Data->getType())); + unsigned Align = + Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1; + + // If the mask is all ones just emit a regular store. + if (const auto *C = dyn_cast<Constant>(Mask)) + if (C->isAllOnesValue()) + return Builder.CreateAlignedStore(Data, Ptr, Align); + + // Convert the mask from an integer type to a vector of i1. + unsigned NumElts = Data->getType()->getVectorNumElements(); + llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(), + cast<IntegerType>(Mask->getType())->getBitWidth()); + Mask = Builder.CreateBitCast(Mask, MaskTy); + + // If we have less than 8 elements, then the starting mask was an i8 and + // we need to extract down to the right number of elements. + if (NumElts < 8) { + int Indices[4]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i; + Mask = Builder.CreateShuffleVector(Mask, Mask, + makeArrayRef(Indices, NumElts), + "extract"); + } + + return Builder.CreateMaskedStore(Data, Ptr, Align, Mask); +} + // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the // upgraded intrinsic. All argument and return casting must be provided in // order to seamlessly integrate with existing context. @@ -459,6 +503,28 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // Remove intrinsic. CI->eraseFromParent(); return; + } else if (Name.startswith("llvm.x86.avx512.mask.storeu.p") || + Name.startswith("llvm.x86.avx512.mask.storeu.b.") || + Name.startswith("llvm.x86.avx512.mask.storeu.w.") || + Name.startswith("llvm.x86.avx512.mask.storeu.d.") || + Name.startswith("llvm.x86.avx512.mask.storeu.q.")) { + UpgradeMaskedStore(Builder, C, CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), /*Aligned*/false); + + // Remove intrinsic. + CI->eraseFromParent(); + return; + } else if (Name.startswith("llvm.x86.avx512.mask.store.p") || + Name.startswith("llvm.x86.avx512.mask.store.b.") || + Name.startswith("llvm.x86.avx512.mask.store.w.") || + Name.startswith("llvm.x86.avx512.mask.store.d.") || + Name.startswith("llvm.x86.avx512.mask.store.q.")) { + UpgradeMaskedStore(Builder, C, CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), /*Aligned*/true); + + // Remove intrinsic. 
+ CI->eraseFromParent(); + return; } else if (Name.startswith("llvm.x86.xop.vpcom")) { Intrinsic::ID intID; if (Name.endswith("ub")) diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 6d8f254b5c4..c4c5c1c95ab 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -214,36 +214,6 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86ISD::VTRUNC, 0), X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_512, TRUNCATE_TO_MEM_VI8, X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_store_d_128, STOREA, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_store_d_256, STOREA, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_store_d_512, STOREA, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_store_pd_128, STOREA, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_store_pd_256, STOREA, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_store_pd_512, STOREA, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_store_ps_128, STOREA, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_store_ps_256, STOREA, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_store_ps_512, STOREA, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_store_q_128, STOREA, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_store_q_256, STOREA, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_store_q_512, STOREA, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_storeu_b_128, STOREU, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_storeu_b_256, STOREU, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_storeu_b_512, STOREU, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_storeu_d_128, STOREU, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_storeu_d_256, STOREU, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_storeu_d_512, STOREU, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_storeu_pd_128, STOREU, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_storeu_pd_256, STOREU, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_storeu_pd_512, STOREU, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_storeu_ps_128, STOREU, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_storeu_ps_256, STOREU, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_storeu_ps_512, STOREU, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_storeu_q_128, STOREU, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_storeu_q_256, STOREU, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_storeu_q_512, STOREU, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_storeu_w_128, STOREU, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_storeu_w_256, STOREU, ISD::DELETED_NODE, 0), - X86_INTRINSIC_DATA(avx512_mask_storeu_w_512, STOREU, ISD::DELETED_NODE, 0), X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0), X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0), X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0), diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll new file mode 100644 index 00000000000..c16c1bb623a --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +define void @test_store1(<16 x 
float> %data, i8* %ptr, i8* %ptr2, i16 %mask) { +; CHECK-LABEL: test_store1: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 +; CHECK-NEXT: vmovups %zmm0, (%rdi) {%k1} +; CHECK-NEXT: vmovups %zmm0, (%rsi) +; CHECK-NEXT: retq + call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask) + call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr2, <16 x float> %data, i16 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 ) + +define void @test_store2(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) { +; CHECK-LABEL: test_store2: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 +; CHECK-NEXT: vmovupd %zmm0, (%rdi) {%k1} +; CHECK-NEXT: vmovupd %zmm0, (%rsi) +; CHECK-NEXT: retq + call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask) + call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr2, <8 x double> %data, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8) + +define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) { +; CHECK-LABEL: test_mask_store_aligned_ps: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 +; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1} +; CHECK-NEXT: vmovaps %zmm0, (%rsi) +; CHECK-NEXT: retq + call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask) + call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr2, <16 x float> %data, i16 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 ) + +define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) { +; CHECK-LABEL: test_mask_store_aligned_pd: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 +; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1} +; CHECK-NEXT: vmovapd %zmm0, (%rsi) +; CHECK-NEXT: retq + call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask) + call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr2, <8 x double> %data, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8) + +define void@test_int_x86_avx512_mask_storeu_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 +; CHECK-NEXT: vmovdqu64 %zmm0, (%rdi) {%k1} +; CHECK-NEXT: vmovdqu64 %zmm0, (%rsi) +; CHECK-NEXT: retq + call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.q.512(i8*, <8 x i64>, i8) + +define void@test_int_x86_avx512_mask_storeu_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 +; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1} +; CHECK-NEXT: vmovdqu32 %zmm0, (%rsi) +; CHECK-NEXT: retq + call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2) + call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.d.512(i8*, <16 x i32>, i16) + +define void@test_int_x86_avx512_mask_store_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_store_q_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 +; CHECK-NEXT: vmovdqa64 %zmm0, (%rdi) {%k1} +; CHECK-NEXT: vmovdqa64 %zmm0, (%rsi) +; CHECK-NEXT: retq + call 
void @llvm.x86.avx512.mask.store.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.store.q.512(i8*, <8 x i64>, i8) + +define void@test_int_x86_avx512_mask_store_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_store_d_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 +; CHECK-NEXT: vmovdqa32 %zmm0, (%rdi) {%k1} +; CHECK-NEXT: vmovdqa32 %zmm0, (%rsi) +; CHECK-NEXT: retq + call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2) + call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.store.d.512(i8*, <16 x i32>, i16) diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index 016eab23519..de85c350aba 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -972,62 +972,6 @@ define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1, i16 %m) { } declare i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32>, <16 x i32>, i16) -define void @test_store1(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) { -; CHECK-LABEL: test_store1: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 -; CHECK-NEXT: vmovups %zmm0, (%rdi) {%k1} -; CHECK-NEXT: vmovups %zmm0, (%rsi) -; CHECK-NEXT: retq - call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask) - call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr2, <16 x float> %data, i16 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 ) - -define void @test_store2(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) { -; CHECK-LABEL: test_store2: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 -; CHECK-NEXT: vmovupd %zmm0, (%rdi) {%k1} -; CHECK-NEXT: vmovupd %zmm0, (%rsi) -; CHECK-NEXT: retq - call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask) - call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr2, <8 x double> %data, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8) - -define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) { -; CHECK-LABEL: test_mask_store_aligned_ps: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 -; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1} -; CHECK-NEXT: vmovaps %zmm0, (%rsi) -; CHECK-NEXT: retq - call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask) - call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr2, <16 x float> %data, i16 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 ) - -define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) { -; CHECK-LABEL: test_mask_store_aligned_pd: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 -; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1} -; CHECK-NEXT: vmovapd %zmm0, (%rsi) -; CHECK-NEXT: retq - call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask) - call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr2, <8 x double> %data, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8) - define <16 x float> @test_mask_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) { ; CHECK-LABEL: test_mask_load_aligned_ps: ; CHECK: ## BB#0: @@ -1044,62 +988,6 @@ define <16 x float> 
@test_mask_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 ret <16 x float> %res4 } -declare void @llvm.x86.avx512.mask.storeu.q.512(i8*, <8 x i64>, i8) - -define void@test_int_x86_avx512_mask_storeu_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_512: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 -; CHECK-NEXT: vmovdqu64 %zmm0, (%rdi) {%k1} -; CHECK-NEXT: vmovdqu64 %zmm0, (%rsi) -; CHECK-NEXT: retq - call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.storeu.d.512(i8*, <16 x i32>, i16) - -define void@test_int_x86_avx512_mask_storeu_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_512: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 -; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1} -; CHECK-NEXT: vmovdqu32 %zmm0, (%rsi) -; CHECK-NEXT: retq - call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2) - call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.store.q.512(i8*, <8 x i64>, i8) - -define void@test_int_x86_avx512_mask_store_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_store_q_512: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 -; CHECK-NEXT: vmovdqa64 %zmm0, (%rdi) {%k1} -; CHECK-NEXT: vmovdqa64 %zmm0, (%rsi) -; CHECK-NEXT: retq - call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.store.d.512(i8*, <16 x i32>, i16) - -define void@test_int_x86_avx512_mask_store_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_store_d_512: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 -; CHECK-NEXT: vmovdqa32 %zmm0, (%rdi) {%k1} -; CHECK-NEXT: vmovdqa32 %zmm0, (%rsi) -; CHECK-NEXT: retq - call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2) - call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1) - ret void -} - declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16) define <16 x float> @test_mask_load_unaligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) { diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll new file mode 100644 index 00000000000..b7e0bdbefd4 --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32 + +declare void @llvm.x86.avx512.mask.storeu.b.512(i8*, <64 x i8>, i64) + +define void@test_int_x86_avx512_mask_storeu_b_512(i8* %ptr1, i8* %ptr2, <64 x i8> %x1, i64 %x2) { +; AVX512BW-LABEL: test_int_x86_avx512_mask_storeu_b_512: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovq %rdx, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1} +; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rsi) +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_storeu_b_512: 
+; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vmovdqu8 %zmm0, (%ecx) {%k1} +; AVX512F-32-NEXT: vmovdqu8 %zmm0, (%eax) +; AVX512F-32-NEXT: retl + call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr1, <64 x i8> %x1, i64 %x2) + call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr2, <64 x i8> %x1, i64 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.w.512(i8*, <32 x i16>, i32) + +define void@test_int_x86_avx512_mask_storeu_w_512(i8* %ptr1, i8* %ptr2, <32 x i16> %x1, i32 %x2) { +; AVX512BW-LABEL: test_int_x86_avx512_mask_storeu_w_512: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovd %edx, %k1 +; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1} +; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rsi) +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_storeu_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vmovdqu16 %zmm0, (%ecx) {%k1} +; AVX512F-32-NEXT: vmovdqu16 %zmm0, (%eax) +; AVX512F-32-NEXT: retl + call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr1, <32 x i16> %x1, i32 %x2) + call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr2, <32 x i16> %x1, i32 -1) + ret void +} diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll index e3ca747e7c5..70eba508420 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -3347,54 +3347,6 @@ define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x ret <32 x i16> %res4 } -declare void @llvm.x86.avx512.mask.storeu.b.512(i8*, <64 x i8>, i64) - -define void@test_int_x86_avx512_mask_storeu_b_512(i8* %ptr1, i8* %ptr2, <64 x i8> %x1, i64 %x2) { -; AVX512BW-LABEL: test_int_x86_avx512_mask_storeu_b_512: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovq %rdx, %k1 -; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1} -; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rsi) -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_storeu_b_512: -; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 -; AVX512F-32-NEXT: vmovdqu8 %zmm0, (%ecx) {%k1} -; AVX512F-32-NEXT: vmovdqu8 %zmm0, (%eax) -; AVX512F-32-NEXT: retl - call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr1, <64 x i8> %x1, i64 %x2) - call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr2, <64 x i8> %x1, i64 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.storeu.w.512(i8*, <32 x i16>, i32) - -define void@test_int_x86_avx512_mask_storeu_w_512(i8* %ptr1, i8* %ptr2, <32 x i16> %x1, i32 %x2) { -; AVX512BW-LABEL: test_int_x86_avx512_mask_storeu_w_512: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovd %edx, %k1 -; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1} -; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rsi) -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_storeu_w_512: -; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; AVX512F-32-NEXT: vmovdqu16 %zmm0, (%ecx) {%k1} -; AVX512F-32-NEXT: vmovdqu16 %zmm0, (%eax) -; AVX512F-32-NEXT: retl - call void 
@llvm.x86.avx512.mask.storeu.w.512(i8* %ptr1, <32 x i16> %x1, i32 %x2) - call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr2, <32 x i16> %x1, i32 -1) - ret void -} - declare <32 x i16> @llvm.x86.avx512.mask.movu.w.512(<32 x i16>, <32 x i16>, i32) define <32 x i16>@test_int_x86_avx512_mask_movu_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll new file mode 100644 index 00000000000..635fab68809 --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s + +declare void @llvm.x86.avx512.mask.storeu.b.128(i8*, <16 x i8>, i16) + +define void@test_int_x86_avx512_mask_storeu_b_128(i8* %ptr1, i8* %ptr2, <16 x i8> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_storeu_b_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: vmovdqu8 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x07] +; CHECK-NEXT: vmovdqu8 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7f,0x08,0x7f,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr1, <16 x i8> %x1, i16 %x2) + call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr2, <16 x i8> %x1, i16 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.b.256(i8*, <32 x i8>, i32) + +define void@test_int_x86_avx512_mask_storeu_b_256(i8* %ptr1, i8* %ptr2, <32 x i8> %x1, i32 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_storeu_b_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovd %edx, %k1 ## encoding: [0xc5,0xfb,0x92,0xca] +; CHECK-NEXT: vmovdqu8 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x07] +; CHECK-NEXT: vmovdqu8 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7f,0x28,0x7f,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr1, <32 x i8> %x1, i32 %x2) + call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr2, <32 x i8> %x1, i32 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.w.128(i8*, <8 x i16>, i8) + +define void@test_int_x86_avx512_mask_storeu_w_128(i8* %ptr1, i8* %ptr2, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_storeu_w_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: vmovdqu16 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x7f,0x07] +; CHECK-NEXT: vmovdqu16 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xff,0x08,0x7f,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr1, <8 x i16> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr2, <8 x i16> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.w.256(i8*, <16 x i16>, i16) + +define void@test_int_x86_avx512_mask_storeu_w_256(i8* %ptr1, i8* %ptr2, <16 x i16> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_storeu_w_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: vmovdqu16 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x7f,0x07] +; CHECK-NEXT: vmovdqu16 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xff,0x28,0x7f,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr1, <16 x i16> %x1, i16 %x2) + call void 
@llvm.x86.avx512.mask.storeu.w.256(i8* %ptr2, <16 x i16> %x1, i16 -1) + ret void +} diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 29e46d21691..73026ed6a11 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -6464,62 +6464,6 @@ define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x ret <16 x i16> %res4 } -declare void @llvm.x86.avx512.mask.storeu.b.128(i8*, <16 x i8>, i16) - -define void@test_int_x86_avx512_mask_storeu_b_128(i8* %ptr1, i8* %ptr2, <16 x i8> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_storeu_b_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovdqu8 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x07] -; CHECK-NEXT: vmovdqu8 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7f,0x08,0x7f,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr1, <16 x i8> %x1, i16 %x2) - call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr2, <16 x i8> %x1, i16 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.storeu.b.256(i8*, <32 x i8>, i32) - -define void@test_int_x86_avx512_mask_storeu_b_256(i8* %ptr1, i8* %ptr2, <32 x i8> %x1, i32 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_storeu_b_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edx, %k1 ## encoding: [0xc5,0xfb,0x92,0xca] -; CHECK-NEXT: vmovdqu8 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x07] -; CHECK-NEXT: vmovdqu8 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7f,0x28,0x7f,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr1, <32 x i8> %x1, i32 %x2) - call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr2, <32 x i8> %x1, i32 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.storeu.w.128(i8*, <8 x i16>, i8) - -define void@test_int_x86_avx512_mask_storeu_w_128(i8* %ptr1, i8* %ptr2, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_storeu_w_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovdqu16 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x7f,0x07] -; CHECK-NEXT: vmovdqu16 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xff,0x08,0x7f,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr1, <8 x i16> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr2, <8 x i16> %x1, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.storeu.w.256(i8*, <16 x i16>, i16) - -define void@test_int_x86_avx512_mask_storeu_w_256(i8* %ptr1, i8* %ptr2, <16 x i16> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_storeu_w_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovdqu16 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x7f,0x07] -; CHECK-NEXT: vmovdqu16 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xff,0x28,0x7f,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr1, <16 x i16> %x1, i16 %x2) - call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr2, <16 x i16> %x1, i16 -1) - ret void -} - declare <8 x i16> @llvm.x86.avx512.mask.movu.w.128(<8 x i16>, <8 x i16>, i8) define <8 x i16>@test_int_x86_avx512_mask_movu_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll 
b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll new file mode 100644 index 00000000000..bb9b6482032 --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -0,0 +1,226 @@ +; NOTE: Assertions have been autogenerated by update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s + +declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8) + +define void@test_int_x86_avx512_mask_store_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_store_pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: vmovapd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x29,0x07] +; CHECK-NEXT: vmovapd %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x29,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.store.pd.256(i8*, <4 x double>, i8) + +define void@test_int_x86_avx512_mask_store_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_store_pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: vmovapd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x29,0x07] +; CHECK-NEXT: vmovapd %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x29,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.pd.128(i8*, <2 x double>, i8) + +define void@test_int_x86_avx512_mask_storeu_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_storeu_pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: vmovupd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x11,0x07] +; CHECK-NEXT: vmovupd %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x11,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.pd.256(i8*, <4 x double>, i8) + +define void@test_int_x86_avx512_mask_storeu_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_storeu_pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: vmovupd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x11,0x07] +; CHECK-NEXT: vmovupd %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x11,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.store.ps.128(i8*, <4 x float>, i8) + +define void@test_int_x86_avx512_mask_store_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_store_ps_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: 
vmovaps %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x29,0x07] +; CHECK-NEXT: vmovaps %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.store.ps.256(i8*, <8 x float>, i8) + +define void@test_int_x86_avx512_mask_store_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_store_ps_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: vmovaps %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x29,0x07] +; CHECK-NEXT: vmovaps %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.ps.128(i8*, <4 x float>, i8) + +define void@test_int_x86_avx512_mask_storeu_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_storeu_ps_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: vmovups %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x11,0x07] +; CHECK-NEXT: vmovups %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.ps.256(i8*, <8 x float>, i8) + +define void@test_int_x86_avx512_mask_storeu_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_storeu_ps_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: vmovups %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x11,0x07] +; CHECK-NEXT: vmovups %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.q.128(i8*, <2 x i64>, i8) + +define void@test_int_x86_avx512_mask_storeu_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: vmovdqu64 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x07] +; CHECK-NEXT: vmovdqu64 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x08,0x7f,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.q.256(i8*, <4 x i64>, i8) + +define void@test_int_x86_avx512_mask_storeu_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: vmovdqu64 %ymm0, (%rdi) {%k1} ## 
encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x07] +; CHECK-NEXT: vmovdqu64 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x28,0x7f,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.d.128(i8*, <4 x i32>, i8) + +define void@test_int_x86_avx512_mask_storeu_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: vmovdqu32 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x07] +; CHECK-NEXT: vmovdqu32 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7e,0x08,0x7f,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.d.256(i8*, <8 x i32>, i8) + +define void@test_int_x86_avx512_mask_storeu_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: vmovdqu32 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x07] +; CHECK-NEXT: vmovdqu32 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7e,0x28,0x7f,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.store.q.128(i8*, <2 x i64>, i8) + +define void@test_int_x86_avx512_mask_store_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_store_q_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: vmovdqa64 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x07] +; CHECK-NEXT: vmovdqa64 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x7f,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.store.q.256(i8*, <4 x i64>, i8) + +define void@test_int_x86_avx512_mask_store_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_store_q_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: vmovdqa64 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x07] +; CHECK-NEXT: vmovdqa64 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x7f,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.store.d.128(i8*, <4 x i32>, i8) + +define void@test_int_x86_avx512_mask_store_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_store_d_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: vmovdqa32 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x07] +; CHECK-NEXT: vmovdqa32 
%xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.store.d.256(i8*, <8 x i32>, i8) + +define void@test_int_x86_avx512_mask_store_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_store_d_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] +; CHECK-NEXT: vmovdqa32 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x07] +; CHECK-NEXT: vmovdqa32 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7d,0x28,0x7f,0x06] +; CHECK-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1) + ret void +} diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index ed428bb754e..f2a967a2e21 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -9411,230 +9411,6 @@ define <8 x i32>@test_int_x86_avx512_mask_mova_d_256(<8 x i32> %x0, <8 x i32> %x ret <8 x i32> %res2 } -declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8) - -define void@test_int_x86_avx512_mask_store_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_store_pd_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovapd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x29,0x07] -; CHECK-NEXT: vmovapd %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x29,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.store.pd.256(i8*, <4 x double>, i8) - -define void@test_int_x86_avx512_mask_store_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_store_pd_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovapd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x29,0x07] -; CHECK-NEXT: vmovapd %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x29,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.storeu.pd.128(i8*, <2 x double>, i8) - -define void@test_int_x86_avx512_mask_storeu_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_storeu_pd_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovupd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x11,0x07] -; CHECK-NEXT: vmovupd %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x11,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.storeu.pd.256(i8*, <4 x double>, i8) - -define 
void@test_int_x86_avx512_mask_storeu_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_storeu_pd_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovupd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x11,0x07] -; CHECK-NEXT: vmovupd %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x11,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.store.ps.128(i8*, <4 x float>, i8) - -define void@test_int_x86_avx512_mask_store_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_store_ps_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovaps %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x29,0x07] -; CHECK-NEXT: vmovaps %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.store.ps.256(i8*, <8 x float>, i8) - -define void@test_int_x86_avx512_mask_store_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_store_ps_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovaps %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x29,0x07] -; CHECK-NEXT: vmovaps %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.storeu.ps.128(i8*, <4 x float>, i8) - -define void@test_int_x86_avx512_mask_storeu_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_storeu_ps_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovups %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x11,0x07] -; CHECK-NEXT: vmovups %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.storeu.ps.256(i8*, <8 x float>, i8) - -define void@test_int_x86_avx512_mask_storeu_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_storeu_ps_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovups %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x11,0x07] -; CHECK-NEXT: vmovups %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.storeu.q.128(i8*, <2 x i64>, i8) - -define 
void@test_int_x86_avx512_mask_storeu_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovdqu64 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x07] -; CHECK-NEXT: vmovdqu64 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x08,0x7f,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.storeu.q.256(i8*, <4 x i64>, i8) - -define void@test_int_x86_avx512_mask_storeu_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovdqu64 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x07] -; CHECK-NEXT: vmovdqu64 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x28,0x7f,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.storeu.d.128(i8*, <4 x i32>, i8) - -define void@test_int_x86_avx512_mask_storeu_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovdqu32 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x07] -; CHECK-NEXT: vmovdqu32 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7e,0x08,0x7f,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.storeu.d.256(i8*, <8 x i32>, i8) - -define void@test_int_x86_avx512_mask_storeu_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovdqu32 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x07] -; CHECK-NEXT: vmovdqu32 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7e,0x28,0x7f,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.store.q.128(i8*, <2 x i64>, i8) - -define void@test_int_x86_avx512_mask_store_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_store_q_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovdqa64 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x07] -; CHECK-NEXT: vmovdqa64 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x7f,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.store.q.256(i8*, <4 x i64>, i8) - -define void@test_int_x86_avx512_mask_store_q_256(i8* %ptr1, i8* 
%ptr2, <4 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_store_q_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovdqa64 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x07] -; CHECK-NEXT: vmovdqa64 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x7f,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.store.d.128(i8*, <4 x i32>, i8) - -define void@test_int_x86_avx512_mask_store_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_store_d_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovdqa32 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x07] -; CHECK-NEXT: vmovdqa32 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1) - ret void -} - -declare void @llvm.x86.avx512.mask.store.d.256(i8*, <8 x i32>, i8) - -define void@test_int_x86_avx512_mask_store_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_store_d_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] -; CHECK-NEXT: vmovdqa32 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x07] -; CHECK-NEXT: vmovdqa32 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7d,0x28,0x7f,0x06] -; CHECK-NEXT: retq ## encoding: [0xc3] - call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2) - call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1) - ret void -} - declare <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8) define <2 x double>@test_int_x86_avx512_mask_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) { |

