-rw-r--r--  llvm/include/llvm/IR/IntrinsicsX86.td                | 114
-rw-r--r--  llvm/lib/IR/AutoUpgrade.cpp                          |  32
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp              |  57
-rw-r--r--  llvm/lib/Target/X86/X86IntrinsicsInfo.h              |  75
-rw-r--r--  llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll   | 396
-rw-r--r--  llvm/test/CodeGen/X86/avx512-intrinsics.ll           | 225
-rw-r--r--  llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll      | 276
-rw-r--r--  llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll    | 351
-rw-r--r--  llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll | 752
-rw-r--r--  llvm/test/CodeGen/X86/avx512vl-intrinsics.ll         | 720
10 files changed, 1181 insertions(+), 1817 deletions(-)
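This change drops the X86-specific expand-load/compress-store intrinsics in favor of the target-independent llvm.masked.expandload and llvm.masked.compressstore intrinsics, with AutoUpgrade rewriting old IR on load. A minimal sketch of the upgrade for the v8f64 case (the value names %ptr and %mvec are illustrative, not taken from the patch):

  ; before
  %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)

  ; after auto-upgrade: cast the i8* to the element type, turn the integer
  ; mask into a vector of i1, and call the generic intrinsic
  %ptr  = bitcast i8* %addr to double*
  %mvec = bitcast i8 %mask to <8 x i1>
  %res  = call <8 x double> @llvm.masked.expandload.v8f64(double* %ptr, <8 x i1> %mvec, <8 x double> %data)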
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index fe814e5a13f..7b0c69912cd 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -4613,25 +4613,6 @@ let TargetPrefix = "x86" in {
           Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
                      llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_compress_store_ps_512 : // FIXME: remove
-          Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty,
-                     llvm_i16_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_pd_512 : // FIXME: remove
-          Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty,
-                     llvm_i8_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_ps_256 : // FIXME: remove
-          Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty,
-                     llvm_i8_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_pd_256 : // FIXME: remove
-          Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty,
-                     llvm_i8_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_ps_128 : // FIXME: remove
-          Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty,
-                     llvm_i8_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_pd_128 : // FIXME: remove
-          Intrinsic<[], [llvm_ptr_ty, llvm_v2f64_ty,
-                     llvm_i8_ty], [IntrArgMemOnly]>;
-
   def int_x86_avx512_mask_compress_d_512 :
           GCCBuiltin<"__builtin_ia32_compresssi512_mask">,
           Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
@@ -4657,25 +4638,6 @@ let TargetPrefix = "x86" in {
           Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
                      llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_compress_store_d_512 : // FIXME: remove
-          Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty,
-                     llvm_i16_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_q_512 : // FIXME: remove
-          Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty,
-                     llvm_i8_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_d_256 : // FIXME: remove
-          Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty,
-                     llvm_i8_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_q_256 : // FIXME: remove
-          Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty,
-                     llvm_i8_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_d_128 : // FIXME: remove
-          Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty,
-                     llvm_i8_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_q_128 : // FIXME: remove
-          Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty,
-                     llvm_i8_ty], [IntrArgMemOnly]>;
-
   def int_x86_avx512_mask_compress_b_512 :
           GCCBuiltin<"__builtin_ia32_compressqi512_mask">,
           Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
@@ -4701,25 +4663,6 @@ let TargetPrefix = "x86" in {
           Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
                      llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_compress_store_b_512 : // FIXME: remove
-          Intrinsic<[], [llvm_ptr_ty, llvm_v64i8_ty,
-                     llvm_i64_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_w_512 : // FIXME: remove
-          Intrinsic<[], [llvm_ptr_ty, llvm_v32i16_ty,
-                     llvm_i32_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_b_256 : // FIXME: remove
-          Intrinsic<[], [llvm_ptr_ty, llvm_v32i8_ty,
-                     llvm_i32_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_w_256 : // FIXME: remove
-          Intrinsic<[], [llvm_ptr_ty, llvm_v16i16_ty,
-                     llvm_i16_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_b_128 : // FIXME: remove
-          Intrinsic<[], [llvm_ptr_ty, llvm_v16i8_ty,
-                     llvm_i16_ty], [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_compress_store_w_128 : // FIXME: remove
-          Intrinsic<[], [llvm_ptr_ty, llvm_v8i16_ty,
-                     llvm_i8_ty], [IntrArgMemOnly]>;
-
 // expand
   def int_x86_avx512_mask_expand_ps_512 :
           GCCBuiltin<"__builtin_ia32_expandsf512_mask">,
@@ -4746,25 +4689,6 @@ let TargetPrefix = "x86" in {
           Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
                      llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_expand_load_ps_512 : // FIXME: remove
-          Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty,
-                     llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_pd_512 : // FIXME: remove
-          Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty,
-                     llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_ps_256 : // FIXME: remove
-          Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty,
-                     llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_pd_256 : // FIXME: remove
-          Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4f64_ty,
-                     llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_ps_128 : // FIXME: remove
-          Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4f32_ty,
-                     llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_pd_128 : // FIXME: remove
-          Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2f64_ty,
-                     llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-
   def int_x86_avx512_mask_expand_d_512 :
           GCCBuiltin<"__builtin_ia32_expandsi512_mask">,
           Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
@@ -4790,25 +4714,6 @@ let TargetPrefix = "x86" in {
           Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
                      llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_expand_load_d_512 : // FIXME: remove
-          Intrinsic<[llvm_v16i32_ty], [llvm_ptr_ty, llvm_v16i32_ty,
-                     llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_q_512 : // FIXME: remove
-          Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty, llvm_v8i64_ty,
-                     llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_d_256 : // FIXME: remove
-          Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_v8i32_ty,
-                     llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_q_256 : // FIXME: remove
-          Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty,
-                     llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_d_128 : // FIXME: remove
-          Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_v4i32_ty,
-                     llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_q_128 : // FIXME: remove
-          Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty,
-                     llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
-
   def int_x86_avx512_mask_expand_b_512 :
           GCCBuiltin<"__builtin_ia32_expandqi512_mask">,
           Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
@@ -4833,25 +4738,6 @@ let TargetPrefix = "x86" in {
           GCCBuiltin<"__builtin_ia32_expandhi128_mask">,
           Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
                      llvm_i8_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_expand_load_b_512 : // FIXME: remove
-          Intrinsic<[llvm_v64i8_ty], [llvm_ptr_ty, llvm_v64i8_ty,
-                     llvm_i64_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_w_512 : // FIXME: remove
-          Intrinsic<[llvm_v32i16_ty], [llvm_ptr_ty, llvm_v32i16_ty,
-                     llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_b_256 : // FIXME: remove
-          Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_v32i8_ty,
-                     llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_w_256 : // FIXME: remove
-          Intrinsic<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_v16i16_ty,
-                     llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_b_128 : // FIXME: remove
-          Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_v16i8_ty,
-                     llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_mask_expand_load_w_128 : // FIXME: remove
-          Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_v8i16_ty,
-                     llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
 }
 // VBMI2 Concat & Shift
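For reference, the removed definitions map onto target-independent masked intrinsics that are overloaded per element type rather than declared with an i8* pointer and integer mask. A sketch of the v8f64 overload pair (other widths are analogous):

  declare <8 x double> @llvm.masked.expandload.v8f64(double*, <8 x i1>, <8 x double>)
  declare void @llvm.masked.compressstore.v8f64(<8 x double>, double*, <8 x i1>)

Note the operand-order difference: the old compress.store intrinsics took (ptr, data, mask), while masked.compressstore takes (data, ptr, mask), which is why the upgrade code below reorders the call arguments.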
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index eefd034b0b9..609048e00ba 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -302,6 +302,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
       Name == "avx512.mask.store.ss" || // Added in 7.0
       Name.startswith("avx512.mask.loadu.") || // Added in 3.9
       Name.startswith("avx512.mask.load.") || // Added in 3.9
+      Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
+      Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
       Name == "sse42.crc32.64.8" || // Added in 3.4
       Name.startswith("avx.vbroadcast.s") || // Added in 3.5
       Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
@@ -1659,6 +1661,36 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
     Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                             CI->getArgOperand(1),CI->getArgOperand(2),
                             /*Aligned*/true);
+  } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
+    Type *ResultTy = CI->getType();
+    Type *PtrTy = ResultTy->getVectorElementType();
+
+    // Cast the pointer to element type.
+    Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
+                                       llvm::PointerType::getUnqual(PtrTy));
+
+    Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
+                                   ResultTy->getVectorNumElements());
+
+    Function *ELd = Intrinsic::getDeclaration(F->getParent(),
+                                              Intrinsic::masked_expandload,
+                                              ResultTy);
+    Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
+  } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
+    Type *ResultTy = CI->getArgOperand(1)->getType();
+    Type *PtrTy = ResultTy->getVectorElementType();
+
+    // Cast the pointer to element type.
+    Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
+                                       llvm::PointerType::getUnqual(PtrTy));
+
+    Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
+                                   ResultTy->getVectorNumElements());
+
+    Function *CSt = Intrinsic::getDeclaration(F->getParent(),
+                                              Intrinsic::masked_compressstore,
+                                              ResultTy);
+    Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
   } else if (IsX86 && Name.startswith("xop.vpcom")) {
     Intrinsic::ID intID;
     if (Name.endswith("ub"))
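For the sub-512-bit variants, the i8 mask argument has more bits than the vector has lanes, and getX86MaskVec narrows it. A sketch of the IR this produces for a hypothetical <4 x float> compress-store (value names illustrative, and the exact shuffle form assumes getX86MaskVec's usual bitcast-plus-shufflevector lowering):

  %ptr  = bitcast i8* %addr to float*
  ; widen the integer mask to a full <8 x i1>, then keep the low 4 lanes
  %wide = bitcast i8 %mask to <8 x i1>
  %mvec = shufflevector <8 x i1> %wide, <8 x i1> %wide, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void @llvm.masked.compressstore.v4f32(<4 x float> %data, float* %ptr, <4 x i1> %mvec)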
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 87fa5b23b07..fffde112f2e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4606,20 +4606,6 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   Info.offset = 0;
 
   switch (IntrData->Type) {
-  case EXPAND_FROM_MEM: {
-    Info.ptrVal = I.getArgOperand(0);
-    Info.memVT = MVT::getVT(I.getType());
-    Info.align = 1;
-    Info.flags |= MachineMemOperand::MOLoad;
-    break;
-  }
-  case COMPRESS_TO_MEM: {
-    Info.ptrVal = I.getArgOperand(0);
-    Info.memVT = MVT::getVT(I.getArgOperand(1)->getType());
-    Info.align = 1;
-    Info.flags |= MachineMemOperand::MOStore;
-    break;
-  }
   case TRUNCATE_TO_MEM_VI8:
   case TRUNCATE_TO_MEM_VI16:
   case TRUNCATE_TO_MEM_VI32: {
@@ -21577,27 +21563,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
     SDValue Results[] = { SetCC, Store };
     return DAG.getMergeValues(Results, dl);
   }
-  case COMPRESS_TO_MEM: {
-    SDValue Mask = Op.getOperand(4);
-    SDValue DataToCompress = Op.getOperand(3);
-    SDValue Addr = Op.getOperand(2);
-    SDValue Chain = Op.getOperand(0);
-    MVT VT = DataToCompress.getSimpleValueType();
-
-    MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
-    assert(MemIntr && "Expected MemIntrinsicSDNode!");
-
-    if (isAllOnesConstant(Mask)) // return just a store
-      return DAG.getStore(Chain, dl, DataToCompress, Addr,
-                          MemIntr->getMemOperand());
-
-    MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
-    SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
-
-    return DAG.getMaskedStore(Chain, dl, DataToCompress, Addr, VMask, VT,
-                              MemIntr->getMemOperand(),
-                              false /* truncating */, true /* compressing */);
-  }
   case TRUNCATE_TO_MEM_VI8:
   case TRUNCATE_TO_MEM_VI16:
   case TRUNCATE_TO_MEM_VI32: {
@@ -21641,28 +21606,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
       llvm_unreachable("Unsupported truncstore intrinsic");
     }
   }
-
-  case EXPAND_FROM_MEM: {
-    SDValue Mask = Op.getOperand(4);
-    SDValue PassThru = Op.getOperand(3);
-    SDValue Addr = Op.getOperand(2);
-    SDValue Chain = Op.getOperand(0);
-    MVT VT = Op.getSimpleValueType();
-
-    MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
-    assert(MemIntr && "Expected MemIntrinsicSDNode!");
-
-    if (isAllOnesConstant(Mask)) // Return a regular (unmasked) vector load.
-      return DAG.getLoad(VT, dl, Chain, Addr, MemIntr->getMemOperand());
-    if (X86::isZeroNode(Mask))
-      return DAG.getMergeValues({PassThru, Chain}, dl);
-
-    MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
-    SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
-    return DAG.getMaskedLoad(VT, dl, Chain, Addr, VMask, PassThru, VT,
-                             MemIntr->getMemOperand(), ISD::NON_EXTLOAD,
-                             true /* expanding */);
-  }
   }
 }
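One codegen consequence is visible in the updated tests below: the deleted custom lowering folded an all-ones mask into a plain unmasked load/store, whereas the generic masked.expandload/compressstore path now materializes the all-ones mask and keeps the masked instruction. Taken from the new versus old test expectations for test_compress_store_pd_512:

  ; new: all-ones mask built with kxnorw, masked compress kept
  kxnorw %k0, %k0, %k1
  vcompresspd %zmm0, (%rdi) {%k1}

  ; old: folded to an unmasked store
  vmovups %zmm0, (%rdi)

Similarly, a zero mask previously folded to just returning the pass-through value (a bare retq in the old test), while the upgraded path emits kxorw %k0, %k0, %k1 feeding a masked vexpandpd.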
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 124c2d8dd0a..5381caa710d 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -33,9 +33,8 @@ enum IntrinsicType : uint16_t {
   FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3, IFMA_OP, VPERM_2OP,
   INTR_TYPE_SCALAR_MASK, INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
-  COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
+  COMPRESS_EXPAND_IN_REG,
   TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
-  EXPAND_FROM_MEM,
   FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, FIXUPIMMS_MASKZ, GATHER_AVX2,
   ROUNDP, ROUNDS
@@ -120,78 +119,6 @@ static const IntrinsicData IntrinsicsWithChain[] = {
   X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
                      X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),
-  X86_INTRINSIC_DATA(avx512_mask_compress_store_b_128,
-                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_store_b_256,
-                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_store_b_512,
-                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_store_d_128,
-                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_store_d_256,
-                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_store_d_512,
-                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_store_pd_128,
-                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_store_pd_256,
-                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_store_pd_512,
-                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_store_ps_128,
-                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_store_ps_256,
-                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_store_ps_512,
-                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_store_q_128,
-                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_store_q_256,
-                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_store_q_512,
-                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_store_w_128,
-                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_store_w_256,
-                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_store_w_512,
-                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_load_b_128,
-                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_load_b_256,
-                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_load_b_512,
-                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_load_d_128,
-                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_load_d_256,
-                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_load_d_512,
-                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_load_pd_128,
-                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_load_pd_256,
-                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_load_pd_512,
-                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_load_ps_128,
-                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_load_ps_256,
-                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_load_ps_512,
-                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_load_q_128,
-                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_load_q_256,
-                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512,
-                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_load_w_128,
-                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_load_w_256,
-                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_load_w_512,
-                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
   X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
                      X86ISD::VTRUNC, 0),
   X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8,
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
index 7803ff75012..cb1706f5710 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -7802,3 +7802,399 @@ define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x flo
   ret <16 x float> %res
 }
 declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+define void @test_mask_compress_store_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
+; X86-LABEL: test_mask_compress_store_pd_512:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT:    vcompresspd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x00]
+; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_store_pd_512:
+; X64:       ## %bb.0:
+; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; X64-NEXT:    vcompresspd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x07]
+; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq ## encoding: [0xc3]
+  call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
+
+define void @test_compress_store_pd_512(i8* %addr, <8 x double> %data) {
+; X86-LABEL: test_compress_store_pd_512:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
+; X86-NEXT:    vcompresspd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x00]
+; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_compress_store_pd_512:
+; X64:       ## %bb.0:
+; X64-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
+; X64-NEXT:    vcompresspd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x07]
+; X64-NEXT:    vzeroupper
## encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1) + ret void +} + +define void @test_mask_compress_store_ps_512(i8* %addr, <16 x float> %data, i16 %mask) { +; X86-LABEL: test_mask_compress_store_ps_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcompressps %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x00] +; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_store_ps_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vcompressps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x07] +; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask) + ret void +} + +declare void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask) + +define void @test_compress_store_ps_512(i8* %addr, <16 x float> %data) { +; X86-LABEL: test_compress_store_ps_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8] +; X86-NEXT: vcompressps %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x00] +; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_compress_store_ps_512: +; X64: ## %bb.0: +; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8] +; X64-NEXT: vcompressps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x07] +; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 -1) + ret void +} + +define void @test_mask_compress_store_q_512(i8* %addr, <8 x i64> %data, i8 %mask) { +; X86-LABEL: test_mask_compress_store_q_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9] +; X86-NEXT: vpcompressq %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x00] +; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_store_q_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07] +; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask) + ret void +} + +declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask) + +define void @test_compress_store_q_512(i8* %addr, <8 x i64> %data) { +; X86-LABEL: test_compress_store_q_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8] +; X86-NEXT: vpcompressq %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x00] +; X86-NEXT: vzeroupper 
## encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_compress_store_q_512: +; X64: ## %bb.0: +; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8] +; X64-NEXT: vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07] +; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 -1) + ret void +} + +define void @test_mask_compress_store_d_512(i8* %addr, <16 x i32> %data, i16 %mask) { +; X86-LABEL: test_mask_compress_store_d_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] +; X86-NEXT: vpcompressd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x00] +; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_store_d_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpcompressd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x07] +; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask) + ret void +} + +declare void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask) + +define void @test_compress_store_d_512(i8* %addr, <16 x i32> %data) { +; X86-LABEL: test_compress_store_d_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8] +; X86-NEXT: vpcompressd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x00] +; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_compress_store_d_512: +; X64: ## %bb.0: +; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8] +; X64-NEXT: vpcompressd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x07] +; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq ## encoding: [0xc3] + call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 -1) + ret void +} + +define <8 x double> @test_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) { +; X86-LABEL: test_mask_expand_load_pd_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9] +; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_load_pd_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask) + ret <8 x double> %res +} + +define <8 x double> @test_maskz_expand_load_pd_512(i8* %addr, i8 %mask) { +; X86-LABEL: test_maskz_expand_load_pd_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: 
[0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9] +; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0x00] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_load_pd_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vexpandpd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0x07] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> zeroinitializer, i8 %mask) + ret <8 x double> %res +} + +declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask) + +define <8 x double> @test_expand_load_pd_512(i8* %addr, <8 x double> %data) { +; X86-LABEL: test_expand_load_pd_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8] +; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_expand_load_pd_512: +; X64: ## %bb.0: +; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8] +; X64-NEXT: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1) + ret <8 x double> %res +} + +; Make sure we don't crash if you pass 0 to the mask. +define <8 x double> @test_zero_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) { +; X86-LABEL: test_zero_mask_expand_load_pd_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kxorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x47,0xc8] +; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_zero_mask_expand_load_pd_512: +; X64: ## %bb.0: +; X64-NEXT: kxorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x47,0xc8] +; X64-NEXT: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 0) + ret <8 x double> %res +} + +define <16 x float> @test_mask_expand_load_ps_512(i8* %addr, <16 x float> %data, i16 %mask) { +; X86-LABEL: test_mask_expand_load_ps_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] +; X86-NEXT: vexpandps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x00] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_load_ps_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vexpandps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x07] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask) + ret <16 x float> %res +} + +define <16 x float> @test_maskz_expand_load_ps_512(i8* %addr, i16 %mask) { +; X86-LABEL: test_maskz_expand_load_ps_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: 
[0xc5,0xf8,0x90,0x4c,0x24,0x08] +; X86-NEXT: vexpandps (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0x00] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_load_ps_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0x07] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> zeroinitializer, i16 %mask) + ret <16 x float> %res +} + +declare <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask) + +define <16 x float> @test_expand_load_ps_512(i8* %addr, <16 x float> %data) { +; X86-LABEL: test_expand_load_ps_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8] +; X86-NEXT: vexpandps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x00] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_expand_load_ps_512: +; X64: ## %bb.0: +; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8] +; X64-NEXT: vexpandps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x07] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 -1) + ret <16 x float> %res +} + +define <8 x i64> @test_mask_expand_load_q_512(i8* %addr, <8 x i64> %data, i8 %mask) { +; X86-LABEL: test_mask_expand_load_q_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9] +; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x00] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_load_q_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64> @test_maskz_expand_load_q_512(i8* %addr, i8 %mask) { +; X86-LABEL: test_maskz_expand_load_q_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9] +; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0x00] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_load_q_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpexpandq (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0x07] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask) + +define <8 x i64> @test_expand_load_q_512(i8* %addr, <8 x i64> %data) { +; X86-LABEL: test_expand_load_q_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: 
kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8] +; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x00] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_expand_load_q_512: +; X64: ## %bb.0: +; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8] +; X64-NEXT: vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 -1) + ret <8 x i64> %res +} + +define <16 x i32> @test_mask_expand_load_d_512(i8* %addr, <16 x i32> %data, i16 %mask) { +; X86-LABEL: test_mask_expand_load_d_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] +; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x00] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_load_d_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpexpandd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x07] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32> @test_maskz_expand_load_d_512(i8* %addr, i16 %mask) { +; X86-LABEL: test_maskz_expand_load_d_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] +; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0x00] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_load_d_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpexpandd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0x07] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask) + +define <16 x i32> @test_expand_load_d_512(i8* %addr, <16 x i32> %data) { +; X86-LABEL: test_expand_load_d_512: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8] +; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x00] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_expand_load_d_512: +; X64: ## %bb.0: +; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8] +; X64-NEXT: vpexpandd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x07] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 -1) + ret <16 x i32> %res +} diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index 1c864498b11..546f29d7fc0 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -2,19 +2,6 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s -define void @test_mask_compress_store_pd_512(i8* %addr, <8 x double> %data, i8 %mask) { -; CHECK-LABEL: 
test_mask_compress_store_pd_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vcompresspd %zmm0, (%rdi) {%k1} -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq - call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask) - ret void -} - -declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask) - define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) { ; CHECK-LABEL: test_mask_compress_pd_512: ; CHECK: ## %bb.0: @@ -46,29 +33,6 @@ define <8 x double> @test_compress_pd_512(<8 x double> %data) { declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask) -define void @test_compress_store_pd_512(i8* %addr, <8 x double> %data) { -; CHECK-LABEL: test_compress_store_pd_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vmovups %zmm0, (%rdi) -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq - call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1) - ret void -} - -define void @test_mask_compress_store_ps_512(i8* %addr, <16 x float> %data, i16 %mask) { -; CHECK-LABEL: test_mask_compress_store_ps_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vcompressps %zmm0, (%rdi) {%k1} -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq - call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask) - ret void -} - -declare void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask) - define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) { ; CHECK-LABEL: test_mask_compress_ps_512: ; CHECK: ## %bb.0: @@ -100,29 +64,6 @@ define <16 x float> @test_compress_ps_512(<16 x float> %data) { declare <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask) -define void @test_compress_store_ps_512(i8* %addr, <16 x float> %data) { -; CHECK-LABEL: test_compress_store_ps_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vmovups %zmm0, (%rdi) -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq - call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 -1) - ret void -} - -define void @test_mask_compress_store_q_512(i8* %addr, <8 x i64> %data, i8 %mask) { -; CHECK-LABEL: test_mask_compress_store_q_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vpcompressq %zmm0, (%rdi) {%k1} -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq - call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask) - ret void -} - -declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask) - define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: test_mask_compress_q_512: ; CHECK: ## %bb.0: @@ -154,29 +95,6 @@ define <8 x i64> @test_compress_q_512(<8 x i64> %data) { declare <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask) -define void @test_compress_store_q_512(i8* %addr, <8 x i64> %data) { -; CHECK-LABEL: test_compress_store_q_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vmovups %zmm0, (%rdi) -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq - call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 -1) - ret void -} - -define void @test_mask_compress_store_d_512(i8* %addr, <16 x i32> %data, i16 %mask) { -; CHECK-LABEL: test_mask_compress_store_d_512: -; CHECK: ## 
%bb.0: -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vpcompressd %zmm0, (%rdi) {%k1} -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq - call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask) - ret void -} - -declare void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask) - define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) { ; CHECK-LABEL: test_mask_compress_d_512: ; CHECK: ## %bb.0: @@ -208,38 +126,6 @@ define <16 x i32> @test_compress_d_512(<16 x i32> %data) { declare <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask) -define void @test_compress_store_d_512(i8* %addr, <16 x i32> %data) { -; CHECK-LABEL: test_compress_store_d_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vmovups %zmm0, (%rdi) -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq - call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 -1) - ret void -} - -define <8 x double> @test_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) { -; CHECK-LABEL: test_mask_expand_load_pd_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vexpandpd (%rdi), %zmm0 {%k1} -; CHECK-NEXT: retq - %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask) - ret <8 x double> %res -} - -define <8 x double> @test_maskz_expand_load_pd_512(i8* %addr, i8 %mask) { -; CHECK-LABEL: test_maskz_expand_load_pd_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vexpandpd (%rdi), %zmm0 {%k1} {z} -; CHECK-NEXT: retq - %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> zeroinitializer, i8 %mask) - ret <8 x double> %res -} - -declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask) - define <8 x double> @test_expand_pd_512(<8 x double> %data) { ; CHECK-LABEL: test_expand_pd_512: ; CHECK: ## %bb.0: @@ -271,46 +157,6 @@ define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) { declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask) -define <8 x double> @test_expand_load_pd_512(i8* %addr, <8 x double> %data) { -; CHECK-LABEL: test_expand_load_pd_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vmovups (%rdi), %zmm0 -; CHECK-NEXT: retq - %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1) - ret <8 x double> %res -} - -; Make sure we don't crash if you pass 0 to the mask. 
-define <8 x double> @test_zero_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) { -; CHECK-LABEL: test_zero_mask_expand_load_pd_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: retq - %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 0) - ret <8 x double> %res -} - -define <16 x float> @test_mask_expand_load_ps_512(i8* %addr, <16 x float> %data, i16 %mask) { -; CHECK-LABEL: test_mask_expand_load_ps_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vexpandps (%rdi), %zmm0 {%k1} -; CHECK-NEXT: retq - %res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask) - ret <16 x float> %res -} - -define <16 x float> @test_maskz_expand_load_ps_512(i8* %addr, i16 %mask) { -; CHECK-LABEL: test_maskz_expand_load_ps_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} -; CHECK-NEXT: retq - %res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> zeroinitializer, i16 %mask) - ret <16 x float> %res -} - -declare <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask) - define <16 x float> @test_expand_ps_512(<16 x float> %data) { ; CHECK-LABEL: test_expand_ps_512: ; CHECK: ## %bb.0: @@ -342,37 +188,6 @@ define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) { declare <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask) -define <16 x float> @test_expand_load_ps_512(i8* %addr, <16 x float> %data) { -; CHECK-LABEL: test_expand_load_ps_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vmovups (%rdi), %zmm0 -; CHECK-NEXT: retq - %res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 -1) - ret <16 x float> %res -} - -define <8 x i64> @test_mask_expand_load_q_512(i8* %addr, <8 x i64> %data, i8 %mask) { -; CHECK-LABEL: test_mask_expand_load_q_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vpexpandq (%rdi), %zmm0 {%k1} -; CHECK-NEXT: retq - %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask) - ret <8 x i64> %res -} - -define <8 x i64> @test_maskz_expand_load_q_512(i8* %addr, i8 %mask) { -; CHECK-LABEL: test_maskz_expand_load_q_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vpexpandq (%rdi), %zmm0 {%k1} {z} -; CHECK-NEXT: retq - %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> zeroinitializer, i8 %mask) - ret <8 x i64> %res -} - -declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask) - define <8 x i64> @test_expand_q_512(<8 x i64> %data) { ; CHECK-LABEL: test_expand_q_512: ; CHECK: ## %bb.0: @@ -404,37 +219,6 @@ define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) { declare <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask) -define <8 x i64> @test_expand_load_q_512(i8* %addr, <8 x i64> %data) { -; CHECK-LABEL: test_expand_load_q_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vmovups (%rdi), %zmm0 -; CHECK-NEXT: retq - %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 -1) - ret <8 x i64> %res -} - -define <16 x i32> @test_mask_expand_load_d_512(i8* %addr, <16 x i32> %data, i16 %mask) { -; CHECK-LABEL: test_mask_expand_load_d_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vpexpandd 
(%rdi), %zmm0 {%k1} -; CHECK-NEXT: retq - %res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask) - ret <16 x i32> %res -} - -define <16 x i32> @test_maskz_expand_load_d_512(i8* %addr, i16 %mask) { -; CHECK-LABEL: test_maskz_expand_load_d_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vpexpandd (%rdi), %zmm0 {%k1} {z} -; CHECK-NEXT: retq - %res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> zeroinitializer, i16 %mask) - ret <16 x i32> %res -} - -declare <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask) - define <16 x i32> @test_expand_d_512(<16 x i32> %data) { ; CHECK-LABEL: test_expand_d_512: ; CHECK: ## %bb.0: @@ -466,15 +250,6 @@ define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) { declare <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask) -define <16 x i32> @test_expand_load_d_512(i8* %addr, <16 x i32> %data) { -; CHECK-LABEL: test_expand_load_d_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vmovups (%rdi), %zmm0 -; CHECK-NEXT: retq - %res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 -1) - ret <16 x i32> %res -} - define <16 x float> @test_rcp_ps_512(<16 x float> %a0) { ; CHECK-LABEL: test_rcp_ps_512: ; CHECK: ## %bb.0: diff --git a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll index 586faac2d27..5dbc27f59c9 100644 --- a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll @@ -2,42 +2,6 @@ ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 -define <32 x i16> @test_mask_expand_load_w_512(i8* %addr, <32 x i16> %data, i32 %mask) { -; X86-LABEL: test_mask_expand_load_w_512: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_expand_load_w_512: -; X64: # %bb.0: -; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07] -; X64-NEXT: retq # encoding: [0xc3] - %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 %mask) - ret <32 x i16> %res -} - -define <32 x i16> @test_maskz_expand_load_w_512(i8* %addr, i32 %mask) { -; X86-LABEL: test_maskz_expand_load_w_512: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpexpandw (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x00] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_maskz_expand_load_w_512: -; X64: # %bb.0: -; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpexpandw (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x07] -; X64-NEXT: retq # encoding: [0xc3] - %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> zeroinitializer, i32 %mask) - ret <32 x i16> 
%res -} - -declare <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 %mask) - define <32 x i16> @test_expand_w_512(<32 x i16> %data) { ; CHECK-LABEL: test_expand_w_512: ; CHECK: # %bb.0: @@ -82,61 +46,6 @@ define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) { declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask) -define <32 x i16> @test_expand_load_w_512(i8* %addr, <32 x i16> %data) { -; X86-LABEL: test_expand_load_w_512: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vmovups (%eax), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x10,0x00] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_expand_load_w_512: -; X64: # %bb.0: -; X64-NEXT: vmovups (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07] -; X64-NEXT: retq # encoding: [0xc3] - %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 -1) - ret <32 x i16> %res -} - -define <64 x i8> @test_mask_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask) { -; X86-LABEL: test_mask_expand_load_b_512: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x08] -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c] -; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8] -; X86-NEXT: vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_expand_load_b_512: -; X64: # %bb.0: -; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] -; X64-NEXT: vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07] -; X64-NEXT: retq # encoding: [0xc3] - %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 %mask) - ret <64 x i8> %res -} - -define <64 x i8> @test_maskz_expand_load_b_512(i8* %addr, i64 %mask) { -; X86-LABEL: test_maskz_expand_load_b_512: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x08] -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c] -; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8] -; X86-NEXT: vpexpandb (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x00] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_maskz_expand_load_b_512: -; X64: # %bb.0: -; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] -; X64-NEXT: vpexpandb (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x07] -; X64-NEXT: retq # encoding: [0xc3] - %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> zeroinitializer, i64 %mask) - ret <64 x i8> %res -} - -declare <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 %mask) - define <64 x i8> @test_expand_b_512(<64 x i8> %data) { ; CHECK-LABEL: test_expand_b_512: ; CHECK: # %bb.0: @@ -185,191 +94,6 @@ define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) { declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask) -define <64 x i8> @test_expand_load_b_512(i8* %addr, <64 x i8> %data) { -; X86-LABEL: test_expand_load_b_512: -; X86: # %bb.0: -; X86-NEXT: 
movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovups (%eax), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x10,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_expand_load_b_512:
-; X64: # %bb.0:
-; X64-NEXT: vmovups (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 -1)
- ret <64 x i8> %res
-}
-
-define void @test_mask_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
-; X86-LABEL: test_mask_compress_store_w_512:
-; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
-; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_compress_store_w_512:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
-; X64-NEXT: vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
-; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT: retq # encoding: [0xc3]
- call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
- ret void
-}
-
-declare void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
-
-define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
-; X86-LABEL: test_mask_compress_w_512:
-; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
-; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_compress_w_512:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
-; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) {
-; X86-LABEL: test_maskz_compress_w_512:
-; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_maskz_compress_w_512:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_compress_w_512(<32 x i16> %data) {
-; CHECK-LABEL: test_compress_w_512:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
- ret <32 x i16> %res
-}
-
-declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)
-
-define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data) {
-; X86-LABEL: test_compress_store_w_512:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovups %zmm0, (%eax) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x00]
-; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_compress_store_w_512:
-; X64: # %bb.0:
-; X64-NEXT: vmovups %zmm0, (%rdi) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
-; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT: retq # encoding: [0xc3]
- call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 -1)
- ret void
-}
-
-define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
-; X86-LABEL: test_mask_compress_store_b_512:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x08]
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
-; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
-; X86-NEXT: vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
-; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_compress_store_b_512:
-; X64: # %bb.0:
-; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
-; X64-NEXT: vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
-; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT: retq # encoding: [0xc3]
- call void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
- ret void
-}
-
-declare void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
-
-define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
-; X86-LABEL: test_mask_compress_b_512:
-; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
-; X86-NEXT: vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
-; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_compress_b_512:
-; X64: # %bb.0:
-; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
-; X64-NEXT: vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
-; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
- ret <64 x i8> %res
-}
-
-define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) {
-; X86-LABEL: test_maskz_compress_b_512:
-; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
-; X86-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_maskz_compress_b_512:
-; X64: # %bb.0:
-; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
-; X64-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
- ret <64 x i8> %res
-}
-
-define <64 x i8> @test_compress_b_512(<64 x i8> %data) {
-; CHECK-LABEL: test_compress_b_512:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
- ret <64 x i8> %res
-}
-
-declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)
-
-define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data) {
-; X86-LABEL: test_compress_store_b_512:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovups %zmm0, (%eax) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x00]
-; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_compress_store_b_512:
-; X64: # %bb.0:
-; X64-NEXT: vmovups %zmm0, (%rdi) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
-; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT: retq # encoding: [0xc3]
- call void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 -1)
- ret void
-}
-
 define <16 x i32>@test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
 ; X86: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll
index d7e127b9d3a..c5bff9a0150 100644
--- a/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll
@@ -2,44 +2,6 @@
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
 
-define <8 x i16> @test_mask_expand_load_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
-; X86-LABEL: test_mask_expand_load_w_128:
-; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_expand_load_w_128:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
-; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_maskz_expand_load_w_128(i8* %addr, i8 %mask) {
-; X86-LABEL: test_maskz_expand_load_w_128:
-; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_maskz_expand_load_w_128:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
-; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> zeroinitializer, i8 %mask)
- ret <8 x i16> %res
-}
-
-declare <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
-
 define <8 x i16> @test_expand_w_128(<8 x i16> %data) {
 ; CHECK-LABEL: test_expand_w_128:
 ; CHECK: # %bb.0:
@@ -86,57 +48,6 @@ define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) {
 
 declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)
 
-define <8 x i16> @test_expand_load_w_128(i8* %addr, <8 x i16> %data) {
-; X86-LABEL: test_expand_load_w_128:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_expand_load_w_128:
-; X64: # %bb.0:
-; X64-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 -1)
- ret <8 x i16> %res
-}
-
-define <16 x i8> @test_mask_expand_load_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
-; X86-LABEL: test_mask_expand_load_b_128:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_expand_load_b_128:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
-; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
- ret <16 x i8> %res
-}
-
-define <16 x i8> @test_maskz_expand_load_b_128(i8* %addr, i16 %mask) {
-; X86-LABEL: test_maskz_expand_load_b_128:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_maskz_expand_load_b_128:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
-; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> zeroinitializer, i16 %mask)
- ret <16 x i8> %res
-}
-
-declare <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
-
 define <16 x i8> @test_expand_b_128(<16 x i8> %data) {
 ; CHECK-LABEL: test_expand_b_128:
 ; CHECK: # %bb.0:
@@ -181,41 +92,6 @@ define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) {
 
 declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)
 
-define <16 x i8> @test_expand_load_b_128(i8* %addr, <16 x i8> %data) {
-; X86-LABEL: test_expand_load_b_128:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_expand_load_b_128:
-; X64: # %bb.0:
-; X64-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 -1)
- ret <16 x i8> %res
-}
-
-define void @test_mask_compress_store_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
-; X86-LABEL: test_mask_compress_store_w_128:
-; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_compress_store_w_128:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
-; X64-NEXT: vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
- ret void
-}
-
-declare void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
-
 define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
 ; X86-LABEL: test_mask_compress_w_128:
 ; X86: # %bb.0:
@@ -262,40 +138,6 @@ define <8 x i16> @test_compress_w_128(<8 x i16> %data) {
 
 declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)
 
-define void @test_compress_store_w_128(i8* %addr, <8 x i16> %data) {
-; X86-LABEL: test_compress_store_w_128:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_compress_store_w_128:
-; X64: # %bb.0:
-; X64-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 -1)
- ret void
-}
-
-define void @test_mask_compress_store_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
-; X86-LABEL: test_mask_compress_store_b_128:
-; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_compress_store_b_128:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
-; X64-NEXT: vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- call void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
- ret void
-}
-
-declare void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
-
 define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
 ; X86-LABEL: test_mask_compress_b_128:
 ; X86: # %bb.0:
@@ -340,57 +182,6 @@ define <16 x i8> @test_compress_b_128(<16 x i8> %data) {
 
 declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)
 
-define void @test_compress_store_b_128(i8* %addr, <16 x i8> %data) {
-; X86-LABEL: test_compress_store_b_128:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_compress_store_b_128:
-; X64: # %bb.0:
-; X64-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- call void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 -1)
- ret void
-}
-
-define <16 x i16> @test_mask_expand_load_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
-; X86-LABEL: test_mask_expand_load_w_256:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_expand_load_w_256:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
-; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_maskz_expand_load_w_256(i8* %addr, i16 %mask) {
-; X86-LABEL: test_maskz_expand_load_w_256:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_maskz_expand_load_w_256:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
-; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> zeroinitializer, i16 %mask)
- ret <16 x i16> %res
-}
-
-declare <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
-
 define <16 x i16> @test_expand_w_256(<16 x i16> %data) {
 ; CHECK-LABEL: test_expand_w_256:
 ; CHECK: # %bb.0:
@@ -435,57 +226,6 @@ define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) {
 
 declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)
 
-define <16 x i16> @test_expand_load_w_256(i8* %addr, <16 x i16> %data) {
-; X86-LABEL: test_expand_load_w_256:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovups (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_expand_load_w_256:
-; X64: # %bb.0:
-; X64-NEXT: vmovups (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 -1)
- ret <16 x i16> %res
-}
-
-define <32 x i8> @test_mask_expand_load_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
-; X86-LABEL: test_mask_expand_load_b_256:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_expand_load_b_256:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
-; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
- ret <32 x i8> %res
-}
-
-define <32 x i8> @test_maskz_expand_load_b_256(i8* %addr, i32 %mask) {
-; X86-LABEL: test_maskz_expand_load_b_256:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_maskz_expand_load_b_256:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
-; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> zeroinitializer, i32 %mask)
- ret <32 x i8> %res
-}
-
-declare <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
-
 define <32 x i8> @test_expand_b_256(<32 x i8> %data) {
 ; CHECK-LABEL: test_expand_b_256:
 ; CHECK: # %bb.0:
@@ -530,42 +270,6 @@ define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) {
 
 declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)
 
-define <32 x i8> @test_expand_load_b_256(i8* %addr, <32 x i8> %data) {
-; X86-LABEL: test_expand_load_b_256:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovups (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_expand_load_b_256:
-; X64: # %bb.0:
-; X64-NEXT: vmovups (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 -1)
- ret <32 x i8> %res
-}
-
-define void @test_mask_compress_store_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
-; X86-LABEL: test_mask_compress_store_w_256:
-; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
-; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_compress_store_w_256:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
-; X64-NEXT: vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
-; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT: retq # encoding: [0xc3]
- call void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
- ret void
-}
-
-declare void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
-
 define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
 ; X86-LABEL: test_mask_compress_w_256:
 ; X86: # %bb.0:
@@ -610,44 +314,6 @@ define <16 x i16> @test_compress_w_256(<16 x i16> %data) {
 
 declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)
 
-define void @test_compress_store_w_256(i8* %addr, <16 x i16> %data) {
-; X86-LABEL: test_compress_store_w_256:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
-; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_compress_store_w_256:
-; X64: # %bb.0:
-; X64-NEXT: vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
-; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT: retq # encoding: [0xc3]
- call void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 -1)
- ret void
-}
-
-define void @test_mask_compress_store_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
-; X86-LABEL: test_mask_compress_store_b_256:
-; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
-; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_compress_store_b_256:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
-; X64-NEXT: vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
-; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT: retq # encoding: [0xc3]
- call void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
- ret void
-}
-
-declare void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
-
 define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
 ; X86-LABEL: test_mask_compress_b_256:
 ; X86: # %bb.0:
@@ -692,23 +358,6 @@ define <32 x i8> @test_compress_b_256(<32 x i8> %data) {
 
 declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)
 
-define void @test_compress_store_b_256(i8* %addr, <32 x i8> %data) {
-; X86-LABEL: test_compress_store_b_256:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
-; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_compress_store_b_256:
-; X64: # %bb.0:
-; X64-NEXT: vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
-; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT: retq # encoding: [0xc3]
- call void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 -1)
- ret void
-}
-
 define <4 x i32>@test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
 ; X86: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
index 903baba53c1..de021a369ac 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
@@ -11318,3 +11318,755 @@ define <4 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_256(<4 x i64> %x0, <4 x
 %res2 = add <4 x i64> %res, %res1
 ret <4 x i64> %res2
 }
+
+define void @test_mask_compress_store_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
+; X86-LABEL: test_mask_compress_store_pd_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT: vcompresspd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_store_pd_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
+; X64-NEXT: vcompresspd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
+ ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
+
+define void @test_compress_store_pd_128(i8* %addr, <2 x double> %data) {
+; X86-LABEL: test_compress_store_pd_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
+; X86-NEXT: vcompresspd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_compress_store_pd_128:
+; X64: # %bb.0:
+; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
+; X64-NEXT: vcompresspd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 -1)
+ ret void
+}
+
+define void @test_mask_compress_store_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
+; X86-LABEL: test_mask_compress_store_ps_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT: vcompressps %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_store_ps_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
+; X64-NEXT: vcompressps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
+ ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
+
+define void @test_compress_store_ps_128(i8* %addr, <4 x float> %data) {
+; X86-LABEL: test_compress_store_ps_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
+; X86-NEXT: vcompressps %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_compress_store_ps_128:
+; X64: # %bb.0:
+; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
+; X64-NEXT: vcompressps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 -1)
+ ret void
+}
+
+define void @test_mask_compress_store_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
+; X86-LABEL: test_mask_compress_store_q_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT: vpcompressq %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_store_q_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
+; X64-NEXT: vpcompressq %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
+ ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
+
+define void @test_compress_store_q_128(i8* %addr, <2 x i64> %data) {
+; X86-LABEL: test_compress_store_q_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
+; X86-NEXT: vpcompressq %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_compress_store_q_128:
+; X64: # %bb.0:
+; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
+; X64-NEXT: vpcompressq %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 -1)
+ ret void
+}
+
+define void @test_mask_compress_store_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
+; X86-LABEL: test_mask_compress_store_d_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT: vpcompressd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_store_d_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
+; X64-NEXT: vpcompressd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
+ ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
+
+define void @test_compress_store_d_128(i8* %addr, <4 x i32> %data) {
+; X86-LABEL: test_compress_store_d_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
+; X86-NEXT: vpcompressd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_compress_store_d_128:
+; X64: # %bb.0:
+; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
+; X64-NEXT: vpcompressd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 -1)
+ ret void
+}
+
+define <2 x double> @test_mask_expand_load_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
+; X86-LABEL: test_mask_expand_load_pd_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_load_pd_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
+; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_maskz_expand_load_pd_128(i8* %addr, i8 %mask) {
+; X86-LABEL: test_maskz_expand_load_pd_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_load_pd_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
+; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> zeroinitializer, i8 %mask)
+ ret <2 x double> %res
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
+
+define <2 x double> @test_expand_load_pd_128(i8* %addr, <2 x double> %data) {
+; X86-LABEL: test_expand_load_pd_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
+; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_expand_load_pd_128:
+; X64: # %bb.0:
+; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
+; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 -1)
+ ret <2 x double> %res
+}
+
+define <4 x float> @test_mask_expand_load_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
+; X86-LABEL: test_mask_expand_load_ps_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_load_ps_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
+; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_maskz_expand_load_ps_128(i8* %addr, i8 %mask) {
+; X86-LABEL: test_maskz_expand_load_ps_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_load_ps_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
+; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
+
+define <4 x float> @test_expand_load_ps_128(i8* %addr, <4 x float> %data) {
+; X86-LABEL: test_expand_load_ps_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
+; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_expand_load_ps_128:
+; X64: # %bb.0:
+; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
+; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 -1)
+ ret <4 x float> %res
+}
+
+define <2 x i64> @test_mask_expand_load_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
+; X86-LABEL: test_mask_expand_load_q_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_load_q_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
+; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_maskz_expand_load_q_128(i8* %addr, i8 %mask) {
+; X86-LABEL: test_maskz_expand_load_q_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_load_q_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
+; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> zeroinitializer, i8 %mask)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
+
+define <2 x i64> @test_expand_load_q_128(i8* %addr, <2 x i64> %data) {
+; X86-LABEL: test_expand_load_q_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
+; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_expand_load_q_128:
+; X64: # %bb.0:
+; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
+; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 -1)
+ ret <2 x i64> %res
+}
+
+define <4 x i32> @test_mask_expand_load_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
+; X86-LABEL: test_mask_expand_load_d_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_load_d_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
+; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_maskz_expand_load_d_128(i8* %addr, i8 %mask) {
+; X86-LABEL: test_maskz_expand_load_d_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
+; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_load_d_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
+; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
+
+define <4 x i32> @test_expand_load_d_128(i8* %addr, <4 x i32> %data) {
+; X86-LABEL: test_expand_load_d_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
+; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_expand_load_d_128:
+; X64: # %bb.0:
+; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
+; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 -1)
+ ret <4 x i32> %res
+}
+
+define void @test_mask_compress_store_pd_256(i8* %addr, <4 x double> %data, i8 %mask) { +; X86-LABEL: test_mask_compress_store_pd_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] +; X86-NEXT: vcompresspd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x00] +; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_store_pd_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vcompresspd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] + call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask) + ret void +} + +declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask) + +define void @test_compress_store_pd_256(i8* %addr, <4 x double> %data) { +; X86-LABEL: test_compress_store_pd_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] +; X86-NEXT: vcompresspd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x00] +; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_compress_store_pd_256: +; X64: # %bb.0: +; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] +; X64-NEXT: vcompresspd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] + call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 -1) + ret void +} + +define void @test_mask_compress_store_ps_256(i8* %addr, <8 x float> %data, i8 %mask) { +; X86-LABEL: test_mask_compress_store_ps_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] +; X86-NEXT: vcompressps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x00] +; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_store_ps_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vcompressps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] + call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask) + ret void +} + +declare void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask) + +define void @test_compress_store_ps_256(i8* %addr, <8 x float> %data) { +; X86-LABEL: test_compress_store_ps_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] +; X86-NEXT: vcompressps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x00] +; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_compress_store_ps_256: +; X64: # %bb.0: +; X64-NEXT: 
kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] +; X64-NEXT: vcompressps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] + call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 -1) + ret void +} + +define void @test_mask_compress_store_q_256(i8* %addr, <4 x i64> %data, i8 %mask) { +; X86-LABEL: test_mask_compress_store_q_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] +; X86-NEXT: vpcompressq %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x00] +; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_store_q_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpcompressq %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] + call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask) + ret void +} + +declare void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask) + +define void @test_compress_store_q_256(i8* %addr, <4 x i64> %data) { +; X86-LABEL: test_compress_store_q_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] +; X86-NEXT: vpcompressq %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x00] +; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_compress_store_q_256: +; X64: # %bb.0: +; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] +; X64-NEXT: vpcompressq %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] + call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 -1) + ret void +} + +define void @test_mask_compress_store_d_256(i8* %addr, <8 x i32> %data, i8 %mask) { +; X86-LABEL: test_mask_compress_store_d_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] +; X86-NEXT: vpcompressd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x00] +; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_store_d_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpcompressd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] + call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask) + ret void +} + +declare void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask) + +define void @test_compress_store_d_256(i8* %addr, <8 x i32> %data) { +; X86-LABEL: test_compress_store_d_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kxnorw 
%k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] +; X86-NEXT: vpcompressd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x00] +; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_compress_store_d_256: +; X64: # %bb.0: +; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] +; X64-NEXT: vpcompressd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] + call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 -1) + ret void +} + +define <4 x double> @test_mask_expand_load_pd_256(i8* %addr, <4 x double> %data, i8 %mask) { +; X86-LABEL: test_mask_expand_load_pd_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] +; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_load_pd_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask) + ret <4 x double> %res +} + +define <4 x double> @test_maskz_expand_load_pd_256(i8* %addr, i8 %mask) { +; X86-LABEL: test_maskz_expand_load_pd_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] +; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_load_pd_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> zeroinitializer, i8 %mask) + ret <4 x double> %res +} + +declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask) + +define <4 x double> @test_expand_load_pd_256(i8* %addr, <4 x double> %data) { +; X86-LABEL: test_expand_load_pd_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] +; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_expand_load_pd_256: +; X64: # %bb.0: +; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] +; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 -1) + ret <4 x double> %res +} + +define <8 x float> @test_mask_expand_load_ps_256(i8* %addr, <8 x float> %data, i8 %mask) { +; X86-LABEL: test_mask_expand_load_ps_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: 
movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] +; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_load_ps_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask) + ret <8 x float> %res +} + +define <8 x float> @test_maskz_expand_load_ps_256(i8* %addr, i8 %mask) { +; X86-LABEL: test_maskz_expand_load_ps_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] +; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_load_ps_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> zeroinitializer, i8 %mask) + ret <8 x float> %res +} + +declare <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask) + +define <8 x float> @test_expand_load_ps_256(i8* %addr, <8 x float> %data) { +; X86-LABEL: test_expand_load_ps_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] +; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_expand_load_ps_256: +; X64: # %bb.0: +; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] +; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 -1) + ret <8 x float> %res +} + +define <4 x i64> @test_mask_expand_load_q_256(i8* %addr, <4 x i64> %data, i8 %mask) { +; X86-LABEL: test_mask_expand_load_q_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] +; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_load_q_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask) + ret <4 x i64> %res +} + +define <4 x i64> @test_maskz_expand_load_q_256(i8* %addr, i8 %mask) { +; X86-LABEL: test_maskz_expand_load_q_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: 
[0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] +; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_load_q_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> zeroinitializer, i8 %mask) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask) + +define <4 x i64> @test_expand_load_q_256(i8* %addr, <4 x i64> %data) { +; X86-LABEL: test_expand_load_q_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] +; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_expand_load_q_256: +; X64: # %bb.0: +; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] +; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 -1) + ret <4 x i64> %res +} + +define <8 x i32> @test_mask_expand_load_d_256(i8* %addr, <8 x i32> %data, i8 %mask) { +; X86-LABEL: test_mask_expand_load_d_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] +; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_load_d_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_maskz_expand_load_d_256(i8* %addr, i8 %mask) { +; X86-LABEL: test_maskz_expand_load_d_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] +; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_load_d_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> zeroinitializer, i8 %mask) + ret <8 x i32> %res +} + +declare <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask) + +define <8 x i32> @test_expand_load_d_256(i8* %addr, <8 x i32> %data) { +; X86-LABEL: test_expand_load_d_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kxnorw %k0, %k0, 
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index 96fcab49594..19de714ce8f 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -2,26 +2,6 @@
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

-define void @test_mask_compress_store_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
-; X86-LABEL: test_mask_compress_store_pd_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vcompresspd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_compress_store_pd_128:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vcompresspd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
-
 define <2 x double> @test_mask_compress_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) {
 ; X86-LABEL: test_mask_compress_pd_128:
 ; X86:       # %bb.0:
@@ -68,41 +48,6 @@ define <2 x double> @test_compress_pd_128(<2 x double> %data) {

 declare <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask)

-define void @test_compress_store_pd_128(i8* %addr, <2 x double> %data) {
-; X86-LABEL: test_compress_store_pd_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_compress_store_pd_128:
-; X64:       # %bb.0:
-; X64-NEXT:    vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 -1)
-  ret void
-}
-
-define void @test_mask_compress_store_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
-; X86-LABEL: test_mask_compress_store_ps_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vcompressps %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_compress_store_ps_128:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vcompressps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
-
 define <4 x float> @test_mask_compress_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) {
 ; X86-LABEL: test_mask_compress_ps_128:
 ; X86:       # %bb.0:
@@ -149,41 +94,6 @@ define <4 x float> @test_compress_ps_128(<4 x float> %data) {

 declare <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)

-define void @test_compress_store_ps_128(i8* %addr, <4 x float> %data) {
-; X86-LABEL: test_compress_store_ps_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_compress_store_ps_128:
-; X64:       # %bb.0:
-; X64-NEXT:    vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 -1)
-  ret void
-}
-
-define void @test_mask_compress_store_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
-; X86-LABEL: test_mask_compress_store_q_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vpcompressq %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_compress_store_q_128:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vpcompressq %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
-
 define <2 x i64> @test_mask_compress_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) {
 ; X86-LABEL: test_mask_compress_q_128:
 ; X86:       # %bb.0:
@@ -230,41 +140,6 @@ define <2 x i64> @test_compress_q_128(<2 x i64> %data) {

 declare <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask)

-define void @test_compress_store_q_128(i8* %addr, <2 x i64> %data) {
-; X86-LABEL: test_compress_store_q_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_compress_store_q_128:
-; X64:       # %bb.0:
-; X64-NEXT:    vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 -1)
-  ret void
-}
-
-define void @test_mask_compress_store_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
-; X86-LABEL: test_mask_compress_store_d_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vpcompressd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_compress_store_d_128:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vpcompressd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
-
 define <4 x i32> @test_mask_compress_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) {
 ; X86-LABEL: test_mask_compress_d_128:
 ; X86:       # %bb.0:
@@ -311,59 +186,6 @@ define <4 x i32> @test_compress_d_128(<4 x i32> %data) {

 declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)

-define void @test_compress_store_d_128(i8* %addr, <4 x i32> %data) {
-; X86-LABEL: test_compress_store_d_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_compress_store_d_128:
-; X64:       # %bb.0:
-; X64-NEXT:    vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 -1)
-  ret void
-}
-
-define <2 x double> @test_mask_expand_load_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
-; X86-LABEL: test_mask_expand_load_pd_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vexpandpd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_expand_load_pd_128:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vexpandpd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
-  ret <2 x double> %res
-}
-
-define <2 x double> @test_maskz_expand_load_pd_128(i8* %addr, i8 %mask) {
-; X86-LABEL: test_maskz_expand_load_pd_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vexpandpd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_maskz_expand_load_pd_128:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vexpandpd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> zeroinitializer, i8 %mask)
-  ret <2 x double> %res
-}
-
-declare <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
-
 define <2 x double> @test_expand_pd_128(<2 x double> %data) {
 ; CHECK-LABEL: test_expand_pd_128:
 ; CHECK:       # %bb.0:
@@ -410,59 +232,6 @@ define <2 x double> @test_maskz_expand_pd_128(<2 x double> %data, i8 %mask) {

 declare <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask)

-define <2 x double> @test_expand_load_pd_128(i8* %addr, <2 x double> %data) {
-; X86-LABEL: test_expand_load_pd_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_expand_load_pd_128:
-; X64:       # %bb.0:
-; X64-NEXT:    vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 -1)
-  ret <2 x double> %res
-}
-
-define <4 x float> @test_mask_expand_load_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
-; X86-LABEL: test_mask_expand_load_ps_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vexpandps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_expand_load_ps_128:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vexpandps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
-  ret <4 x float> %res
-}
-
-define <4 x float> @test_maskz_expand_load_ps_128(i8* %addr, i8 %mask) {
-; X86-LABEL: test_maskz_expand_load_ps_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vexpandps (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_maskz_expand_load_ps_128:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vexpandps (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> zeroinitializer, i8 %mask)
-  ret <4 x float> %res
-}
-
-declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
-
 define <4 x float> @test_expand_ps_128(<4 x float> %data) {
 ; CHECK-LABEL: test_expand_ps_128:
 ; CHECK:       # %bb.0:
@@ -509,59 +278,6 @@ define <4 x float> @test_maskz_expand_ps_128(<4 x float> %data, i8 %mask) {

 declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)

-define <4 x float> @test_expand_load_ps_128(i8* %addr, <4 x float> %data) {
-; X86-LABEL: test_expand_load_ps_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_expand_load_ps_128:
-; X64:       # %bb.0:
-; X64-NEXT:    vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 -1)
-  ret <4 x float> %res
-}
-
-define <2 x i64> @test_mask_expand_load_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
-; X86-LABEL: test_mask_expand_load_q_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vpexpandq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_expand_load_q_128:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vpexpandq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
-  ret <2 x i64> %res
-}
-
-define <2 x i64> @test_maskz_expand_load_q_128(i8* %addr, i8 %mask) {
-; X86-LABEL: test_maskz_expand_load_q_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vpexpandq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_maskz_expand_load_q_128:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vpexpandq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> zeroinitializer, i8 %mask)
-  ret <2 x i64> %res
-}
-
-declare <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
-
 define <2 x i64> @test_expand_q_128(<2 x i64> %data) {
 ; CHECK-LABEL: test_expand_q_128:
 ; CHECK:       # %bb.0:
@@ -608,59 +324,6 @@ define <2 x i64> @test_maskz_expand_q_128(<2 x i64> %data, i8 %mask) {

 declare <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask)

-define <2 x i64> @test_expand_load_q_128(i8* %addr, <2 x i64> %data) {
-; X86-LABEL: test_expand_load_q_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_expand_load_q_128:
-; X64:       # %bb.0:
-; X64-NEXT:    vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 -1)
-  ret <2 x i64> %res
-}
-
-define <4 x i32> @test_mask_expand_load_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
-; X86-LABEL: test_mask_expand_load_d_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vpexpandd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_expand_load_d_128:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vpexpandd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
-  ret <4 x i32> %res
-}
-
-define <4 x i32> @test_maskz_expand_load_d_128(i8* %addr, i8 %mask) {
-; X86-LABEL: test_maskz_expand_load_d_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vpexpandd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_maskz_expand_load_d_128:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vpexpandd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> zeroinitializer, i8 %mask)
-  ret <4 x i32> %res
-}
-
-declare <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
-
 define <4 x i32> @test_expand_d_128(<4 x i32> %data) {
 ; CHECK-LABEL: test_expand_d_128:
 ; CHECK:       # %bb.0:
@@ -707,43 +370,6 @@ define <4 x i32> @test_maskz_expand_d_128(<4 x i32> %data, i8 %mask) {

 declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)

-define <4 x i32> @test_expand_load_d_128(i8* %addr, <4 x i32> %data) {
-; X86-LABEL: test_expand_load_d_128:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_expand_load_d_128:
-; X64:       # %bb.0:
-; X64-NEXT:    vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 -1)
-  ret <4 x i32> %res
-}
-
-define void @test_mask_compress_store_pd_256(i8* %addr, <4 x double> %data, i8 %mask) {
-; X86-LABEL: test_mask_compress_store_pd_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vcompresspd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x00]
-; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_compress_store_pd_256:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vcompresspd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07]
-; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT:    retq # encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
-
 define <4 x double> @test_mask_compress_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
 ; X86-LABEL: test_mask_compress_pd_256:
 ; X86:       # %bb.0:
@@ -790,45 +416,6 @@ define <4 x double> @test_compress_pd_256(<4 x double> %data) {

 declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)

-define void @test_compress_store_pd_256(i8* %addr, <4 x double> %data) {
-; X86-LABEL: test_compress_store_pd_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
-; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_compress_store_pd_256:
-; X64:       # %bb.0:
-; X64-NEXT:    vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
-; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT:    retq # encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 -1)
-  ret void
-}
-
-define void @test_mask_compress_store_ps_256(i8* %addr, <8 x float> %data, i8 %mask) {
-; X86-LABEL: test_mask_compress_store_ps_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vcompressps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x00]
-; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_compress_store_ps_256:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vcompressps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07]
-; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT:    retq # encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
-
 define <8 x float> @test_mask_compress_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
 ; X86-LABEL: test_mask_compress_ps_256:
 ; X86:       # %bb.0:
@@ -875,45 +462,6 @@ define <8 x float> @test_compress_ps_256(<8 x float> %data) {

 declare <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)

-define void @test_compress_store_ps_256(i8* %addr, <8 x float> %data) {
-; X86-LABEL: test_compress_store_ps_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
-; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_compress_store_ps_256:
-; X64:       # %bb.0:
-; X64-NEXT:    vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
-; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT:    retq # encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 -1)
-  ret void
-}
-
-define void @test_mask_compress_store_q_256(i8* %addr, <4 x i64> %data, i8 %mask) {
-; X86-LABEL: test_mask_compress_store_q_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vpcompressq %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x00]
-; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_compress_store_q_256:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vpcompressq %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07]
-; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT:    retq # encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
-
 define <4 x i64> @test_mask_compress_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
 ; X86-LABEL: test_mask_compress_q_256:
 ; X86:       # %bb.0:
@@ -960,45 +508,6 @@ define <4 x i64> @test_compress_q_256(<4 x i64> %data) {

 declare <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)

-define void @test_compress_store_q_256(i8* %addr, <4 x i64> %data) {
-; X86-LABEL: test_compress_store_q_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
-; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_compress_store_q_256:
-; X64:       # %bb.0:
-; X64-NEXT:    vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
-; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT:    retq # encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 -1)
-  ret void
-}
-
-define void @test_mask_compress_store_d_256(i8* %addr, <8 x i32> %data, i8 %mask) {
-; X86-LABEL: test_mask_compress_store_d_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vpcompressd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x00]
-; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_compress_store_d_256:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vpcompressd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07]
-; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT:    retq # encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
-
 define <8 x i32> @test_mask_compress_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
 ; X86-LABEL: test_mask_compress_d_256:
 ; X86:       # %bb.0:
@@ -1045,61 +554,6 @@ define <8 x i32> @test_compress_d_256(<8 x i32> %data) {

 declare <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)

-define void @test_compress_store_d_256(i8* %addr, <8 x i32> %data) {
-; X86-LABEL: test_compress_store_d_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
-; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_compress_store_d_256:
-; X64:       # %bb.0:
-; X64-NEXT:    vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
-; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT:    retq # encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 -1)
-  ret void
-}
-
-define <4 x double> @test_mask_expand_load_pd_256(i8* %addr, <4 x double> %data, i8 %mask) {
-; X86-LABEL: test_mask_expand_load_pd_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vexpandpd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_expand_load_pd_256:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vexpandpd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
-  ret <4 x double> %res
-}
-
-define <4 x double> @test_maskz_expand_load_pd_256(i8* %addr, i8 %mask) {
-; X86-LABEL: test_maskz_expand_load_pd_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vexpandpd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_maskz_expand_load_pd_256:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vexpandpd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> zeroinitializer, i8 %mask)
-  ret <4 x double> %res
-}
-
-declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
-
 define <4 x double> @test_expand_pd_256(<4 x double> %data) {
 ; CHECK-LABEL: test_expand_pd_256:
 ; CHECK:       # %bb.0:
@@ -1146,59 +600,6 @@ define <4 x double> @test_maskz_expand_pd_256(<4 x double> %data, i8 %mask) {

 declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)

-define <4 x double> @test_expand_load_pd_256(i8* %addr, <4 x double> %data) {
-; X86-LABEL: test_expand_load_pd_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovups (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_expand_load_pd_256:
-; X64:       # %bb.0:
-; X64-NEXT:    vmovups (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 -1)
-  ret <4 x double> %res
-}
-
-define <8 x float> @test_mask_expand_load_ps_256(i8* %addr, <8 x float> %data, i8 %mask) {
-; X86-LABEL: test_mask_expand_load_ps_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vexpandps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_expand_load_ps_256:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vexpandps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
-  ret <8 x float> %res
-}
-
-define <8 x float> @test_maskz_expand_load_ps_256(i8* %addr, i8 %mask) {
-; X86-LABEL: test_maskz_expand_load_ps_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vexpandps (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_maskz_expand_load_ps_256:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vexpandps (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> zeroinitializer, i8 %mask)
-  ret <8 x float> %res
-}
-
-declare <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
-
 define <8 x float> @test_expand_ps_256(<8 x float> %data) {
 ; CHECK-LABEL: test_expand_ps_256:
 ; CHECK:       # %bb.0:
@@ -1245,59 +646,6 @@ define <8 x float> @test_maskz_expand_ps_256(<8 x float> %data, i8 %mask) {

 declare <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)

-define <8 x float> @test_expand_load_ps_256(i8* %addr, <8 x float> %data) {
-; X86-LABEL: test_expand_load_ps_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovups (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_expand_load_ps_256:
-; X64:       # %bb.0:
-; X64-NEXT:    vmovups (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 -1)
-  ret <8 x float> %res
-}
-
-define <4 x i64> @test_mask_expand_load_q_256(i8* %addr, <4 x i64> %data, i8 %mask) {
-; X86-LABEL: test_mask_expand_load_q_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vpexpandq (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_expand_load_q_256:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vpexpandq (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
-  ret <4 x i64> %res
-}
-
-define <4 x i64> @test_maskz_expand_load_q_256(i8* %addr, i8 %mask) {
-; X86-LABEL: test_maskz_expand_load_q_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vpexpandq (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_maskz_expand_load_q_256:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vpexpandq (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> zeroinitializer, i8 %mask)
-  ret <4 x i64> %res
-}
-
-declare <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
-
 define <4 x i64> @test_expand_q_256(<4 x i64> %data) {
 ; CHECK-LABEL: test_expand_q_256:
 ; CHECK:       # %bb.0:
@@ -1344,59 +692,6 @@ define <4 x i64> @test_maskz_expand_q_256(<4 x i64> %data, i8 %mask) {

 declare <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)

-define <4 x i64> @test_expand_load_q_256(i8* %addr, <4 x i64> %data) {
-; X86-LABEL: test_expand_load_q_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovups (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_expand_load_q_256:
-; X64:       # %bb.0:
-; X64-NEXT:    vmovups (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 -1)
-  ret <4 x i64> %res
-}
-
-define <8 x i32> @test_mask_expand_load_d_256(i8* %addr, <8 x i32> %data, i8 %mask) {
-; X86-LABEL: test_mask_expand_load_d_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vpexpandd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_expand_load_d_256:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vpexpandd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
-  ret <8 x i32> %res
-}
-
-define <8 x i32> @test_maskz_expand_load_d_256(i8* %addr, i8 %mask) {
-; X86-LABEL: test_maskz_expand_load_d_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vpexpandd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_maskz_expand_load_d_256:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vpexpandd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> zeroinitializer, i8 %mask)
-  ret <8 x i32> %res
-}
-
-declare <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
-
 define <8 x i32> @test_expand_d_256(<8 x i32> %data) {
 ; CHECK-LABEL: test_expand_d_256:
 ; CHECK:       # %bb.0:
@@ -1443,21 +738,6 @@ define <8 x i32> @test_maskz_expand_d_256(<8 x i32> %data, i8 %mask) {

 declare <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)

-define <8 x i32> @test_expand_load_d_256(i8* %addr, <8 x i32> %data) {
-; X86-LABEL: test_expand_load_d_256:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmovups (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_expand_load_d_256:
-; X64:       # %bb.0:
-; X64-NEXT:    vmovups (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 -1)
-  ret <8 x i32> %res
-}
-
 define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
 ; CHECK-LABEL: test_cmpps_256:
 ; CHECK:       # %bb.0: