diff options
| -rw-r--r-- | llvm/include/llvm/IR/IntrinsicsX86.td | 29 | ||||
| -rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 21 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 13 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 14 | ||||
| -rwxr-xr-x | llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll | 50 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll | 44 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bw-intrinsics.ll | 41 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll | 52 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll | 51 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll | 25 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dq-intrinsics.ll | 26 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll | 51 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll | 52 |
13 files changed, 217 insertions, 252 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index e5ab18eedad..35eb8310d96 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -3801,35 +3801,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; - - def int_x86_avx512_cvtb2mask_128 : // TODO remove this intrinsic - Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; - def int_x86_avx512_cvtb2mask_256 : // TODO remove this intrinsic - Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; - def int_x86_avx512_cvtb2mask_512 : // TODO remove this intrinsic - Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty], [IntrNoMem]>; - - def int_x86_avx512_cvtw2mask_128 : // TODO remove this intrinsic - Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty], [IntrNoMem]>; - def int_x86_avx512_cvtw2mask_256 : // TODO remove this intrinsic - Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; - def int_x86_avx512_cvtw2mask_512 : // TODO remove this intrinsic - Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty], [IntrNoMem]>; - - def int_x86_avx512_cvtd2mask_128 : // TODO remove this intrinsic - Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_avx512_cvtd2mask_256 : // TODO remove this intrinsic - Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty], [IntrNoMem]>; - def int_x86_avx512_cvtd2mask_512 : // TODO remove this intrinsic - Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty], [IntrNoMem]>; - - def int_x86_avx512_cvtq2mask_128 : // TODO remove this intrinsic - Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_avx512_cvtq2mask_256 : // TODO remove this intrinsic - Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty], [IntrNoMem]>; - def int_x86_avx512_cvtq2mask_512 : // TODO remove this intrinsic - Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty], [IntrNoMem]>; - } // Pack ops. diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index c258d1a4e3a..98fd616d0fc 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -157,6 +157,10 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name.startswith("avx512.mask.cmp.q") || // Added in 5.0 Name.startswith("avx512.mask.cmp.w") || // Added in 5.0 Name.startswith("avx512.mask.ucmp.") || // Added in 5.0 + Name.startswith("avx512.cvtb2mask.") || // Added in 7.0 + Name.startswith("avx512.cvtw2mask.") || // Added in 7.0 + Name.startswith("avx512.cvtd2mask.") || // Added in 7.0 + Name.startswith("avx512.cvtq2mask.") || // Added in 7.0 Name == "avx512.mask.add.pd.128" || // Added in 4.0 Name == "avx512.mask.add.pd.256" || // Added in 4.0 Name == "avx512.mask.add.ps.128" || // Added in 4.0 @@ -829,9 +833,11 @@ static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI, // Applying mask on vector of i1's and make sure result is at least 8 bits wide. static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder,Value *Vec, Value *Mask, unsigned NumElts) { - const auto *C = dyn_cast<Constant>(Mask); - if (!C || !C->isAllOnesValue()) - Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts)); + if (Mask) { + const auto *C = dyn_cast<Constant>(Mask); + if (!C || !C->isAllOnesValue()) + Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts)); + } if (NumElts < 8) { uint32_t Indices[8]; @@ -1115,6 +1121,15 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) { unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); Rep = upgradeMaskedCompare(Builder, *CI, Imm, false); + } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") || + Name.startswith("avx512.cvtw2mask.") || + Name.startswith("avx512.cvtd2mask.") || + Name.startswith("avx512.cvtq2mask."))) { + Value *Op = CI->getArgOperand(0); + Value *Zero = llvm::Constant::getNullValue(Op->getType()); + Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero); + Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr, + Op->getType()->getVectorNumElements()); } else if(IsX86 && (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" || Name == "ssse3.pabs.d.128" || diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7f1969d0c79..ae5eb552b73 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20518,19 +20518,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Src1, Src2, Src3, Imm, Rnd), Mask, Passthru, Subtarget, DAG); } - case CONVERT_TO_MASK: { - MVT SrcVT = Op.getOperand(1).getSimpleValueType(); - MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements()); - MVT BitcastVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()); - - SDValue CvtMask = DAG.getNode(X86ISD::PCMPGTM, dl, MaskVT, - DAG.getConstant(0, dl, SrcVT), - Op.getOperand(1)); - SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT, - DAG.getUNDEF(BitcastVT), CvtMask, - DAG.getIntPtrConstant(0, dl)); - return DAG.getBitcast(Op.getValueType(), Res); - } case ROUNDP: { assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode"); // Clear the upper bits of the rounding immediate so that the legacy diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index d14369b7776..bd21fee7544 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -37,7 +37,7 @@ enum IntrinsicType : uint16_t { TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, EXPAND_FROM_MEM, TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, - FIXUPIMMS_MASKZ, CONVERT_TO_MASK, GATHER_AVX2, MASK_BINOP, + FIXUPIMMS_MASKZ, GATHER_AVX2, MASK_BINOP, ROUNDP, ROUNDS }; @@ -449,15 +449,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0), X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0), X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0), - X86_INTRINSIC_DATA(avx512_cvtb2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtb2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtb2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtd2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtd2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtd2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtq2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtq2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtq2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0), @@ -472,9 +463,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0), - X86_INTRINSIC_DATA(avx512_cvtw2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtw2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtw2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), X86_INTRINSIC_DATA(avx512_kand_w, MASK_BINOP, ISD::AND, 0), diff --git a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll index f828c4dcef7..11aba2fdf73 100755 --- a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -O0 -mtriple=x86_64-apple-darwin -mcpu=skx -fast-isel-abort=1 | FileCheck %s +; RUN: llc < %s -O0 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s ; ModuleID = 'mask_set.c' source_filename = "mask_set.c" @@ -17,51 +17,53 @@ declare i32 @check_mask16(i16 zeroext %res_mask, i16 zeroext %exp_mask, i8* %fna define void @test_xmm(i32 %shift, i32 %mulp, <2 x i64> %a,i8* %arraydecay,i8* %fname){ ; CHECK-LABEL: test_xmm: ; CHECK: ## %bb.0: -; CHECK-NEXT: subq $56, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: movl $2, %esi -; CHECK-NEXT: movl $8, %eax +; CHECK-NEXT: subq $72, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: movl $4, %eax ; CHECK-NEXT: vpmovw2m %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %edi -; CHECK-NEXT: movb %dil, %r8b -; CHECK-NEXT: movzbl %r8b, %edi -; CHECK-NEXT: movw %di, %r9w +; CHECK-NEXT: movl $2, %esi +; CHECK-NEXT: movl $8, %edi +; CHECK-NEXT: movl %edi, {{[0-9]+}}(%rsp) ## 4-byte Spill ; CHECK-NEXT: movq %rdx, %rdi +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r8d ## 4-byte Reload ; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) ## 8-byte Spill -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: movw %r9w, {{[0-9]+}}(%rsp) ## 2-byte Spill +; CHECK-NEXT: movl %r8d, %edx ; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill ; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) ## 16-byte Spill +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill +; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill ; CHECK-NEXT: callq _calc_expected_mask_val -; CHECK-NEXT: movw %ax, %r9w -; CHECK-NEXT: movw {{[0-9]+}}(%rsp), %r10w ## 2-byte Reload -; CHECK-NEXT: movzwl %r10w, %edi +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: movw %dx, %r9w ; CHECK-NEXT: movzwl %r9w, %esi +; CHECK-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload +; CHECK-NEXT: kmovb %k0, %edi ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx ## 8-byte Reload ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload ; CHECK-NEXT: callq _check_mask16 -; CHECK-NEXT: movl $4, %esi ; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 ## 16-byte Reload ; CHECK-NEXT: vpmovd2m %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %edi -; CHECK-NEXT: movb %dil, %r8b -; CHECK-NEXT: movzbl %r8b, %edi -; CHECK-NEXT: movw %di, %r9w +; CHECK-NEXT: kmovq %k0, %k1 +; CHECK-NEXT: kmovd %k0, %esi +; CHECK-NEXT: movb %sil, %r10b +; CHECK-NEXT: movzbl %r10b, %esi +; CHECK-NEXT: movw %si, %r9w ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi ## 8-byte Reload -; CHECK-NEXT: movl %esi, {{[0-9]+}}(%rsp) ## 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %esi ## 4-byte Reload ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %edx ## 4-byte Reload ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill +; CHECK-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill ; CHECK-NEXT: movw %r9w, {{[0-9]+}}(%rsp) ## 2-byte Spill ; CHECK-NEXT: callq _calc_expected_mask_val ; CHECK-NEXT: movw %ax, %r9w -; CHECK-NEXT: movw {{[0-9]+}}(%rsp), %r10w ## 2-byte Reload -; CHECK-NEXT: movzwl %r10w, %edi +; CHECK-NEXT: movw {{[0-9]+}}(%rsp), %r11w ## 2-byte Reload +; CHECK-NEXT: movzwl %r11w, %edi ; CHECK-NEXT: movzwl %r9w, %esi ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx ## 8-byte Reload ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload ; CHECK-NEXT: callq _check_mask16 -; CHECK-NEXT: movl %eax, (%rsp) ## 4-byte Spill -; CHECK-NEXT: addq $56, %rsp +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill +; CHECK-NEXT: addq $72, %rsp ; CHECK-NEXT: retq %d2 = bitcast <2 x i64> %a to <8 x i16> %m2 = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %d2) diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll index 562d0c19e2b..84e7b0ec535 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll @@ -3590,3 +3590,47 @@ define i32@test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 ret i32 %res2 } +declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>) + +define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) { +; AVX512BW-LABEL: test_int_x86_avx512_cvtb2mask_512: +; AVX512BW: ## %bb.0: +; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 +; AVX512BW-NEXT: kmovq %k0, %rax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512: +; AVX512F-32: # %bb.0: +; AVX512F-32-NEXT: subl $12, %esp +; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, (%esp) +; AVX512F-32-NEXT: movl (%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $12, %esp +; AVX512F-32-NEXT: vzeroupper +; AVX512F-32-NEXT: retl + %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0) + ret i64 %res +} + +declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>) + +define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) { +; AVX512BW-LABEL: test_int_x86_avx512_cvtw2mask_512: +; AVX512BW: ## %bb.0: +; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_cvtw2mask_512: +; AVX512F-32: # %bb.0: +; AVX512F-32-NEXT: vpmovw2m %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: vzeroupper +; AVX512F-32-NEXT: retl + %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0) + ret i32 %res +} diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll index 2fa7c2c5b8a..5d6b60a5645 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1455,47 +1455,6 @@ define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> ret <8 x i64> %res2 } -declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>) - -define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) { -; AVX512BW-LABEL: test_int_x86_avx512_cvtb2mask_512: -; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 -; AVX512BW-NEXT: kmovq %k0, %rax -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: subl $12, %esp -; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: kmovq %k0, (%esp) -; AVX512F-32-NEXT: movl (%esp), %eax -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: addl $12, %esp -; AVX512F-32-NEXT: retl - %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0) - ret i64 %res -} - -declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>) - -define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) { -; AVX512BW-LABEL: test_int_x86_avx512_cvtw2mask_512: -; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 -; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_int_x86_avx512_cvtw2mask_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpmovw2m %zmm0, %k0 -; AVX512F-32-NEXT: kmovd %k0, %eax -; AVX512F-32-NEXT: retl - %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0) - ret i32 %res -} - declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll index 4e343eef6fa..685f7f5fda0 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll @@ -3826,3 +3826,55 @@ define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 ret i16 %res2 } +declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>) + +define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovb2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0) + ret i16 %res +} + +declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>) + +define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovb2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x29,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0) + ret i32 %res +} + +declare i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16>) + +define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovw2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: ## kill: def %al killed %al killed %eax +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0) + ret i8 %res +} + +declare i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16>) + +define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovw2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0) + ret i16 %res +} diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 64ad66e336b..74feae79022 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -2304,57 +2304,6 @@ define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> ret <16 x i16> %res4 } -declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>) - -define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovb2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xc0] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0) - ret i16 %res -} - -declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>) - -define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovb2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x29,0xc0] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0) - ret i32 %res -} - -declare i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16>) - -define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovw2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: ## kill: def %al killed %al killed %eax -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0) - ret i8 %res -} - -declare i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16>) - -define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovw2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0) - ret i16 %res -} - declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16) define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll index cb30dfd32aa..de0ee1b7459 100644 --- a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll @@ -328,3 +328,28 @@ define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x2_512(<4 x i32> %x0, <16 ret <16 x i32> %res4 } +declare i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32>) + +define i16@test_int_x86_avx512_cvtd2mask_512(<16 x i32> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_512: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovd2m %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax +; CHECK-NEXT: retq + %res = call i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32> %x0) + ret i16 %res +} + +declare i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64>) + +define i8@test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_512: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovq2m %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: ## kill: def %al killed %al killed %eax +; CHECK-NEXT: retq + %res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0) + ret i8 %res +} diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll index e0e7b237447..6863fc8986e 100644 --- a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll @@ -438,29 +438,3 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss_load(<4 x float>* %x0ptr, i8 %x1) %res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1) ret i8 %res } - -declare i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32>) - -define i16@test_int_x86_avx512_cvtd2mask_512(<16 x i32> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovd2m %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax -; CHECK-NEXT: retq - %res = call i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32> %x0) - ret i16 %res -} - -declare i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64>) - -define i8@test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovq2m %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: ## kill: def %al killed %al killed %eax -; CHECK-NEXT: retq - %res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0) - ret i8 %res -} diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll index 3a1bce05e67..1f37d790488 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll @@ -1804,3 +1804,54 @@ define <4 x i32>@test_int_x86_avx512_mask_broadcasti32x2_128(<4 x i32> %x0, <4 x ret <4 x i32> %res4 } +declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>) + +define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovd2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: ## kill: def %al killed %al killed %eax +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32>) + +define i8@test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovd2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x39,0xc0] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: ## kill: def %al killed %al killed %eax +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>) + +define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovq2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: ## kill: def %al killed %al killed %eax +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>) + +define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovq2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: ## kill: def %al killed %al killed %eax +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0) + ret i8 %res +} diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll index f201599c4aa..5692efdd6d5 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -624,55 +624,3 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0, i8 %x1) { %res2 = add i8 %res, %res1 ret i8 %res2 } - -declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>) - -define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovd2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: ## kill: def %al killed %al killed %eax -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0) - ret i8 %res -} - -declare i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32>) - -define i8@test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovd2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x39,0xc0] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: ## kill: def %al killed %al killed %eax -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0) - ret i8 %res -} - -declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>) - -define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovq2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: ## kill: def %al killed %al killed %eax -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0) - ret i8 %res -} - -declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>) - -define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovq2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: ## kill: def %al killed %al killed %eax -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0) - ret i8 %res -} |

