diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-01-09 00:50:47 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-01-09 00:50:47 +0000 |
| commit | cc342d465ea93ab94e79a0fd0500a06af253943d (patch) | |
| tree | 1257961ef91e88da554605aa141c7be820c99956 | |
| parent | 7c2abdd249b9c7361ff95ee1e90cc46ce2c326d1 (diff) | |
| download | bcm5719-llvm-cc342d465ea93ab94e79a0fd0500a06af253943d.tar.gz bcm5719-llvm-cc342d465ea93ab94e79a0fd0500a06af253943d.zip | |
[X86] Remove llvm.x86.avx512.cvt*2mask.* intrinsics and autoupgrade to (icmp slt X, 0)
I had to drop -fast-isel-abort=1 from one test (avx512-mask-zext-bugfix.ll) because FastISel cannot select some of the mask operations. When the test used the intrinsics, the intrinsic call implicitly fell back to SelectionDAG without triggering the abort error. With native IR, the fallback does not happen the same way, so the abort would trigger.
llvm-svn: 322050
| -rw-r--r-- | llvm/include/llvm/IR/IntrinsicsX86.td | 29 | ||||
| -rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 21 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 13 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 14 | ||||
| -rwxr-xr-x | llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll | 50 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll | 44 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bw-intrinsics.ll | 41 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll | 52 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll | 51 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll | 25 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dq-intrinsics.ll | 26 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll | 51 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll | 52 |
13 files changed, 217 insertions, 252 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index e5ab18eedad..35eb8310d96 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -3801,35 +3801,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; - - def int_x86_avx512_cvtb2mask_128 : // TODO remove this intrinsic - Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; - def int_x86_avx512_cvtb2mask_256 : // TODO remove this intrinsic - Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; - def int_x86_avx512_cvtb2mask_512 : // TODO remove this intrinsic - Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty], [IntrNoMem]>; - - def int_x86_avx512_cvtw2mask_128 : // TODO remove this intrinsic - Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty], [IntrNoMem]>; - def int_x86_avx512_cvtw2mask_256 : // TODO remove this intrinsic - Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; - def int_x86_avx512_cvtw2mask_512 : // TODO remove this intrinsic - Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty], [IntrNoMem]>; - - def int_x86_avx512_cvtd2mask_128 : // TODO remove this intrinsic - Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_avx512_cvtd2mask_256 : // TODO remove this intrinsic - Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty], [IntrNoMem]>; - def int_x86_avx512_cvtd2mask_512 : // TODO remove this intrinsic - Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty], [IntrNoMem]>; - - def int_x86_avx512_cvtq2mask_128 : // TODO remove this intrinsic - Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_avx512_cvtq2mask_256 : // TODO remove this intrinsic - Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty], [IntrNoMem]>; - def int_x86_avx512_cvtq2mask_512 : // TODO remove this intrinsic - Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty], [IntrNoMem]>; - } // Pack ops. 
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index c258d1a4e3a..98fd616d0fc 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -157,6 +157,10 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name.startswith("avx512.mask.cmp.q") || // Added in 5.0 Name.startswith("avx512.mask.cmp.w") || // Added in 5.0 Name.startswith("avx512.mask.ucmp.") || // Added in 5.0 + Name.startswith("avx512.cvtb2mask.") || // Added in 7.0 + Name.startswith("avx512.cvtw2mask.") || // Added in 7.0 + Name.startswith("avx512.cvtd2mask.") || // Added in 7.0 + Name.startswith("avx512.cvtq2mask.") || // Added in 7.0 Name == "avx512.mask.add.pd.128" || // Added in 4.0 Name == "avx512.mask.add.pd.256" || // Added in 4.0 Name == "avx512.mask.add.ps.128" || // Added in 4.0 @@ -829,9 +833,11 @@ static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI, // Applying mask on vector of i1's and make sure result is at least 8 bits wide. static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder,Value *Vec, Value *Mask, unsigned NumElts) { - const auto *C = dyn_cast<Constant>(Mask); - if (!C || !C->isAllOnesValue()) - Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts)); + if (Mask) { + const auto *C = dyn_cast<Constant>(Mask); + if (!C || !C->isAllOnesValue()) + Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts)); + } if (NumElts < 8) { uint32_t Indices[8]; @@ -1115,6 +1121,15 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) { unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); Rep = upgradeMaskedCompare(Builder, *CI, Imm, false); + } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") || + Name.startswith("avx512.cvtw2mask.") || + Name.startswith("avx512.cvtd2mask.") || + Name.startswith("avx512.cvtq2mask."))) { + Value *Op = CI->getArgOperand(0); + Value *Zero = 
llvm::Constant::getNullValue(Op->getType()); + Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero); + Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr, + Op->getType()->getVectorNumElements()); } else if(IsX86 && (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" || Name == "ssse3.pabs.d.128" || diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7f1969d0c79..ae5eb552b73 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20518,19 +20518,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Src1, Src2, Src3, Imm, Rnd), Mask, Passthru, Subtarget, DAG); } - case CONVERT_TO_MASK: { - MVT SrcVT = Op.getOperand(1).getSimpleValueType(); - MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements()); - MVT BitcastVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()); - - SDValue CvtMask = DAG.getNode(X86ISD::PCMPGTM, dl, MaskVT, - DAG.getConstant(0, dl, SrcVT), - Op.getOperand(1)); - SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT, - DAG.getUNDEF(BitcastVT), CvtMask, - DAG.getIntPtrConstant(0, dl)); - return DAG.getBitcast(Op.getValueType(), Res); - } case ROUNDP: { assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode"); // Clear the upper bits of the rounding immediate so that the legacy diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index d14369b7776..bd21fee7544 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -37,7 +37,7 @@ enum IntrinsicType : uint16_t { TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, EXPAND_FROM_MEM, TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, - FIXUPIMMS_MASKZ, CONVERT_TO_MASK, GATHER_AVX2, MASK_BINOP, + FIXUPIMMS_MASKZ, GATHER_AVX2, MASK_BINOP, ROUNDP, ROUNDS }; @@ -449,15 +449,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { 
X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0), X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0), X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0), - X86_INTRINSIC_DATA(avx512_cvtb2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtb2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtb2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtd2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtd2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtd2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtq2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtq2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtq2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0), @@ -472,9 +463,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0), - X86_INTRINSIC_DATA(avx512_cvtw2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtw2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_cvtw2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0), X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), X86_INTRINSIC_DATA(avx512_kand_w, MASK_BINOP, ISD::AND, 0), diff 
--git a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll index f828c4dcef7..11aba2fdf73 100755 --- a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -O0 -mtriple=x86_64-apple-darwin -mcpu=skx -fast-isel-abort=1 | FileCheck %s +; RUN: llc < %s -O0 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s ; ModuleID = 'mask_set.c' source_filename = "mask_set.c" @@ -17,51 +17,53 @@ declare i32 @check_mask16(i16 zeroext %res_mask, i16 zeroext %exp_mask, i8* %fna define void @test_xmm(i32 %shift, i32 %mulp, <2 x i64> %a,i8* %arraydecay,i8* %fname){ ; CHECK-LABEL: test_xmm: ; CHECK: ## %bb.0: -; CHECK-NEXT: subq $56, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: movl $2, %esi -; CHECK-NEXT: movl $8, %eax +; CHECK-NEXT: subq $72, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: movl $4, %eax ; CHECK-NEXT: vpmovw2m %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %edi -; CHECK-NEXT: movb %dil, %r8b -; CHECK-NEXT: movzbl %r8b, %edi -; CHECK-NEXT: movw %di, %r9w +; CHECK-NEXT: movl $2, %esi +; CHECK-NEXT: movl $8, %edi +; CHECK-NEXT: movl %edi, {{[0-9]+}}(%rsp) ## 4-byte Spill ; CHECK-NEXT: movq %rdx, %rdi +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r8d ## 4-byte Reload ; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) ## 8-byte Spill -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: movw %r9w, {{[0-9]+}}(%rsp) ## 2-byte Spill +; CHECK-NEXT: movl %r8d, %edx ; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill ; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) ## 16-byte Spill +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill +; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill ; CHECK-NEXT: callq _calc_expected_mask_val -; CHECK-NEXT: movw %ax, %r9w -; CHECK-NEXT: movw {{[0-9]+}}(%rsp), %r10w ## 2-byte Reload -; CHECK-NEXT: movzwl %r10w, %edi +; CHECK-NEXT: 
movl %eax, %edx +; CHECK-NEXT: movw %dx, %r9w ; CHECK-NEXT: movzwl %r9w, %esi +; CHECK-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload +; CHECK-NEXT: kmovb %k0, %edi ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx ## 8-byte Reload ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload ; CHECK-NEXT: callq _check_mask16 -; CHECK-NEXT: movl $4, %esi ; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 ## 16-byte Reload ; CHECK-NEXT: vpmovd2m %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %edi -; CHECK-NEXT: movb %dil, %r8b -; CHECK-NEXT: movzbl %r8b, %edi -; CHECK-NEXT: movw %di, %r9w +; CHECK-NEXT: kmovq %k0, %k1 +; CHECK-NEXT: kmovd %k0, %esi +; CHECK-NEXT: movb %sil, %r10b +; CHECK-NEXT: movzbl %r10b, %esi +; CHECK-NEXT: movw %si, %r9w ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi ## 8-byte Reload -; CHECK-NEXT: movl %esi, {{[0-9]+}}(%rsp) ## 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %esi ## 4-byte Reload ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %edx ## 4-byte Reload ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill +; CHECK-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill ; CHECK-NEXT: movw %r9w, {{[0-9]+}}(%rsp) ## 2-byte Spill ; CHECK-NEXT: callq _calc_expected_mask_val ; CHECK-NEXT: movw %ax, %r9w -; CHECK-NEXT: movw {{[0-9]+}}(%rsp), %r10w ## 2-byte Reload -; CHECK-NEXT: movzwl %r10w, %edi +; CHECK-NEXT: movw {{[0-9]+}}(%rsp), %r11w ## 2-byte Reload +; CHECK-NEXT: movzwl %r11w, %edi ; CHECK-NEXT: movzwl %r9w, %esi ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx ## 8-byte Reload ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload ; CHECK-NEXT: callq _check_mask16 -; CHECK-NEXT: movl %eax, (%rsp) ## 4-byte Spill -; CHECK-NEXT: addq $56, %rsp +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill +; CHECK-NEXT: addq $72, %rsp ; CHECK-NEXT: retq %d2 = bitcast <2 x i64> %a to <8 x i16> %m2 = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %d2) diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll 
index 562d0c19e2b..84e7b0ec535 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll @@ -3590,3 +3590,47 @@ define i32@test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 ret i32 %res2 } +declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>) + +define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) { +; AVX512BW-LABEL: test_int_x86_avx512_cvtb2mask_512: +; AVX512BW: ## %bb.0: +; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 +; AVX512BW-NEXT: kmovq %k0, %rax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512: +; AVX512F-32: # %bb.0: +; AVX512F-32-NEXT: subl $12, %esp +; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, (%esp) +; AVX512F-32-NEXT: movl (%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $12, %esp +; AVX512F-32-NEXT: vzeroupper +; AVX512F-32-NEXT: retl + %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0) + ret i64 %res +} + +declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>) + +define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) { +; AVX512BW-LABEL: test_int_x86_avx512_cvtw2mask_512: +; AVX512BW: ## %bb.0: +; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_cvtw2mask_512: +; AVX512F-32: # %bb.0: +; AVX512F-32-NEXT: vpmovw2m %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: vzeroupper +; AVX512F-32-NEXT: retl + %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0) + ret i32 %res +} diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll index 2fa7c2c5b8a..5d6b60a5645 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1455,47 +1455,6 @@ define <8 x 
i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> ret <8 x i64> %res2 } -declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>) - -define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) { -; AVX512BW-LABEL: test_int_x86_avx512_cvtb2mask_512: -; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 -; AVX512BW-NEXT: kmovq %k0, %rax -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: subl $12, %esp -; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: kmovq %k0, (%esp) -; AVX512F-32-NEXT: movl (%esp), %eax -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: addl $12, %esp -; AVX512F-32-NEXT: retl - %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0) - ret i64 %res -} - -declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>) - -define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) { -; AVX512BW-LABEL: test_int_x86_avx512_cvtw2mask_512: -; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 -; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_int_x86_avx512_cvtw2mask_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpmovw2m %zmm0, %k0 -; AVX512F-32-NEXT: kmovd %k0, %eax -; AVX512F-32-NEXT: retl - %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0) - ret i32 %res -} - declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll index 4e343eef6fa..685f7f5fda0 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll @@ -3826,3 +3826,55 @@ define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> 
%x1, i16 ret i16 %res2 } +declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>) + +define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovb2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0) + ret i16 %res +} + +declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>) + +define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovb2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x29,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0) + ret i32 %res +} + +declare i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16>) + +define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovw2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: ## kill: def %al killed %al killed %eax +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0) + ret i8 %res +} + +declare i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16>) + +define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovw2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax +; CHECK-NEXT: vzeroupper ## 
encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0) + ret i16 %res +} diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 64ad66e336b..74feae79022 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -2304,57 +2304,6 @@ define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> ret <16 x i16> %res4 } -declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>) - -define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovb2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xc0] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0) - ret i16 %res -} - -declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>) - -define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovb2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x29,0xc0] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0) - ret i32 %res -} - -declare i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16>) - -define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovw2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: ## kill: def %al killed %al killed %eax -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0) 
- ret i8 %res -} - -declare i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16>) - -define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovw2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0) - ret i16 %res -} - declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16) define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll index cb30dfd32aa..de0ee1b7459 100644 --- a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll @@ -328,3 +328,28 @@ define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x2_512(<4 x i32> %x0, <16 ret <16 x i32> %res4 } +declare i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32>) + +define i16@test_int_x86_avx512_cvtd2mask_512(<16 x i32> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_512: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovd2m %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax +; CHECK-NEXT: retq + %res = call i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32> %x0) + ret i16 %res +} + +declare i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64>) + +define i8@test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_512: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovq2m %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: ## kill: def %al killed %al killed %eax +; CHECK-NEXT: retq + %res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0) + ret i8 %res +} diff --git 
a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll index e0e7b237447..6863fc8986e 100644 --- a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll @@ -438,29 +438,3 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss_load(<4 x float>* %x0ptr, i8 %x1) %res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1) ret i8 %res } - -declare i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32>) - -define i16@test_int_x86_avx512_cvtd2mask_512(<16 x i32> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovd2m %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax -; CHECK-NEXT: retq - %res = call i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32> %x0) - ret i16 %res -} - -declare i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64>) - -define i8@test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovq2m %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: ## kill: def %al killed %al killed %eax -; CHECK-NEXT: retq - %res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0) - ret i8 %res -} diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll index 3a1bce05e67..1f37d790488 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll @@ -1804,3 +1804,54 @@ define <4 x i32>@test_int_x86_avx512_mask_broadcasti32x2_128(<4 x i32> %x0, <4 x ret <4 x i32> %res4 } +declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>) + +define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovd2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: 
[0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: ## kill: def %al killed %al killed %eax +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32>) + +define i8@test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovd2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x39,0xc0] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: ## kill: def %al killed %al killed %eax +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>) + +define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovq2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: ## kill: def %al killed %al killed %eax +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>) + +define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) { +; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpmovq2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0] +; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: ## kill: def %al killed %al killed %eax +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0) + ret i8 %res +} diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll index f201599c4aa..5692efdd6d5 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -624,55 
+624,3 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0, i8 %x1) { %res2 = add i8 %res, %res1 ret i8 %res2 } - -declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>) - -define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovd2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: ## kill: def %al killed %al killed %eax -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0) - ret i8 %res -} - -declare i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32>) - -define i8@test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovd2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x39,0xc0] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: ## kill: def %al killed %al killed %eax -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0) - ret i8 %res -} - -declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>) - -define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovq2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: ## kill: def %al killed %al killed %eax -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0) - ret i8 %res -} - -declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>) - -define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) { -; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpmovq2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: 
[0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: ## kill: def %al killed %al killed %eax -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0) - ret i8 %res -} |

