summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/IR/IntrinsicsX86.td29
-rw-r--r--llvm/lib/IR/AutoUpgrade.cpp21
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp13
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h14
-rwxr-xr-xllvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll50
-rw-r--r--llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll44
-rw-r--r--llvm/test/CodeGen/X86/avx512bw-intrinsics.ll41
-rw-r--r--llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll52
-rw-r--r--llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll51
-rw-r--r--llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll25
-rw-r--r--llvm/test/CodeGen/X86/avx512dq-intrinsics.ll26
-rw-r--r--llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll51
-rw-r--r--llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll52
13 files changed, 217 insertions, 252 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index e5ab18eedad..35eb8310d96 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -3801,35 +3801,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_cvtb2mask_128 : // TODO remove this intrinsic
- Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_avx512_cvtb2mask_256 : // TODO remove this intrinsic
- Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>;
- def int_x86_avx512_cvtb2mask_512 : // TODO remove this intrinsic
- Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_cvtw2mask_128 : // TODO remove this intrinsic
- Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_avx512_cvtw2mask_256 : // TODO remove this intrinsic
- Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx512_cvtw2mask_512 : // TODO remove this intrinsic
- Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty], [IntrNoMem]>;
-
- def int_x86_avx512_cvtd2mask_128 : // TODO remove this intrinsic
- Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_avx512_cvtd2mask_256 : // TODO remove this intrinsic
- Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty], [IntrNoMem]>;
- def int_x86_avx512_cvtd2mask_512 : // TODO remove this intrinsic
- Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_cvtq2mask_128 : // TODO remove this intrinsic
- Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_avx512_cvtq2mask_256 : // TODO remove this intrinsic
- Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty], [IntrNoMem]>;
- def int_x86_avx512_cvtq2mask_512 : // TODO remove this intrinsic
- Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty], [IntrNoMem]>;
-
}
// Pack ops.
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index c258d1a4e3a..98fd616d0fc 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -157,6 +157,10 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
+ Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
+ Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
+ Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
+ Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
Name == "avx512.mask.add.pd.128" || // Added in 4.0
Name == "avx512.mask.add.pd.256" || // Added in 4.0
Name == "avx512.mask.add.ps.128" || // Added in 4.0
@@ -829,9 +833,11 @@ static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder,Value *Vec, Value *Mask,
unsigned NumElts) {
- const auto *C = dyn_cast<Constant>(Mask);
- if (!C || !C->isAllOnesValue())
- Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
+ if (Mask) {
+ const auto *C = dyn_cast<Constant>(Mask);
+ if (!C || !C->isAllOnesValue())
+ Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
+ }
if (NumElts < 8) {
uint32_t Indices[8];
@@ -1115,6 +1121,15 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
} else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
+ } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
+ Name.startswith("avx512.cvtw2mask.") ||
+ Name.startswith("avx512.cvtd2mask.") ||
+ Name.startswith("avx512.cvtq2mask."))) {
+ Value *Op = CI->getArgOperand(0);
+ Value *Zero = llvm::Constant::getNullValue(Op->getType());
+ Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
+ Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr,
+ Op->getType()->getVectorNumElements());
} else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
Name == "ssse3.pabs.w.128" ||
Name == "ssse3.pabs.d.128" ||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7f1969d0c79..ae5eb552b73 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20518,19 +20518,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Src1, Src2, Src3, Imm, Rnd),
Mask, Passthru, Subtarget, DAG);
}
- case CONVERT_TO_MASK: {
- MVT SrcVT = Op.getOperand(1).getSimpleValueType();
- MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
- MVT BitcastVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits());
-
- SDValue CvtMask = DAG.getNode(X86ISD::PCMPGTM, dl, MaskVT,
- DAG.getConstant(0, dl, SrcVT),
- Op.getOperand(1));
- SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
- DAG.getUNDEF(BitcastVT), CvtMask,
- DAG.getIntPtrConstant(0, dl));
- return DAG.getBitcast(Op.getValueType(), Res);
- }
case ROUNDP: {
assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode");
// Clear the upper bits of the rounding immediate so that the legacy
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index d14369b7776..bd21fee7544 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -37,7 +37,7 @@ enum IntrinsicType : uint16_t {
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
EXPAND_FROM_MEM,
TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
- FIXUPIMMS_MASKZ, CONVERT_TO_MASK, GATHER_AVX2, MASK_BINOP,
+ FIXUPIMMS_MASKZ, GATHER_AVX2, MASK_BINOP,
ROUNDP, ROUNDS
};
@@ -449,15 +449,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx512_cvtb2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
- X86_INTRINSIC_DATA(avx512_cvtb2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
- X86_INTRINSIC_DATA(avx512_cvtb2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
- X86_INTRINSIC_DATA(avx512_cvtd2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
- X86_INTRINSIC_DATA(avx512_cvtd2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
- X86_INTRINSIC_DATA(avx512_cvtd2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
- X86_INTRINSIC_DATA(avx512_cvtq2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
- X86_INTRINSIC_DATA(avx512_cvtq2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
- X86_INTRINSIC_DATA(avx512_cvtq2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
@@ -472,9 +463,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvtw2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
- X86_INTRINSIC_DATA(avx512_cvtw2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
- X86_INTRINSIC_DATA(avx512_cvtw2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_kand_w, MASK_BINOP, ISD::AND, 0),
diff --git a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll
index f828c4dcef7..11aba2fdf73 100755
--- a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -O0 -mtriple=x86_64-apple-darwin -mcpu=skx -fast-isel-abort=1 | FileCheck %s
+; RUN: llc < %s -O0 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
; ModuleID = 'mask_set.c'
source_filename = "mask_set.c"
@@ -17,51 +17,53 @@ declare i32 @check_mask16(i16 zeroext %res_mask, i16 zeroext %exp_mask, i8* %fna
define void @test_xmm(i32 %shift, i32 %mulp, <2 x i64> %a,i8* %arraydecay,i8* %fname){
; CHECK-LABEL: test_xmm:
; CHECK: ## %bb.0:
-; CHECK-NEXT: subq $56, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: movl $2, %esi
-; CHECK-NEXT: movl $8, %eax
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: movl $4, %eax
; CHECK-NEXT: vpmovw2m %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %edi
-; CHECK-NEXT: movb %dil, %r8b
-; CHECK-NEXT: movzbl %r8b, %edi
-; CHECK-NEXT: movw %di, %r9w
+; CHECK-NEXT: movl $2, %esi
+; CHECK-NEXT: movl $8, %edi
+; CHECK-NEXT: movl %edi, {{[0-9]+}}(%rsp) ## 4-byte Spill
; CHECK-NEXT: movq %rdx, %rdi
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r8d ## 4-byte Reload
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; CHECK-NEXT: movl %eax, %edx
-; CHECK-NEXT: movw %r9w, {{[0-9]+}}(%rsp) ## 2-byte Spill
+; CHECK-NEXT: movl %r8d, %edx
; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
; CHECK-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) ## 16-byte Spill
+; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
+; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill
; CHECK-NEXT: callq _calc_expected_mask_val
-; CHECK-NEXT: movw %ax, %r9w
-; CHECK-NEXT: movw {{[0-9]+}}(%rsp), %r10w ## 2-byte Reload
-; CHECK-NEXT: movzwl %r10w, %edi
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: movw %dx, %r9w
; CHECK-NEXT: movzwl %r9w, %esi
+; CHECK-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
+; CHECK-NEXT: kmovb %k0, %edi
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx ## 8-byte Reload
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
; CHECK-NEXT: callq _check_mask16
-; CHECK-NEXT: movl $4, %esi
; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 ## 16-byte Reload
; CHECK-NEXT: vpmovd2m %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %edi
-; CHECK-NEXT: movb %dil, %r8b
-; CHECK-NEXT: movzbl %r8b, %edi
-; CHECK-NEXT: movw %di, %r9w
+; CHECK-NEXT: kmovq %k0, %k1
+; CHECK-NEXT: kmovd %k0, %esi
+; CHECK-NEXT: movb %sil, %r10b
+; CHECK-NEXT: movzbl %r10b, %esi
+; CHECK-NEXT: movw %si, %r9w
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi ## 8-byte Reload
-; CHECK-NEXT: movl %esi, {{[0-9]+}}(%rsp) ## 4-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %esi ## 4-byte Reload
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %edx ## 4-byte Reload
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
+; CHECK-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
; CHECK-NEXT: movw %r9w, {{[0-9]+}}(%rsp) ## 2-byte Spill
; CHECK-NEXT: callq _calc_expected_mask_val
; CHECK-NEXT: movw %ax, %r9w
-; CHECK-NEXT: movw {{[0-9]+}}(%rsp), %r10w ## 2-byte Reload
-; CHECK-NEXT: movzwl %r10w, %edi
+; CHECK-NEXT: movw {{[0-9]+}}(%rsp), %r11w ## 2-byte Reload
+; CHECK-NEXT: movzwl %r11w, %edi
; CHECK-NEXT: movzwl %r9w, %esi
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx ## 8-byte Reload
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
; CHECK-NEXT: callq _check_mask16
-; CHECK-NEXT: movl %eax, (%rsp) ## 4-byte Spill
-; CHECK-NEXT: addq $56, %rsp
+; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
+; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: retq
%d2 = bitcast <2 x i64> %a to <8 x i16>
%m2 = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %d2)
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
index 562d0c19e2b..84e7b0ec535 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
@@ -3590,3 +3590,47 @@ define i32@test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32
ret i32 %res2
}
+declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>)
+
+define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {
+; AVX512BW-LABEL: test_int_x86_avx512_cvtb2mask_512:
+; AVX512BW: ## %bb.0:
+; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: subl $12, %esp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, (%esp)
+; AVX512F-32-NEXT: movl (%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: addl $12, %esp
+; AVX512F-32-NEXT: vzeroupper
+; AVX512F-32-NEXT: retl
+ %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0)
+ ret i64 %res
+}
+
+declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>)
+
+define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) {
+; AVX512BW-LABEL: test_int_x86_avx512_cvtw2mask_512:
+; AVX512BW: ## %bb.0:
+; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_cvtw2mask_512:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: vpmovw2m %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: vzeroupper
+; AVX512F-32-NEXT: retl
+ %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0)
+ ret i32 %res
+}
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
index 2fa7c2c5b8a..5d6b60a5645 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -1455,47 +1455,6 @@ define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8>
ret <8 x i64> %res2
}
-declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>)
-
-define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {
-; AVX512BW-LABEL: test_int_x86_avx512_cvtb2mask_512:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
-; AVX512BW-NEXT: kmovq %k0, %rax
-; AVX512BW-NEXT: retq
-;
-; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512:
-; AVX512F-32: # %bb.0:
-; AVX512F-32-NEXT: subl $12, %esp
-; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: kmovq %k0, (%esp)
-; AVX512F-32-NEXT: movl (%esp), %eax
-; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: addl $12, %esp
-; AVX512F-32-NEXT: retl
- %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0)
- ret i64 %res
-}
-
-declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>)
-
-define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) {
-; AVX512BW-LABEL: test_int_x86_avx512_cvtw2mask_512:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: retq
-;
-; AVX512F-32-LABEL: test_int_x86_avx512_cvtw2mask_512:
-; AVX512F-32: # %bb.0:
-; AVX512F-32-NEXT: vpmovw2m %zmm0, %k0
-; AVX512F-32-NEXT: kmovd %k0, %eax
-; AVX512F-32-NEXT: retl
- %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0)
- ret i32 %res
-}
-
declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
index 4e343eef6fa..685f7f5fda0 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
@@ -3826,3 +3826,55 @@ define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16
ret i16 %res2
}
+declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>)
+
+define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpmovb2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xc0]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0)
+ ret i16 %res
+}
+
+declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>)
+
+define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpmovb2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x29,0xc0]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0)
+ ret i32 %res
+}
+
+declare i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16>)
+
+define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpmovw2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: ## kill: def %al killed %al killed %eax
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0)
+ ret i8 %res
+}
+
+declare i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16>)
+
+define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpmovw2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
+; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0)
+ ret i16 %res
+}
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index 64ad66e336b..74feae79022 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -2304,57 +2304,6 @@ define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8>
ret <16 x i16> %res4
}
-declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>)
-
-define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpmovb2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xc0]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0)
- ret i16 %res
-}
-
-declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>)
-
-define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpmovb2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x29,0xc0]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0)
- ret i32 %res
-}
-
-declare i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16>)
-
-define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpmovw2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: ## kill: def %al killed %al killed %eax
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0)
- ret i8 %res
-}
-
-declare i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16>)
-
-define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpmovw2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0)
- ret i16 %res
-}
-
declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
index cb30dfd32aa..de0ee1b7459 100644
--- a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
@@ -328,3 +328,28 @@ define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x2_512(<4 x i32> %x0, <16
ret <16 x i32> %res4
}
+declare i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32>)
+
+define i16@test_int_x86_avx512_cvtd2mask_512(<16 x i32> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpmovd2m %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
+; CHECK-NEXT: retq
+ %res = call i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32> %x0)
+ ret i16 %res
+}
+
+declare i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64>)
+
+define i8@test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpmovq2m %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: def %al killed %al killed %eax
+; CHECK-NEXT: retq
+ %res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0)
+ ret i8 %res
+}
diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll
index e0e7b237447..6863fc8986e 100644
--- a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll
@@ -438,29 +438,3 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss_load(<4 x float>* %x0ptr, i8 %x1)
%res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1)
ret i8 %res
}
-
-declare i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32>)
-
-define i16@test_int_x86_avx512_cvtd2mask_512(<16 x i32> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_512:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpmovd2m %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
-; CHECK-NEXT: retq
- %res = call i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32> %x0)
- ret i16 %res
-}
-
-declare i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64>)
-
-define i8@test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_512:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpmovq2m %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: ## kill: def %al killed %al killed %eax
-; CHECK-NEXT: retq
- %res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0)
- ret i8 %res
-}
diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
index 3a1bce05e67..1f37d790488 100644
--- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
@@ -1804,3 +1804,54 @@ define <4 x i32>@test_int_x86_avx512_mask_broadcasti32x2_128(<4 x i32> %x0, <4 x
ret <4 x i32> %res4
}
+declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>)
+
+define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpmovd2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: def %al killed %al killed %eax
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0)
+ ret i8 %res
+}
+
+declare i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32>)
+
+define i8@test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpmovd2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x39,0xc0]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: def %al killed %al killed %eax
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0)
+ ret i8 %res
+}
+
+declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>)
+
+define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpmovq2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: def %al killed %al killed %eax
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0)
+ ret i8 %res
+}
+
+declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>)
+
+define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpmovq2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: def %al killed %al killed %eax
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0)
+ ret i8 %res
+}
diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll
index f201599c4aa..5692efdd6d5 100644
--- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll
@@ -624,55 +624,3 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0, i8 %x1) {
%res2 = add i8 %res, %res1
ret i8 %res2
}
-
-declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>)
-
-define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpmovd2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT: ## kill: def %al killed %al killed %eax
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0)
- ret i8 %res
-}
-
-declare i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32>)
-
-define i8@test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpmovd2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x39,0xc0]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT: ## kill: def %al killed %al killed %eax
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0)
- ret i8 %res
-}
-
-declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>)
-
-define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpmovq2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT: ## kill: def %al killed %al killed %eax
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0)
- ret i8 %res
-}
-
-declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>)
-
-define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) {
-; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vpmovq2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT: ## kill: def %al killed %al killed %eax
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0)
- ret i8 %res
-}
OpenPOWER on IntegriCloud