diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 53 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 24 |
2 files changed, 37 insertions, 40 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 244dabe60e3..afbe4eb9543 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -259,6 +259,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
       Name.startswith("avx512.cvtmask2") || // Added in 5.0
       (Name.startswith("xop.vpcom") && // Added in 3.2
        F->arg_size() == 2) ||
+      Name.startswith("avx512.ptestm") || //Added in 6.0
+      Name.startswith("avx512.ptestnm") || //Added in 6.0
       Name.startswith("sse2.pavg") || // Added in 6.0
       Name.startswith("avx2.pavg") || // Added in 6.0
       Name.startswith("avx512.mask.pavg")) // Added in 6.0
@@ -826,6 +828,26 @@ static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
   return Res;
 }
 
+// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
+static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder,Value *Vec, Value *Mask,
+                                     unsigned NumElts) {
+  const auto *C = dyn_cast<Constant>(Mask);
+  if (!C || !C->isAllOnesValue())
+    Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
+
+  if (NumElts < 8) {
+    uint32_t Indices[8];
+    for (unsigned i = 0; i != NumElts; ++i)
+      Indices[i] = i;
+    for (unsigned i = NumElts; i != 8; ++i)
+      Indices[i] = NumElts + i % NumElts;
+    Vec = Builder.CreateShuffleVector(Vec,
+                                      Constant::getNullValue(Vec->getType()),
+                                      Indices);
+  }
+  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
+}
+
 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
                                    unsigned CC, bool Signed) {
   Value *Op0 = CI.getArgOperand(0);
@@ -851,22 +873,8 @@ static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
   }
 
   Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
-  const auto *C = dyn_cast<Constant>(Mask);
-  if (!C || !C->isAllOnesValue())
-    Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));
 
-  if (NumElts < 8) {
-    uint32_t Indices[8];
-    for (unsigned i = 0; i != NumElts; ++i)
-      Indices[i] = i;
-    for (unsigned i = NumElts; i != 8; ++i)
-      Indices[i] = NumElts + i % NumElts;
-    Cmp = Builder.CreateShuffleVector(Cmp,
-                                      Constant::getNullValue(Cmp->getType()),
-                                      Indices);
-  }
-  return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
-                                                     std::max(NumElts, 8U)));
+  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask, NumElts);
 }
 
 // Replace a masked intrinsic with an older unmasked intrinsic.
@@ -1038,7 +1046,20 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
                         ExtTy->getPrimitiveSizeInBits();
     Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
     Rep = Builder.CreateVectorSplat(NumElts, Rep);
-  } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))) {
+  } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
+                       Name.startswith("avx512.ptestnm"))) {
+    Value *Op0 = CI->getArgOperand(0);
+    Value *Op1 = CI->getArgOperand(1);
+    Value *Mask = CI->getArgOperand(2);
+    Rep = Builder.CreateAnd(Op0, Op1);
+    llvm::Type *Ty = Op0->getType();
+    Value *Zero = llvm::Constant::getNullValue(Ty);
+    ICmpInst::Predicate Pred =
+      Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
+    Rep = Builder.CreateICmp(Pred, Rep, Zero);
+    unsigned NumElts = Op0->getType()->getVectorNumElements();
+    Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask, NumElts);
+  } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
     unsigned NumElts =
         CI->getArgOperand(1)->getType()->getVectorNumElements();
     Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 1c795126092..e3554333828 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1385,30 +1385,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
   X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
   X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx512_ptestm_b_128, CMP_MASK, X86ISD::TESTM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestm_b_256, CMP_MASK, X86ISD::TESTM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestm_b_512, CMP_MASK, X86ISD::TESTM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestm_d_128, CMP_MASK, X86ISD::TESTM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestm_d_256, CMP_MASK, X86ISD::TESTM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestm_d_512, CMP_MASK, X86ISD::TESTM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestm_q_128, CMP_MASK, X86ISD::TESTM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestm_q_256, CMP_MASK, X86ISD::TESTM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestm_q_512, CMP_MASK, X86ISD::TESTM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestm_w_128, CMP_MASK, X86ISD::TESTM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestm_w_256, CMP_MASK, X86ISD::TESTM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestm_w_512, CMP_MASK, X86ISD::TESTM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestnm_b_128, CMP_MASK, X86ISD::TESTNM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestnm_b_256, CMP_MASK, X86ISD::TESTNM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestnm_b_512, CMP_MASK, X86ISD::TESTNM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestnm_d_128, CMP_MASK, X86ISD::TESTNM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestnm_d_256, CMP_MASK, X86ISD::TESTNM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestnm_d_512, CMP_MASK, X86ISD::TESTNM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestnm_q_128, CMP_MASK, X86ISD::TESTNM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestnm_q_256, CMP_MASK, X86ISD::TESTNM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestnm_q_512, CMP_MASK, X86ISD::TESTNM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestnm_w_128, CMP_MASK, X86ISD::TESTNM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestnm_w_256, CMP_MASK, X86ISD::TESTNM, 0),
-  X86_INTRINSIC_DATA(avx512_ptestnm_w_512, CMP_MASK, X86ISD::TESTNM, 0),
   X86_INTRINSIC_DATA(avx512_rcp14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
   X86_INTRINSIC_DATA(avx512_rcp14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
   X86_INTRINSIC_DATA(avx512_rcp14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),