diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 63 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 18 | ||||
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 69 | ||||
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 29 |
4 files changed, 45 insertions, 134 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 41309f2ef74..78385ac82d3 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -168,6 +168,12 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name.startswith("avx512.mask.pmull.") || // Added in 4.0 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0 + Name == "sse2.pmulu.dq" || // Added in 7.0 + Name == "sse41.pmuldq" || // Added in 7.0 + Name == "avx2.pmulu.dq" || // Added in 7.0 + Name == "avx2.pmul.dq" || // Added in 7.0 + Name == "avx512.pmulu.dq.512" || // Added in 7.0 + Name == "avx512.pmul.dq.512" || // Added in 7.0 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0 Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0 @@ -906,6 +912,35 @@ static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI, return Res; } +static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) { + Type *Ty = CI.getType(); + + // Arguments have a vXi32 type so cast to vXi64. + Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty); + Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty); + + if (IsSigned) { + // Shift left then arithmetic shift right. + Constant *ShiftAmt = ConstantInt::get(Ty, 32); + LHS = Builder.CreateShl(LHS, ShiftAmt); + LHS = Builder.CreateAShr(LHS, ShiftAmt); + RHS = Builder.CreateShl(RHS, ShiftAmt); + RHS = Builder.CreateAShr(RHS, ShiftAmt); + } else { + // Clear the upper bits. + Constant *Mask = ConstantInt::get(Ty, 0xffffffff); + LHS = Builder.CreateAnd(LHS, Mask); + RHS = Builder.CreateAnd(RHS, Mask); + } + + Value *Res = Builder.CreateMul(LHS, RHS); + + if (CI.getNumArgOperands() == 4) + Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2)); + + return Res; +} + // Applying mask on vector of i1's and make sure result is at least 8 bits wide. static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder,Value *Vec, Value *Mask, unsigned NumElts) { @@ -1028,24 +1063,6 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx512_pshuf_b_512; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("pmul.dq.")) { - if (VecWidth == 128) - IID = Intrinsic::x86_sse41_pmuldq; - else if (VecWidth == 256) - IID = Intrinsic::x86_avx2_pmul_dq; - else if (VecWidth == 512) - IID = Intrinsic::x86_avx512_pmul_dq_512; - else - llvm_unreachable("Unexpected intrinsic"); - } else if (Name.startswith("pmulu.dq.")) { - if (VecWidth == 128) - IID = Intrinsic::x86_sse2_pmulu_dq; - else if (VecWidth == 256) - IID = Intrinsic::x86_avx2_pmulu_dq; - else if (VecWidth == 512) - IID = Intrinsic::x86_avx512_pmulu_dq_512; - else - llvm_unreachable("Unexpected intrinsic"); } else if (Name.startswith("pmul.hr.sw.")) { if (VecWidth == 128) IID = Intrinsic::x86_ssse3_pmul_hr_sw_128; @@ -1455,6 +1472,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Name.startswith("avx2.pminu") || Name.startswith("avx512.mask.pminu"))) { Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT); + } else if (IsX86 && (Name == "sse2.pmulu.dq" || + Name == "avx2.pmulu.dq" || + Name == "avx512.pmulu.dq.512" || + Name.startswith("avx512.mask.pmulu.dq."))) { + Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false); + } else if (IsX86 && (Name == "sse41.pmuldq" || + Name == "avx2.pmul.dq" || + Name == "avx512.pmul.dq.512" || + Name.startswith("avx512.mask.pmul.dq."))) { + Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true); } else if (IsX86 && (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtps2pd" || Name == "avx.cvtdq2.pd.256" || diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1870a668c98..08e4f6387b6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20855,24 +20855,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. - case Intrinsic::x86_sse41_pmuldq: - case Intrinsic::x86_avx2_pmul_dq: - case Intrinsic::x86_avx512_pmul_dq_512: { - MVT OpVT = Op.getSimpleValueType(); - return DAG.getNode(X86ISD::PMULDQ, dl, OpVT, - DAG.getBitcast(OpVT, Op.getOperand(1)), - DAG.getBitcast(OpVT, Op.getOperand(2))); - } - - case Intrinsic::x86_sse2_pmulu_dq: - case Intrinsic::x86_avx2_pmulu_dq: - case Intrinsic::x86_avx512_pmulu_dq_512: { - MVT OpVT = Op.getSimpleValueType(); - return DAG.getNode(X86ISD::PMULUDQ, dl, OpVT, - DAG.getBitcast(OpVT, Op.getOperand(1)), - DAG.getBitcast(OpVT, Op.getOperand(2))); - } - case Intrinsic::x86_avx2_permd: case Intrinsic::x86_avx2_permps: // Operands intentionally swapped. Mask is last operand to intrinsic, diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index e545cc27a70..c62b4d6cdd9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -566,55 +566,6 @@ static Value *simplifyX86varShift(const IntrinsicInst &II, return Builder.CreateAShr(Vec, ShiftVec); } -static Value *simplifyX86muldq(const IntrinsicInst &II, - InstCombiner::BuilderTy &Builder) { - Value *Arg0 = II.getArgOperand(0); - Value *Arg1 = II.getArgOperand(1); - Type *ResTy = II.getType(); - assert(Arg0->getType()->getScalarSizeInBits() == 32 && - Arg1->getType()->getScalarSizeInBits() == 32 && - ResTy->getScalarSizeInBits() == 64 && "Unexpected muldq/muludq types"); - - // muldq/muludq(undef, undef) -> zero (matches generic mul behavior) - if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1)) - return ConstantAggregateZero::get(ResTy); - - // Constant folding. - // PMULDQ = (mul(vXi64 sext(shuffle<0,2,..>(Arg0)), - // vXi64 sext(shuffle<0,2,..>(Arg1)))) - // PMULUDQ = (mul(vXi64 zext(shuffle<0,2,..>(Arg0)), - // vXi64 zext(shuffle<0,2,..>(Arg1)))) - if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1)) - return nullptr; - - unsigned NumElts = ResTy->getVectorNumElements(); - assert(Arg0->getType()->getVectorNumElements() == (2 * NumElts) && - Arg1->getType()->getVectorNumElements() == (2 * NumElts) && - "Unexpected muldq/muludq types"); - - unsigned IntrinsicID = II.getIntrinsicID(); - bool IsSigned = (Intrinsic::x86_sse41_pmuldq == IntrinsicID || - Intrinsic::x86_avx2_pmul_dq == IntrinsicID || - Intrinsic::x86_avx512_pmul_dq_512 == IntrinsicID); - - SmallVector<unsigned, 16> ShuffleMask; - for (unsigned i = 0; i != NumElts; ++i) - ShuffleMask.push_back(i * 2); - - auto *LHS = Builder.CreateShuffleVector(Arg0, Arg0, ShuffleMask); - auto *RHS = Builder.CreateShuffleVector(Arg1, Arg1, ShuffleMask); - - if (IsSigned) { - LHS = Builder.CreateSExt(LHS, ResTy); - RHS = Builder.CreateSExt(RHS, ResTy); - } else { - LHS = Builder.CreateZExt(LHS, ResTy); - RHS = Builder.CreateZExt(RHS, ResTy); - } - - return Builder.CreateMul(LHS, RHS); -} - static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) { Value *Arg0 = II.getArgOperand(0); Value *Arg1 = II.getArgOperand(1); @@ -2642,26 +2593,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return replaceInstUsesWith(*II, V); break; - case Intrinsic::x86_sse2_pmulu_dq: - case Intrinsic::x86_sse41_pmuldq: - case Intrinsic::x86_avx2_pmul_dq: - case Intrinsic::x86_avx2_pmulu_dq: - case Intrinsic::x86_avx512_pmul_dq_512: - case Intrinsic::x86_avx512_pmulu_dq_512: { - if (Value *V = simplifyX86muldq(*II, Builder)) - return replaceInstUsesWith(*II, V); - - unsigned VWidth = II->getType()->getVectorNumElements(); - APInt UndefElts(VWidth, 0); - APInt DemandedElts = APInt::getAllOnesValue(VWidth); - if (Value *V = SimplifyDemandedVectorElts(II, DemandedElts, UndefElts)) { - if (V != II) - return replaceInstUsesWith(*II, V); - return II; - } - break; - } - case Intrinsic::x86_sse2_packssdw_128: case Intrinsic::x86_sse2_packsswb_128: case Intrinsic::x86_avx2_packssdw: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 35ddd7a3eb2..0c03cc31228 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1436,35 +1436,6 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, break; - case Intrinsic::x86_sse2_pmulu_dq: - case Intrinsic::x86_sse41_pmuldq: - case Intrinsic::x86_avx2_pmul_dq: - case Intrinsic::x86_avx2_pmulu_dq: - case Intrinsic::x86_avx512_pmul_dq_512: - case Intrinsic::x86_avx512_pmulu_dq_512: { - Value *Op0 = II->getArgOperand(0); - Value *Op1 = II->getArgOperand(1); - unsigned InnerVWidth = Op0->getType()->getVectorNumElements(); - assert((VWidth * 2) == InnerVWidth && "Unexpected input size"); - - APInt InnerDemandedElts(InnerVWidth, 0); - for (unsigned i = 0; i != VWidth; ++i) - if (DemandedElts[i]) - InnerDemandedElts.setBit(i * 2); - - UndefElts2 = APInt(InnerVWidth, 0); - TmpV = SimplifyDemandedVectorElts(Op0, InnerDemandedElts, UndefElts2, - Depth + 1); - if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } - - UndefElts3 = APInt(InnerVWidth, 0); - TmpV = SimplifyDemandedVectorElts(Op1, InnerDemandedElts, UndefElts3, - Depth + 1); - if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; } - - break; - } - case Intrinsic::x86_sse2_packssdw_128: case Intrinsic::x86_sse2_packsswb_128: case Intrinsic::x86_sse2_packuswb_128: |

