summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/IR/AutoUpgrade.cpp 63
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 18
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 69
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp 29
4 files changed, 45 insertions, 134 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 41309f2ef74..78385ac82d3 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -168,6 +168,12 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.mask.pmull.") || // Added in 4.0
Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
+ Name == "sse2.pmulu.dq" || // Added in 7.0
+ Name == "sse41.pmuldq" || // Added in 7.0
+ Name == "avx2.pmulu.dq" || // Added in 7.0
+ Name == "avx2.pmul.dq" || // Added in 7.0
+ Name == "avx512.pmulu.dq.512" || // Added in 7.0
+ Name == "avx512.pmul.dq.512" || // Added in 7.0
Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
@@ -906,6 +912,35 @@ static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
return Res;
}
+static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
+ Type *Ty = CI.getType();
+
+ // Both multiplicands arrive as vXi32; bitcast each to the call's vXi64 result type.
+ Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
+ Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
+
+ if (IsSigned) {
+ // Sign-extend the low 32 bits of each element in place: shl by 32, then ashr by 32.
+ Constant *ShiftAmt = ConstantInt::get(Ty, 32);
+ LHS = Builder.CreateShl(LHS, ShiftAmt);
+ LHS = Builder.CreateAShr(LHS, ShiftAmt);
+ RHS = Builder.CreateShl(RHS, ShiftAmt);
+ RHS = Builder.CreateAShr(RHS, ShiftAmt);
+ } else {
+ // Zero-extend instead: keep only the low 32 bits of each element via an AND mask.
+ Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
+ LHS = Builder.CreateAnd(LHS, Mask);
+ RHS = Builder.CreateAnd(RHS, Mask);
+ }
+
+ Value *Res = Builder.CreateMul(LHS, RHS);
+
+ if (CI.getNumArgOperands() == 4) // NOTE(review): presumably the avx512.mask.* form (operand 2 = passthru, operand 3 = mask) - confirm
+ Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
+
+ return Res;
+}
+
// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder,Value *Vec, Value *Mask,
unsigned NumElts) {
@@ -1028,24 +1063,6 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_pshuf_b_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("pmul.dq.")) {
- if (VecWidth == 128)
- IID = Intrinsic::x86_sse41_pmuldq;
- else if (VecWidth == 256)
- IID = Intrinsic::x86_avx2_pmul_dq;
- else if (VecWidth == 512)
- IID = Intrinsic::x86_avx512_pmul_dq_512;
- else
- llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("pmulu.dq.")) {
- if (VecWidth == 128)
- IID = Intrinsic::x86_sse2_pmulu_dq;
- else if (VecWidth == 256)
- IID = Intrinsic::x86_avx2_pmulu_dq;
- else if (VecWidth == 512)
- IID = Intrinsic::x86_avx512_pmulu_dq_512;
- else
- llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmul.hr.sw.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
@@ -1455,6 +1472,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx2.pminu") ||
Name.startswith("avx512.mask.pminu"))) {
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
+ } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
+ Name == "avx2.pmulu.dq" ||
+ Name == "avx512.pmulu.dq.512" ||
+ Name.startswith("avx512.mask.pmulu.dq."))) {
+ Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
+ } else if (IsX86 && (Name == "sse41.pmuldq" ||
+ Name == "avx2.pmul.dq" ||
+ Name == "avx512.pmul.dq.512" ||
+ Name.startswith("avx512.mask.pmul.dq."))) {
+ Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
} else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
Name == "sse2.cvtps2pd" ||
Name == "avx.cvtdq2.pd.256" ||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1870a668c98..08e4f6387b6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20855,24 +20855,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
- case Intrinsic::x86_sse41_pmuldq:
- case Intrinsic::x86_avx2_pmul_dq:
- case Intrinsic::x86_avx512_pmul_dq_512: {
- MVT OpVT = Op.getSimpleValueType();
- return DAG.getNode(X86ISD::PMULDQ, dl, OpVT,
- DAG.getBitcast(OpVT, Op.getOperand(1)),
- DAG.getBitcast(OpVT, Op.getOperand(2)));
- }
-
- case Intrinsic::x86_sse2_pmulu_dq:
- case Intrinsic::x86_avx2_pmulu_dq:
- case Intrinsic::x86_avx512_pmulu_dq_512: {
- MVT OpVT = Op.getSimpleValueType();
- return DAG.getNode(X86ISD::PMULUDQ, dl, OpVT,
- DAG.getBitcast(OpVT, Op.getOperand(1)),
- DAG.getBitcast(OpVT, Op.getOperand(2)));
- }
-
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
// Operands intentionally swapped. Mask is last operand to intrinsic,
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e545cc27a70..c62b4d6cdd9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -566,55 +566,6 @@ static Value *simplifyX86varShift(const IntrinsicInst &II,
return Builder.CreateAShr(Vec, ShiftVec);
}
-static Value *simplifyX86muldq(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
- Value *Arg0 = II.getArgOperand(0);
- Value *Arg1 = II.getArgOperand(1);
- Type *ResTy = II.getType();
- assert(Arg0->getType()->getScalarSizeInBits() == 32 &&
- Arg1->getType()->getScalarSizeInBits() == 32 &&
- ResTy->getScalarSizeInBits() == 64 && "Unexpected muldq/muludq types");
-
- // muldq/muludq(undef, undef) -> zero (matches generic mul behavior)
- if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
- return ConstantAggregateZero::get(ResTy);
-
- // Constant folding.
- // PMULDQ = (mul(vXi64 sext(shuffle<0,2,..>(Arg0)),
- // vXi64 sext(shuffle<0,2,..>(Arg1))))
- // PMULUDQ = (mul(vXi64 zext(shuffle<0,2,..>(Arg0)),
- // vXi64 zext(shuffle<0,2,..>(Arg1))))
- if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
- return nullptr;
-
- unsigned NumElts = ResTy->getVectorNumElements();
- assert(Arg0->getType()->getVectorNumElements() == (2 * NumElts) &&
- Arg1->getType()->getVectorNumElements() == (2 * NumElts) &&
- "Unexpected muldq/muludq types");
-
- unsigned IntrinsicID = II.getIntrinsicID();
- bool IsSigned = (Intrinsic::x86_sse41_pmuldq == IntrinsicID ||
- Intrinsic::x86_avx2_pmul_dq == IntrinsicID ||
- Intrinsic::x86_avx512_pmul_dq_512 == IntrinsicID);
-
- SmallVector<unsigned, 16> ShuffleMask;
- for (unsigned i = 0; i != NumElts; ++i)
- ShuffleMask.push_back(i * 2);
-
- auto *LHS = Builder.CreateShuffleVector(Arg0, Arg0, ShuffleMask);
- auto *RHS = Builder.CreateShuffleVector(Arg1, Arg1, ShuffleMask);
-
- if (IsSigned) {
- LHS = Builder.CreateSExt(LHS, ResTy);
- RHS = Builder.CreateSExt(RHS, ResTy);
- } else {
- LHS = Builder.CreateZExt(LHS, ResTy);
- RHS = Builder.CreateZExt(RHS, ResTy);
- }
-
- return Builder.CreateMul(LHS, RHS);
-}
-
static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) {
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
@@ -2642,26 +2593,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return replaceInstUsesWith(*II, V);
break;
- case Intrinsic::x86_sse2_pmulu_dq:
- case Intrinsic::x86_sse41_pmuldq:
- case Intrinsic::x86_avx2_pmul_dq:
- case Intrinsic::x86_avx2_pmulu_dq:
- case Intrinsic::x86_avx512_pmul_dq_512:
- case Intrinsic::x86_avx512_pmulu_dq_512: {
- if (Value *V = simplifyX86muldq(*II, Builder))
- return replaceInstUsesWith(*II, V);
-
- unsigned VWidth = II->getType()->getVectorNumElements();
- APInt UndefElts(VWidth, 0);
- APInt DemandedElts = APInt::getAllOnesValue(VWidth);
- if (Value *V = SimplifyDemandedVectorElts(II, DemandedElts, UndefElts)) {
- if (V != II)
- return replaceInstUsesWith(*II, V);
- return II;
- }
- break;
- }
-
case Intrinsic::x86_sse2_packssdw_128:
case Intrinsic::x86_sse2_packsswb_128:
case Intrinsic::x86_avx2_packssdw:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 35ddd7a3eb2..0c03cc31228 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1436,35 +1436,6 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
- case Intrinsic::x86_sse2_pmulu_dq:
- case Intrinsic::x86_sse41_pmuldq:
- case Intrinsic::x86_avx2_pmul_dq:
- case Intrinsic::x86_avx2_pmulu_dq:
- case Intrinsic::x86_avx512_pmul_dq_512:
- case Intrinsic::x86_avx512_pmulu_dq_512: {
- Value *Op0 = II->getArgOperand(0);
- Value *Op1 = II->getArgOperand(1);
- unsigned InnerVWidth = Op0->getType()->getVectorNumElements();
- assert((VWidth * 2) == InnerVWidth && "Unexpected input size");
-
- APInt InnerDemandedElts(InnerVWidth, 0);
- for (unsigned i = 0; i != VWidth; ++i)
- if (DemandedElts[i])
- InnerDemandedElts.setBit(i * 2);
-
- UndefElts2 = APInt(InnerVWidth, 0);
- TmpV = SimplifyDemandedVectorElts(Op0, InnerDemandedElts, UndefElts2,
- Depth + 1);
- if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
-
- UndefElts3 = APInt(InnerVWidth, 0);
- TmpV = SimplifyDemandedVectorElts(Op1, InnerDemandedElts, UndefElts3,
- Depth + 1);
- if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
-
- break;
- }
-
case Intrinsic::x86_sse2_packssdw_128:
case Intrinsic::x86_sse2_packsswb_128:
case Intrinsic::x86_sse2_packuswb_128:
OpenPOWER on IntegriCloud