diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-12-26 23:28:17 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-12-26 23:28:17 +0000 |
commit | c9cf7fc7a4aac89c388ba9d9b1ab2bf24cd1ece6 (patch) | |
tree | fe85ec18378ed79bdb13ca4f4c071503eaf1c1e3 /llvm/lib | |
parent | d9eaa54ef44693facfaa63edab37af1d0a9cb9fd (diff) | |
download | bcm5719-llvm-c9cf7fc7a4aac89c388ba9d9b1ab2bf24cd1ece6.tar.gz bcm5719-llvm-c9cf7fc7a4aac89c388ba9d9b1ab2bf24cd1ece6.zip |
[InstCombine][X86] Add DemandedElts support for PMULDQ/PMULUDQ instructions
PMULDQ/PMULUDQ vXi64 instructions only use the even-numbered elements of their v2Xi32 inputs, so SimplifyDemandedVectorElts should take advantage of this and demand only those input elements.
Differential Revision: https://reviews.llvm.org/D28119
llvm-svn: 290554
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 15 | ||||
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 27 |
2 files changed, 42 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 9b8f696da74..8f665152703 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1996,6 +1996,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return replaceInstUsesWith(*II, V); break; + case Intrinsic::x86_sse2_pmulu_dq: + case Intrinsic::x86_sse41_pmuldq: + case Intrinsic::x86_avx2_pmul_dq: + case Intrinsic::x86_avx2_pmulu_dq: { + unsigned VWidth = II->getType()->getVectorNumElements(); + APInt UndefElts(VWidth, 0); + APInt DemandedElts = APInt::getAllOnesValue(VWidth); + if (Value *V = SimplifyDemandedVectorElts(II, DemandedElts, UndefElts)) { + if (V != II) + return replaceInstUsesWith(*II, V); + return II; + } + break; + } + case Intrinsic::x86_sse41_insertps: if (Value *V = simplifyX86insertps(*II, *Builder)) return replaceInstUsesWith(*II, V); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 13ce959feae..592eeb7fa0e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1431,6 +1431,33 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, break; + case Intrinsic::x86_sse2_pmulu_dq: + case Intrinsic::x86_sse41_pmuldq: + case Intrinsic::x86_avx2_pmul_dq: + case Intrinsic::x86_avx2_pmulu_dq: { + Value *Op0 = II->getArgOperand(0); + Value *Op1 = II->getArgOperand(1); + unsigned InnerVWidth = Op0->getType()->getVectorNumElements(); + assert((VWidth * 2) == InnerVWidth && "Unexpected input size"); + + APInt InnerDemandedElts(InnerVWidth, 0); + for (unsigned i = 0; i != VWidth; ++i) + if (DemandedElts[i]) + InnerDemandedElts.setBit(i * 2); + + UndefElts2 = APInt(InnerVWidth, 0); + TmpV = SimplifyDemandedVectorElts(Op0, 
InnerDemandedElts, UndefElts2, + Depth + 1); + if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } + + UndefElts3 = APInt(InnerVWidth, 0); + TmpV = SimplifyDemandedVectorElts(Op1, InnerDemandedElts, UndefElts3, + Depth + 1); + if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; } + + break; + } + // SSE4A instructions leave the upper 64-bits of the 128-bit result // in an undefined state. case Intrinsic::x86_sse4a_extrq: |