diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-12-26 23:28:17 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-12-26 23:28:17 +0000 |
commit | c9cf7fc7a4aac89c388ba9d9b1ab2bf24cd1ece6 (patch) | |
tree | fe85ec18378ed79bdb13ca4f4c071503eaf1c1e3 /llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | |
parent | d9eaa54ef44693facfaa63edab37af1d0a9cb9fd (diff) | |
download | bcm5719-llvm-c9cf7fc7a4aac89c388ba9d9b1ab2bf24cd1ece6.tar.gz bcm5719-llvm-c9cf7fc7a4aac89c388ba9d9b1ab2bf24cd1ece6.zip |
[InstCombine][X86] Add DemandedElts support for PMULDQ/PMULUDQ instructions
PMULDQ/PMULUDQ vXi64 instructions only read the even-numbered elements of their v2Xi32 inputs, so SimplifyDemandedVectorElts should demand only those elements from the operands.
Differential Revision: https://reviews.llvm.org/D28119
llvm-svn: 290554
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 27 |
1 files changed, 27 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 13ce959feae..592eeb7fa0e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1431,6 +1431,33 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, break; + case Intrinsic::x86_sse2_pmulu_dq: + case Intrinsic::x86_sse41_pmuldq: + case Intrinsic::x86_avx2_pmul_dq: + case Intrinsic::x86_avx2_pmulu_dq: { + Value *Op0 = II->getArgOperand(0); + Value *Op1 = II->getArgOperand(1); + unsigned InnerVWidth = Op0->getType()->getVectorNumElements(); + assert((VWidth * 2) == InnerVWidth && "Unexpected input size"); + + APInt InnerDemandedElts(InnerVWidth, 0); + for (unsigned i = 0; i != VWidth; ++i) + if (DemandedElts[i]) + InnerDemandedElts.setBit(i * 2); + + UndefElts2 = APInt(InnerVWidth, 0); + TmpV = SimplifyDemandedVectorElts(Op0, InnerDemandedElts, UndefElts2, + Depth + 1); + if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } + + UndefElts3 = APInt(InnerVWidth, 0); + TmpV = SimplifyDemandedVectorElts(Op1, InnerDemandedElts, UndefElts3, + Depth + 1); + if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; } + + break; + } + // SSE4A instructions leave the upper 64-bits of the 128-bit result // in an undefined state. case Intrinsic::x86_sse4a_extrq: |