diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-04-24 18:12:42 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-04-24 18:12:42 +0000 |
commit | 424da1637a8af9fbdfbaeaeda3738985430d96eb (patch) | |
tree | 7b0b2a2fb45697a6a9e0f3c30341bfc230ced80e /llvm/lib | |
parent | 1c9a9f255c5064ba87e2075d11bd007c3b99ae34 (diff) | |
download | bcm5719-llvm-424da1637a8af9fbdfbaeaeda3738985430d96eb.tar.gz bcm5719-llvm-424da1637a8af9fbdfbaeaeda3738985430d96eb.zip |
[InstCombine][SSE] Demanded vector elements for scalar intrinsics (Part 1 of 2)
This patch improves support for determining the demanded vector elements through SSE scalar intrinsics:
1 - recognise that we only need the lowest element of the second input for binary scalar operations (and all the elements of the first input)
2 - recognise that the roundss/roundsd intrinsics use the lowest element of the second input and the remaining elements from the first input
Differential Revision: http://reviews.llvm.org/D17490
llvm-svn: 267356
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 52 |
1 files changed, 52 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 3c02d66cff8..596d9838b9c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1077,6 +1077,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth); return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts); }; + auto SimplifyDemandedVectorEltsHigh = [this](Value *Op, unsigned Width, + unsigned DemandedWidth) { + APInt UndefElts(Width, 0); + APInt DemandedElts = APInt::getHighBitsSet(Width, DemandedWidth); + return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts); + }; switch (II->getIntrinsicID()) { default: break; @@ -1423,6 +1429,52 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::x86_sse_add_ss: + case Intrinsic::x86_sse_sub_ss: + case Intrinsic::x86_sse_mul_ss: + case Intrinsic::x86_sse_div_ss: + case Intrinsic::x86_sse_min_ss: + case Intrinsic::x86_sse_max_ss: + case Intrinsic::x86_sse_cmp_ss: + case Intrinsic::x86_sse2_add_sd: + case Intrinsic::x86_sse2_sub_sd: + case Intrinsic::x86_sse2_mul_sd: + case Intrinsic::x86_sse2_div_sd: + case Intrinsic::x86_sse2_min_sd: + case Intrinsic::x86_sse2_max_sd: + case Intrinsic::x86_sse2_cmp_sd: { + // These intrinsics only demand the lowest element of the second input + // vector. + Value *Arg1 = II->getArgOperand(1); + unsigned VWidth = Arg1->getType()->getVectorNumElements(); + if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) { + II->setArgOperand(1, V); + return II; + } + break; + } + + case Intrinsic::x86_sse41_round_ss: + case Intrinsic::x86_sse41_round_sd: { + // These intrinsics demand the upper elements of the first input vector and + // the lowest element of the second input vector. + bool MadeChange = false; + Value *Arg0 = II->getArgOperand(0); + Value *Arg1 = II->getArgOperand(1); + unsigned VWidth = Arg0->getType()->getVectorNumElements(); + if (Value *V = SimplifyDemandedVectorEltsHigh(Arg0, VWidth, VWidth - 1)) { + II->setArgOperand(0, V); + MadeChange = true; + } + if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) { + II->setArgOperand(1, V); + MadeChange = true; + } + if (MadeChange) + return II; + break; + } + // Constant fold ashr( <A x Bi>, Ci ). // Constant fold lshr( <A x Bi>, Ci ). // Constant fold shl( <A x Bi>, Ci ). |