author    | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-09-17 20:32:45 +0000
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-09-17 20:32:45 +0000
commit    | 61116ddc7bd7953fd90455927f8ca60e3fd713c2 (patch)
tree      | 48c002732fb9db84da254dfdce31bd886dea1585 /llvm/lib/Transforms
parent    | b83278687dcab59646e43e6f6907d8cef7585692 (diff)
[InstCombine] Added vector demanded bits support for SSE4A EXTRQ/INSERTQ instructions
The SSE4A EXTRQ/INSERTQ instructions use only the lower 64 bits (or less) of many of their 128-bit input vector operands, and all of them leave the upper 64 bits of the result undefined.
Differential Revision: http://reviews.llvm.org/D12680
llvm-svn: 247934
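For context, here is a minimal standalone sketch (not part of the patch; it assumes only LLVM's `llvm::APInt` from `llvm/ADT/APInt.h`, and the variable names are illustrative) of the per-lane masks that the new `SimplifyDemandedVectorEltsLow` helper builds. Each mask has one bit per vector element, with only the low elements marked as demanded.

```cpp
// Sketch only: the demanded-element masks used for EXTRQ's operands,
// assuming LLVM's APInt (link against LLVMSupport when building).
#include "llvm/ADT/APInt.h"
#include <cstdio>

int main() {
  using llvm::APInt;

  // First EXTRQ operand is <2 x i64>, but only lane 0 (the low 64 bits)
  // is read, so exactly one low element is demanded.
  APInt DemandedOp0 = APInt::getLowBitsSet(/*numBits=*/2, /*loBitsSet=*/1);

  // Second EXTRQ operand is <16 x i8>, of which only the low 16 bits
  // (the first two i8 lanes) are used.
  APInt DemandedOp1 = APInt::getLowBitsSet(/*numBits=*/16, /*loBitsSet=*/2);

  std::printf("op0 demanded lanes: 0x%llx\n",            // prints 0x1
              (unsigned long long)DemandedOp0.getZExtValue());
  std::printf("op1 demanded lanes: 0x%llx\n",            // prints 0x3
              (unsigned long long)DemandedOp1.getZExtValue());
  return 0;
}
```

In the patch these masks are passed to `SimplifyDemandedVectorElts` together with an initially empty `UndefElts` mask; if that call manages to simplify the operand, the simplified value is written back with `setArgOperand`.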
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 73
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 11
2 files changed, 83 insertions, 1 deletion
```diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index ad606b03138..82e81d4fd1c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -527,6 +527,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     if (Changed) return II;
   }
 
+  auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width, unsigned DemandedWidth)
+  {
+    APInt UndefElts(Width, 0);
+    APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
+    return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
+  };
+
   switch (II->getIntrinsicID()) {
   default: break;
   case Intrinsic::objectsize: {
@@ -975,6 +982,54 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       return ReplaceInstUsesWith(*II, V);
     break;
 
+  case Intrinsic::x86_sse4a_extrq: {
+    // EXTRQ uses only the lowest 64-bits of the first 128-bit vector
+    // operands and the lowest 16-bits of the second.
+    Value *Op0 = II->getArgOperand(0);
+    Value *Op1 = II->getArgOperand(1);
+    unsigned VWidth0 = Op0->getType()->getVectorNumElements();
+    unsigned VWidth1 = Op1->getType()->getVectorNumElements();
+    assert(VWidth0 == 2 && VWidth1 == 16 && "Unexpected operand sizes");
+
+    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
+      II->setArgOperand(0, V);
+      return II;
+    }
+    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
+      II->setArgOperand(1, V);
+      return II;
+    }
+    break;
+  }
+
+  case Intrinsic::x86_sse4a_extrqi: {
+    // EXTRQI uses only the lowest 64-bits of the first 128-bit vector
+    // operand.
+    Value *Op = II->getArgOperand(0);
+    unsigned VWidth = Op->getType()->getVectorNumElements();
+    assert(VWidth == 2 && "Unexpected operand size");
+
+    if (Value *V = SimplifyDemandedVectorEltsLow(Op, VWidth, 1)) {
+      II->setArgOperand(0, V);
+      return II;
+    }
+    break;
+  }
+
+  case Intrinsic::x86_sse4a_insertq: {
+    // INSERTQ uses only the lowest 64-bits of the first 128-bit vector
+    // operand.
+    Value *Op = II->getArgOperand(0);
+    unsigned VWidth = Op->getType()->getVectorNumElements();
+    assert(VWidth == 2 && "Unexpected operand size");
+
+    if (Value *V = SimplifyDemandedVectorEltsLow(Op, VWidth, 1)) {
+      II->setArgOperand(0, V);
+      return II;
+    }
+    break;
+  }
+
   case Intrinsic::x86_sse4a_insertqi: {
     // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
     // ones undef
@@ -1051,6 +1106,24 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         }
       }
     }
+
+    // INSERTQI uses only the lowest 64-bits of the first two 128-bit vector
+    // operands.
+    Value *Op0 = II->getArgOperand(0);
+    Value *Op1 = II->getArgOperand(1);
+    unsigned VWidth0 = Op0->getType()->getVectorNumElements();
+    unsigned VWidth1 = Op1->getType()->getVectorNumElements();
+    assert(VWidth0 == 2 && VWidth1 == 2 && "Unexpected operand sizes");
+
+    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
+      II->setArgOperand(0, V);
+      return II;
+    }
+
+    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
+      II->setArgOperand(1, V);
+      return II;
+    }
     break;
   }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 142e071fa21..c68a0315784 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -412,7 +412,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     Value *LHS, *RHS;
     if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN)
       return nullptr;
-    
+
     if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
                              RHSKnownOne, Depth + 1) ||
         SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
@@ -1237,6 +1237,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       // like undef&0. The result is known zero, not undef.
       UndefElts &= UndefElts2;
       break;
+
+      // SSE4A instructions leave the upper 64-bits of the 128-bit result
+      // in an undefined state.
+    case Intrinsic::x86_sse4a_extrq:
+    case Intrinsic::x86_sse4a_extrqi:
+    case Intrinsic::x86_sse4a_insertq:
+    case Intrinsic::x86_sse4a_insertqi:
+      UndefElts |= APInt::getHighBitsSet(VWidth, VWidth / 2);
+      break;
     }
     break;
   }
```
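The `InstCombineSimplifyDemanded.cpp` change works in the other direction: for these four intrinsics it reports that the upper half of the result lanes is undefined. A small sketch of that mask, again assuming only `llvm::APInt` and using illustrative names rather than code from the patch:

```cpp
// Sketch only: the undef-lane mask reported for an SSE4A <2 x i64> result,
// assuming LLVM's APInt.
#include "llvm/ADT/APInt.h"
#include <cstdio>

int main() {
  using llvm::APInt;

  unsigned VWidth = 2;          // EXTRQ/INSERTQ produce a <2 x i64> result
  APInt UndefElts(VWidth, 0);   // lanes already known to be undef

  // The instructions leave the upper 64 bits of the 128-bit result in an
  // undefined state, so the high half of the lanes joins the undef set.
  UndefElts |= APInt::getHighBitsSet(VWidth, VWidth / 2);

  std::printf("undef lanes: 0x%llx\n",                   // prints 0x2 (lane 1)
              (unsigned long long)UndefElts.getZExtValue());
  return 0;
}
```

Callers of `SimplifyDemandedVectorElts` can then treat whatever feeds that lane as irrelevant, which is what lets the `InstCombineCalls.cpp` half of the patch drop computations that only affect the upper 64 bits.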