diff options
| author | Craig Topper <craig.topper@gmail.com> | 2016-12-14 03:17:30 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@gmail.com> | 2016-12-14 03:17:30 +0000 |
| commit | eb6a20e79ed4a5ac8d5f61f7372bb633c7362378 (patch) | |
| tree | f582a662bab3c3b210043da9047358b16053f51c /llvm/lib/Transforms | |
| parent | a0372dec261d137973b29ed14376230c372b4290 (diff) | |
| download | bcm5719-llvm-eb6a20e79ed4a5ac8d5f61f7372bb633c7362378.tar.gz bcm5719-llvm-eb6a20e79ed4a5ac8d5f61f7372bb633c7362378.zip | |
[X86][InstCombine] Teach SimplifyDemandedVectorElts to handle scalar round intrinsics more correctly.
Now we only pass bit 0 of the DemandedElts to optimize operand 1 as we recurse since the upper bits are unused. Similarly we clear bit 0 for optimizing operand 0.
Also calculate UndefElts correctly.
Simplify InstCombineCalls for these instrinics to just call SimplifyDemandedVectorElts for the call instrution to reuse this support.
llvm-svn: 289629
Diffstat (limited to 'llvm/lib/Transforms')
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 29 | ||||
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 30 |
2 files changed, 21 insertions, 38 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 4c41484ddbf..5ee68ccf75a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1437,12 +1437,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth); return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts); }; - auto SimplifyDemandedVectorEltsHigh = [this](Value *Op, unsigned Width, - unsigned DemandedWidth) { - APInt UndefElts(Width, 0); - APInt DemandedElts = APInt::getHighBitsSet(Width, DemandedWidth); - return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts); - }; switch (II->getIntrinsicID()) { default: break; @@ -1799,33 +1793,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } - case Intrinsic::x86_sse41_round_ss: - case Intrinsic::x86_sse41_round_sd: { - // These intrinsics demand the upper elements of the first input vector and - // the lowest element of the second input vector. - bool MadeChange = false; - Value *Arg0 = II->getArgOperand(0); - Value *Arg1 = II->getArgOperand(1); - unsigned VWidth = Arg0->getType()->getVectorNumElements(); - if (Value *V = SimplifyDemandedVectorEltsHigh(Arg0, VWidth, VWidth - 1)) { - II->setArgOperand(0, V); - MadeChange = true; - } - if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) { - II->setArgOperand(1, V); - MadeChange = true; - } - if (MadeChange) - return II; - break; - } - case Intrinsic::x86_sse_cmp_ss: case Intrinsic::x86_sse_min_ss: case Intrinsic::x86_sse_max_ss: case Intrinsic::x86_sse2_cmp_sd: case Intrinsic::x86_sse2_min_sd: case Intrinsic::x86_sse2_max_sd: + case Intrinsic::x86_sse41_round_ss: + case Intrinsic::x86_sse41_round_sd: case Intrinsic::x86_xop_vfrcz_ss: case Intrinsic::x86_xop_vfrcz_sd: { unsigned VWidth = II->getType()->getVectorNumElements(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 3038fd58ad0..3fd18a3b24d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1321,25 +1321,33 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, break; } - // Binary scalar-as-vector operations that work column-wise. A dest element - // is a function of the corresponding input elements from the two inputs. + // Binary scalar-as-vector operations that work column-wise. The high + // elements come from operand 0 and the low element comes from operand 1. case Intrinsic::x86_sse41_round_ss: - case Intrinsic::x86_sse41_round_sd: - TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, + case Intrinsic::x86_sse41_round_sd: { + // Don't use the low element of operand 0. + APInt DemandedElts2 = DemandedElts; + DemandedElts2.clearBit(0); + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts2, UndefElts, Depth + 1); if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } - TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts, - UndefElts2, Depth + 1); - if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; } // If lowest element of a scalar op isn't used then use Arg0. - if (DemandedElts.getLoBits(1) != 1) + if (!DemandedElts[0]) return II->getArgOperand(0); - // Output elements are undefined if both are undefined. Consider things - // like undef&0. The result is known zero, not undef. - UndefElts &= UndefElts2; + // Only lower element is used for operand 1. + DemandedElts = 1; + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts, + UndefElts2, Depth + 1); + if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; } + + // Take the high undef elements from operand 0 and take the lower element + // from operand 1. + UndefElts.clearBit(0); + UndefElts |= UndefElts2[0]; break; + } case Intrinsic::x86_fma_vfmadd_ss: case Intrinsic::x86_fma_vfmsub_ss: |

