diff options
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 39 |
1 files changed, 38 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index ef5be69bf8a..da2617c57cf 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1179,16 +1179,41 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, switch (II->getIntrinsicID()) { default: break; + // Unary vector operations that work column-wise. + case Intrinsic::x86_sse_rcp_ss: + case Intrinsic::x86_sse_rsqrt_ss: + case Intrinsic::x86_sse_sqrt_ss: + case Intrinsic::x86_sse2_sqrt_sd: + case Intrinsic::x86_xop_vfrcz_ss: + case Intrinsic::x86_xop_vfrcz_sd: + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, + UndefElts, Depth + 1); + if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } + + // If lowest element of a scalar op isn't used then use Arg0. + if (DemandedElts.getLoBits(1) != 1) + return II->getArgOperand(0); + // TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions checks). + break; + // Binary vector operations that work column-wise. A dest element is a // function of the corresponding input elements from the two inputs. + case Intrinsic::x86_sse_add_ss: case Intrinsic::x86_sse_sub_ss: case Intrinsic::x86_sse_mul_ss: + case Intrinsic::x86_sse_div_ss: case Intrinsic::x86_sse_min_ss: case Intrinsic::x86_sse_max_ss: + case Intrinsic::x86_sse_cmp_ss: + case Intrinsic::x86_sse2_add_sd: case Intrinsic::x86_sse2_sub_sd: case Intrinsic::x86_sse2_mul_sd: + case Intrinsic::x86_sse2_div_sd: case Intrinsic::x86_sse2_min_sd: case Intrinsic::x86_sse2_max_sd: + case Intrinsic::x86_sse2_cmp_sd: + case Intrinsic::x86_sse41_round_ss: + case Intrinsic::x86_sse41_round_sd: TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, UndefElts, Depth + 1); if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } @@ -1201,11 +1226,14 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (DemandedElts == 1) { switch (II->getIntrinsicID()) { default: break; + case Intrinsic::x86_sse_add_ss: case Intrinsic::x86_sse_sub_ss: case Intrinsic::x86_sse_mul_ss: + case Intrinsic::x86_sse2_add_sd: case Intrinsic::x86_sse2_sub_sd: case Intrinsic::x86_sse2_mul_sd: - // TODO: Lower MIN/MAX/ABS/etc + // TODO: Lower MIN/MAX/etc. + // TODO: Lower DIV (with rounding/exceptions checks). Value *LHS = II->getArgOperand(0); Value *RHS = II->getArgOperand(1); // Extract the element as scalars. @@ -1216,6 +1244,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, switch (II->getIntrinsicID()) { default: llvm_unreachable("Case stmts out of sync!"); + case Intrinsic::x86_sse_add_ss: + case Intrinsic::x86_sse2_add_sd: + TmpV = InsertNewInstWith(BinaryOperator::CreateFAdd(LHS, RHS, + II->getName()), *II); + break; case Intrinsic::x86_sse_sub_ss: case Intrinsic::x86_sse2_sub_sd: TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS, @@ -1238,6 +1271,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } } + // If lowest element of a scalar op isn't used then use Arg0. + if (DemandedElts.getLoBits(1) != 1) + return II->getArgOperand(0); + // Output elements are undefined if both are undefined. Consider things // like undef&0. The result is known zero, not undef. UndefElts &= UndefElts2; |