summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2016-04-24 18:23:14 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2016-04-24 18:23:14 +0000
commit83020942d32f462bd47064dcf6351e5c98f5c61b (patch)
tree4da7fbf10002493a2748b47c824eeadcf7f97715 /llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
parent424da1637a8af9fbdfbaeaeda3738985430d96eb (diff)
downloadbcm5719-llvm-83020942d32f462bd47064dcf6351e5c98f5c61b.tar.gz
bcm5719-llvm-83020942d32f462bd47064dcf6351e5c98f5c61b.zip
[InstCombine][SSE] Demanded vector elements for scalar intrinsics (Part 2 of 2)
Split from D17490. This patch improves support for determining the demanded vector elements through SSE scalar intrinsics: 1 - demanded vector element support for unary and some extra binary scalar intrinsics (RCP/RSQRT/SQRT/FRCZ and ADD/CMP/DIV/ROUND). 2 - addss/addsd get simplified to a fadd call if we aren't interested in the pass through elements 3 - if we don't need the lowest element of a scalar operation then just use the first argument (the pass through elements) directly We can add support for propagating demanded elements through any equivalent packed SSE intrinsics in a future patch (these wouldn't use the pass through patterns). Differential Revision: http://reviews.llvm.org/D19318 llvm-svn: 267357
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp39
1 files changed, 38 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index ef5be69bf8a..da2617c57cf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1179,16 +1179,41 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
switch (II->getIntrinsicID()) {
default: break;
+ // Unary vector operations that work column-wise.
+ case Intrinsic::x86_sse_rcp_ss:
+ case Intrinsic::x86_sse_rsqrt_ss:
+ case Intrinsic::x86_sse_sqrt_ss:
+ case Intrinsic::x86_sse2_sqrt_sd:
+ case Intrinsic::x86_xop_vfrcz_ss:
+ case Intrinsic::x86_xop_vfrcz_sd:
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
+ UndefElts, Depth + 1);
+ if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+
+ // If lowest element of a scalar op isn't used then use Arg0.
+ if (DemandedElts.getLoBits(1) != 1)
+ return II->getArgOperand(0);
+ // TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions checks).
+ break;
+
// Binary vector operations that work column-wise. A dest element is a
// function of the corresponding input elements from the two inputs.
+ case Intrinsic::x86_sse_add_ss:
case Intrinsic::x86_sse_sub_ss:
case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse_div_ss:
case Intrinsic::x86_sse_min_ss:
case Intrinsic::x86_sse_max_ss:
+ case Intrinsic::x86_sse_cmp_ss:
+ case Intrinsic::x86_sse2_add_sd:
case Intrinsic::x86_sse2_sub_sd:
case Intrinsic::x86_sse2_mul_sd:
+ case Intrinsic::x86_sse2_div_sd:
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
+ case Intrinsic::x86_sse2_cmp_sd:
+ case Intrinsic::x86_sse41_round_ss:
+ case Intrinsic::x86_sse41_round_sd:
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
UndefElts, Depth + 1);
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
@@ -1201,11 +1226,14 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (DemandedElts == 1) {
switch (II->getIntrinsicID()) {
default: break;
+ case Intrinsic::x86_sse_add_ss:
case Intrinsic::x86_sse_sub_ss:
case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse2_add_sd:
case Intrinsic::x86_sse2_sub_sd:
case Intrinsic::x86_sse2_mul_sd:
- // TODO: Lower MIN/MAX/ABS/etc
+ // TODO: Lower MIN/MAX/etc.
+ // TODO: Lower DIV (with rounding/exceptions checks).
Value *LHS = II->getArgOperand(0);
Value *RHS = II->getArgOperand(1);
// Extract the element as scalars.
@@ -1216,6 +1244,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
switch (II->getIntrinsicID()) {
default: llvm_unreachable("Case stmts out of sync!");
+ case Intrinsic::x86_sse_add_ss:
+ case Intrinsic::x86_sse2_add_sd:
+ TmpV = InsertNewInstWith(BinaryOperator::CreateFAdd(LHS, RHS,
+ II->getName()), *II);
+ break;
case Intrinsic::x86_sse_sub_ss:
case Intrinsic::x86_sse2_sub_sd:
TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
@@ -1238,6 +1271,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
}
+ // If lowest element of a scalar op isn't used then use Arg0.
+ if (DemandedElts.getLoBits(1) != 1)
+ return II->getArgOperand(0);
+
// Output elements are undefined if both are undefined. Consider things
// like undef&0. The result is known zero, not undef.
UndefElts &= UndefElts2;
OpenPOWER on IntegriCloud