summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2016-12-11 08:54:52 +0000
committerCraig Topper <craig.topper@gmail.com>2016-12-11 08:54:52 +0000
commit23ebd9564f34600d186e53ee3497319c28e73cde (patch)
treeabeafbde293cd5c731631b664bef03decbc73b95 /llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
parent1c002267fb8323491d58630bfd56e79312190dea (diff)
downloadbcm5719-llvm-23ebd9564f34600d186e53ee3497319c28e73cde.tar.gz
bcm5719-llvm-23ebd9564f34600d186e53ee3497319c28e73cde.zip
[X86][InstCombine] Add support for scalar FMA intrinsics to SimplifyDemandedVectorElts.
This teaches SimplifyDemandedElts that the FMA can be removed if the lower element isn't used. It also teaches it that if upper elements of the first operand aren't used then we can simplify them. llvm-svn: 289377
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp29
1 files changed, 29 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 9a136190198..839eff7cb75 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -981,6 +981,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
bool MadeChange = false;
APInt UndefElts2(VWidth, 0);
+ APInt UndefElts3(VWidth, 0);
Value *TmpV;
switch (I->getOpcode()) {
default: break;
@@ -1298,6 +1299,34 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts &= UndefElts2;
break;
+ case Intrinsic::x86_fma_vfmadd_ss:
+ case Intrinsic::x86_fma_vfmsub_ss:
+ case Intrinsic::x86_fma_vfnmadd_ss:
+ case Intrinsic::x86_fma_vfnmsub_ss:
+ case Intrinsic::x86_fma_vfmadd_sd:
+ case Intrinsic::x86_fma_vfmsub_sd:
+ case Intrinsic::x86_fma_vfnmadd_sd:
+ case Intrinsic::x86_fma_vfnmsub_sd:
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
+ UndefElts, Depth + 1);
+ if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
+ UndefElts2, Depth + 1);
+ if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
+ UndefElts3, Depth + 1);
+ if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; }
+
+ // If lowest element of a scalar op isn't used then use Arg0.
+ if (DemandedElts.getLoBits(1) != 1)
+ return II->getArgOperand(0);
+
+ // Output elements are undefined if all three are undefined. Consider
+ // things like undef&0. The result is known zero, not undef.
+ UndefElts &= UndefElts2;
+ UndefElts &= UndefElts3;
+ break;
+
// SSE4A instructions leave the upper 64-bits of the 128-bit result
// in an undefined state.
case Intrinsic::x86_sse4a_extrq:
OpenPOWER on IntegriCloud