diff options
author | Craig Topper <craig.topper@gmail.com> | 2016-12-11 08:54:52 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2016-12-11 08:54:52 +0000 |
commit | 23ebd9564f34600d186e53ee3497319c28e73cde (patch) | |
tree | abeafbde293cd5c731631b664bef03decbc73b95 /llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | |
parent | 1c002267fb8323491d58630bfd56e79312190dea (diff) | |
download | bcm5719-llvm-23ebd9564f34600d186e53ee3497319c28e73cde.tar.gz bcm5719-llvm-23ebd9564f34600d186e53ee3497319c28e73cde.zip |
[X86][InstCombine] Add support for scalar FMA intrinsics to SimplifyDemandedVectorElts.
This teaches SimplifyDemandedVectorElts that a scalar FMA intrinsic can be removed if its lowest element isn't used. It also teaches it that if the upper elements of the first operand aren't used, then we can simplify them.
llvm-svn: 289377
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 9a136190198..839eff7cb75 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -981,6 +981,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, bool MadeChange = false; APInt UndefElts2(VWidth, 0); + APInt UndefElts3(VWidth, 0); Value *TmpV; switch (I->getOpcode()) { default: break; @@ -1298,6 +1299,34 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, UndefElts &= UndefElts2; break; + case Intrinsic::x86_fma_vfmadd_ss: + case Intrinsic::x86_fma_vfmsub_ss: + case Intrinsic::x86_fma_vfnmadd_ss: + case Intrinsic::x86_fma_vfnmsub_ss: + case Intrinsic::x86_fma_vfmadd_sd: + case Intrinsic::x86_fma_vfmsub_sd: + case Intrinsic::x86_fma_vfnmadd_sd: + case Intrinsic::x86_fma_vfnmsub_sd: + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, + UndefElts, Depth + 1); + if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts, + UndefElts2, Depth + 1); + if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; } + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts, + UndefElts3, Depth + 1); + if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; } + + // If lowest element of a scalar op isn't used then use Arg0. + if (DemandedElts.getLoBits(1) != 1) + return II->getArgOperand(0); + + // Output elements are undefined if all three are undefined. Consider + // things like undef&0. The result is known zero, not undef. + UndefElts &= UndefElts2; + UndefElts &= UndefElts3; + break; + // SSE4A instructions leave the upper 64-bits of the 128-bit result // in an undefined state. case Intrinsic::x86_sse4a_extrq: |