diff options
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 9a136190198..839eff7cb75 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -981,6 +981,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, bool MadeChange = false; APInt UndefElts2(VWidth, 0); + APInt UndefElts3(VWidth, 0); Value *TmpV; switch (I->getOpcode()) { default: break; @@ -1298,6 +1299,34 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, UndefElts &= UndefElts2; break; + case Intrinsic::x86_fma_vfmadd_ss: + case Intrinsic::x86_fma_vfmsub_ss: + case Intrinsic::x86_fma_vfnmadd_ss: + case Intrinsic::x86_fma_vfnmsub_ss: + case Intrinsic::x86_fma_vfmadd_sd: + case Intrinsic::x86_fma_vfmsub_sd: + case Intrinsic::x86_fma_vfnmadd_sd: + case Intrinsic::x86_fma_vfnmsub_sd: + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, + UndefElts, Depth + 1); + if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts, + UndefElts2, Depth + 1); + if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; } + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts, + UndefElts3, Depth + 1); + if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; } + + // If lowest element of a scalar op isn't used then use Arg0. + if (DemandedElts.getLoBits(1) != 1) + return II->getArgOperand(0); + + // Output elements are undefined if all three are undefined. Consider + // things like undef&0. The result is known zero, not undef. + UndefElts &= UndefElts2; + UndefElts &= UndefElts3; + break; + // SSE4A instructions leave the upper 64-bits of the 128-bit result // in an undefined state. case Intrinsic::x86_sse4a_extrq: |