summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp29
1 files changed, 29 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 9a136190198..839eff7cb75 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -981,6 +981,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
bool MadeChange = false;
APInt UndefElts2(VWidth, 0);
+ APInt UndefElts3(VWidth, 0);
Value *TmpV;
switch (I->getOpcode()) {
default: break;
@@ -1298,6 +1299,34 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts &= UndefElts2;
break;
+ case Intrinsic::x86_fma_vfmadd_ss:
+ case Intrinsic::x86_fma_vfmsub_ss:
+ case Intrinsic::x86_fma_vfnmadd_ss:
+ case Intrinsic::x86_fma_vfnmsub_ss:
+ case Intrinsic::x86_fma_vfmadd_sd:
+ case Intrinsic::x86_fma_vfmsub_sd:
+ case Intrinsic::x86_fma_vfnmadd_sd:
+ case Intrinsic::x86_fma_vfnmsub_sd:
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
+ UndefElts, Depth + 1);
+ if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
+ UndefElts2, Depth + 1);
+ if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
+ UndefElts3, Depth + 1);
+ if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; }
+
+ // If the lowest element of a scalar op isn't used then use Arg0.
+ if (DemandedElts.getLoBits(1) != 1)
+ return II->getArgOperand(0);
+
+ // An output element is undefined only if that element is undefined in all
+ // three operands. Consider things like undef&0: the result is known zero,
+ // not undef.
+ UndefElts &= UndefElts2;
+ UndefElts &= UndefElts3;
+ break;
+
// SSE4A instructions leave the upper 64-bits of the 128-bit result
// in an undefined state.
case Intrinsic::x86_sse4a_extrq:
OpenPOWER on IntegriCloud