summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2016-12-15 03:49:45 +0000
committerCraig Topper <craig.topper@gmail.com>2016-12-15 03:49:45 +0000
commitab5f355d8cce89635936b3f7e6ab28f5ef729436 (patch)
tree144c1e96f3b9d75442a717978be2d776e20d4c91 /llvm/lib
parentfd7ed23ee7a803ba4ccaa9574ccbba6d5f6177c9 (diff)
downloadbcm5719-llvm-ab5f355d8cce89635936b3f7e6ab28f5ef729436.tar.gz
bcm5719-llvm-ab5f355d8cce89635936b3f7e6ab28f5ef729436.zip
[AVX-512][InstCombine] Add masked scalar FMA intrinsics to SimplifyDemandedVectorElts.
llvm-svn: 289759
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp10
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp35
2 files changed, 45 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 1b2afe911c6..cb741fba43b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1783,6 +1783,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx512_mask_sub_sd_round:
case Intrinsic::x86_avx512_mask_max_sd_round:
case Intrinsic::x86_avx512_mask_min_sd_round:
+ case Intrinsic::x86_avx512_mask_vfmadd_ss:
+ case Intrinsic::x86_avx512_mask_vfmadd_sd:
+ case Intrinsic::x86_avx512_maskz_vfmadd_ss:
+ case Intrinsic::x86_avx512_maskz_vfmadd_sd:
+ case Intrinsic::x86_avx512_mask3_vfmadd_ss:
+ case Intrinsic::x86_avx512_mask3_vfmadd_sd:
+ case Intrinsic::x86_avx512_mask3_vfmsub_ss:
+ case Intrinsic::x86_avx512_mask3_vfmsub_sd:
+ case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
+ case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
case Intrinsic::x86_fma_vfmadd_ss:
case Intrinsic::x86_fma_vfmsub_ss:
case Intrinsic::x86_fma_vfnmadd_ss:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index e9d83b38f54..745f42b358d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1372,6 +1372,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::x86_fma_vfmsub_sd:
case Intrinsic::x86_fma_vfnmadd_sd:
case Intrinsic::x86_fma_vfnmsub_sd:
+ case Intrinsic::x86_avx512_mask_vfmadd_ss:
+ case Intrinsic::x86_avx512_mask_vfmadd_sd:
+ case Intrinsic::x86_avx512_maskz_vfmadd_ss:
+ case Intrinsic::x86_avx512_maskz_vfmadd_sd:
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
UndefElts, Depth + 1);
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
@@ -1396,6 +1400,37 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
+ case Intrinsic::x86_avx512_mask3_vfmadd_ss:
+ case Intrinsic::x86_avx512_mask3_vfmadd_sd:
+ case Intrinsic::x86_avx512_mask3_vfmsub_ss:
+ case Intrinsic::x86_avx512_mask3_vfmsub_sd:
+ case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
+ case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
+ // These intrinsics get the passthru bits from operand 2.
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
+ UndefElts, Depth + 1);
+ if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; }
+
+ // If lowest element of a scalar op isn't used then use Arg2.
+ if (!DemandedElts[0])
+ return II->getArgOperand(2);
+
+ // Only lower element is used for operand 0 and 1.
+ DemandedElts = 1;
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
+ UndefElts2, Depth + 1);
+ if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
+ UndefElts3, Depth + 1);
+ if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+
+ // Lower element is undefined if all three lower elements are undefined.
+ // Consider things like undef&0. The result is known zero, not undef.
+ if (!UndefElts2[0] || !UndefElts3[0])
+ UndefElts.clearBit(0);
+
+ break;
+
// SSE4A instructions leave the upper 64-bits of the 128-bit result
// in an undefined state.
case Intrinsic::x86_sse4a_extrq:
OpenPOWER on IntegriCloud