summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@gmail.com> 2016-12-14 05:43:05 +0000
committer: Craig Topper <craig.topper@gmail.com> 2016-12-14 05:43:05 +0000
commit: dfd268d76bee38463593947b3ea017647b05d1e6 (patch)
tree: 9ec782b8c689c3f3b87adfa0d05a8812be5c1db9 /llvm/lib
parent: 8e13bc4562c2b921b0bd4e46a1c119f6ddfba07e (diff)
download: bcm5719-llvm-dfd268d76bee38463593947b3ea017647b05d1e6.tar.gz
download: bcm5719-llvm-dfd268d76bee38463593947b3ea017647b05d1e6.zip
[X86][InstCombine] Handle scalar fmadd intrinsics correctly in SimplifyDemandedVectorElts.
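Each operand now receives an appropriately modified version of DemandedElts (operand 0 gets the caller's mask; operands 1 and 2 have only their lowest element demanded), and the undefined elements of the result are now calculated correctly.
llvm-svn: 289632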
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp  16
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp  21
2 files changed, 22 insertions, 15 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 5ee68ccf75a..6aa9c36ef6a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1754,14 +1754,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
- case Intrinsic::x86_fma_vfmadd_ss:
- case Intrinsic::x86_fma_vfmsub_ss:
- case Intrinsic::x86_fma_vfnmadd_ss:
- case Intrinsic::x86_fma_vfnmsub_ss:
- case Intrinsic::x86_fma_vfmadd_sd:
- case Intrinsic::x86_fma_vfmsub_sd:
- case Intrinsic::x86_fma_vfnmadd_sd:
- case Intrinsic::x86_fma_vfnmsub_sd:
case Intrinsic::x86_avx512_mask_add_ss_round:
case Intrinsic::x86_avx512_mask_div_ss_round:
case Intrinsic::x86_avx512_mask_mul_ss_round:
@@ -1793,6 +1785,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
+ case Intrinsic::x86_fma_vfmadd_ss:
+ case Intrinsic::x86_fma_vfmsub_ss:
+ case Intrinsic::x86_fma_vfnmadd_ss:
+ case Intrinsic::x86_fma_vfnmsub_ss:
+ case Intrinsic::x86_fma_vfmadd_sd:
+ case Intrinsic::x86_fma_vfmsub_sd:
+ case Intrinsic::x86_fma_vfnmadd_sd:
+ case Intrinsic::x86_fma_vfnmsub_sd:
case Intrinsic::x86_sse_cmp_ss:
case Intrinsic::x86_sse_min_ss:
case Intrinsic::x86_sse_max_ss:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 3fd18a3b24d..1e8432afe15 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1349,6 +1349,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
}
+ // Three input scalar-as-vector operations that work column-wise. The high
+ // elements come from operand 0 and the low element is a function of all
+ // three inputs.
case Intrinsic::x86_fma_vfmadd_ss:
case Intrinsic::x86_fma_vfmsub_ss:
case Intrinsic::x86_fma_vfnmadd_ss:
@@ -1360,6 +1363,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
UndefElts, Depth + 1);
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+
+ // If lowest element of a scalar op isn't used then use Arg0.
+ if (!DemandedElts[0])
+ return II->getArgOperand(0);
+
+ // Only lower element is used for operand 1 and 2.
+ DemandedElts = 1;
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
UndefElts2, Depth + 1);
if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
@@ -1367,14 +1377,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts3, Depth + 1);
if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; }
- // If lowest element of a scalar op isn't used then use Arg0.
- if (DemandedElts.getLoBits(1) != 1)
- return II->getArgOperand(0);
+ // Lower element is undefined if all three lower elements are undefined.
+ // Consider things like undef&0. The result is known zero, not undef.
+ if (!UndefElts2[0] || !UndefElts3[0])
+ UndefElts.clearBit(0);
- // Output elements are undefined if all three are undefined. Consider
- // things like undef&0. The result is known zero, not undef.
- UndefElts &= UndefElts2;
- UndefElts &= UndefElts3;
break;
// SSE4A instructions leave the upper 64-bits of the 128-bit result
OpenPOWER on IntegriCloud