diff options
author | Craig Topper <craig.topper@gmail.com> | 2016-12-14 05:43:05 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2016-12-14 05:43:05 +0000 |
commit | dfd268d76bee38463593947b3ea017647b05d1e6 (patch) | |
tree | 9ec782b8c689c3f3b87adfa0d05a8812be5c1db9 /llvm/lib | |
parent | 8e13bc4562c2b921b0bd4e46a1c119f6ddfba07e (diff) | |
download | bcm5719-llvm-dfd268d76bee38463593947b3ea017647b05d1e6.tar.gz bcm5719-llvm-dfd268d76bee38463593947b3ea017647b05d1e6.zip |
[X86][InstCombine] Handle scalar fmadd intrinsics correctly in SimplifyDemandedVectorElts.
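Operand 0 is now simplified using the caller's DemandedElts, while operands 1 and 2 are simplified with only element 0 demanded. The undef elements of the result are then calculated correctly: the upper elements follow operand 0, and element 0 is undef only if it is undef in all three operands.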
llvm-svn: 289632
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 16 | ||||
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 21 |
2 files changed, 22 insertions, 15 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 5ee68ccf75a..6aa9c36ef6a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1754,14 +1754,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } - case Intrinsic::x86_fma_vfmadd_ss: - case Intrinsic::x86_fma_vfmsub_ss: - case Intrinsic::x86_fma_vfnmadd_ss: - case Intrinsic::x86_fma_vfnmsub_ss: - case Intrinsic::x86_fma_vfmadd_sd: - case Intrinsic::x86_fma_vfmsub_sd: - case Intrinsic::x86_fma_vfnmadd_sd: - case Intrinsic::x86_fma_vfnmsub_sd: case Intrinsic::x86_avx512_mask_add_ss_round: case Intrinsic::x86_avx512_mask_div_ss_round: case Intrinsic::x86_avx512_mask_mul_ss_round: @@ -1793,6 +1785,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::x86_fma_vfmadd_ss: + case Intrinsic::x86_fma_vfmsub_ss: + case Intrinsic::x86_fma_vfnmadd_ss: + case Intrinsic::x86_fma_vfnmsub_ss: + case Intrinsic::x86_fma_vfmadd_sd: + case Intrinsic::x86_fma_vfmsub_sd: + case Intrinsic::x86_fma_vfnmadd_sd: + case Intrinsic::x86_fma_vfnmsub_sd: case Intrinsic::x86_sse_cmp_ss: case Intrinsic::x86_sse_min_ss: case Intrinsic::x86_sse_max_ss: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 3fd18a3b24d..1e8432afe15 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1349,6 +1349,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, break; } + // Three input scalar-as-vector operations that work column-wise. The high + // elements come from operand 0 and the low element is a function of all + // three inputs. 
case Intrinsic::x86_fma_vfmadd_ss: case Intrinsic::x86_fma_vfmsub_ss: case Intrinsic::x86_fma_vfnmadd_ss: @@ -1360,6 +1363,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, UndefElts, Depth + 1); if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } + + // If lowest element of a scalar op isn't used then use Arg0. + if (!DemandedElts[0]) + return II->getArgOperand(0); + + // Only lower element is used for operand 1 and 2. + DemandedElts = 1; TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts, UndefElts2, Depth + 1); if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; } @@ -1367,14 +1377,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, UndefElts3, Depth + 1); if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; } - // If lowest element of a scalar op isn't used then use Arg0. - if (DemandedElts.getLoBits(1) != 1) - return II->getArgOperand(0); + // Lower element is undefined if all three lower elements are undefined. + // Consider things like undef&0. The result is known zero, not undef. + if (!UndefElts2[0] || !UndefElts3[0]) + UndefElts.clearBit(0); - // Output elements are undefined if all three are undefined. Consider - // things like undef&0. The result is known zero, not undef. - UndefElts &= UndefElts2; - UndefElts &= UndefElts3; break; // SSE4A instructions leave the upper 64-bits of the 128-bit result |