diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 16 | ||||
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 21 |
2 files changed, 22 insertions, 15 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 5ee68ccf75a..6aa9c36ef6a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1754,14 +1754,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } - case Intrinsic::x86_fma_vfmadd_ss: - case Intrinsic::x86_fma_vfmsub_ss: - case Intrinsic::x86_fma_vfnmadd_ss: - case Intrinsic::x86_fma_vfnmsub_ss: - case Intrinsic::x86_fma_vfmadd_sd: - case Intrinsic::x86_fma_vfmsub_sd: - case Intrinsic::x86_fma_vfnmadd_sd: - case Intrinsic::x86_fma_vfnmsub_sd: case Intrinsic::x86_avx512_mask_add_ss_round: case Intrinsic::x86_avx512_mask_div_ss_round: case Intrinsic::x86_avx512_mask_mul_ss_round: @@ -1793,6 +1785,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::x86_fma_vfmadd_ss: + case Intrinsic::x86_fma_vfmsub_ss: + case Intrinsic::x86_fma_vfnmadd_ss: + case Intrinsic::x86_fma_vfnmsub_ss: + case Intrinsic::x86_fma_vfmadd_sd: + case Intrinsic::x86_fma_vfmsub_sd: + case Intrinsic::x86_fma_vfnmadd_sd: + case Intrinsic::x86_fma_vfnmsub_sd: case Intrinsic::x86_sse_cmp_ss: case Intrinsic::x86_sse_min_ss: case Intrinsic::x86_sse_max_ss: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 3fd18a3b24d..1e8432afe15 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1349,6 +1349,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, break; } + // Three input scalar-as-vector operations that work column-wise. The high + // elements come from operand 0 and the low element is a function of all + // three inputs. case Intrinsic::x86_fma_vfmadd_ss: case Intrinsic::x86_fma_vfmsub_ss: case Intrinsic::x86_fma_vfnmadd_ss: @@ -1360,6 +1363,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, UndefElts, Depth + 1); if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } + + // If lowest element of a scalar op isn't used then use Arg0. + if (!DemandedElts[0]) + return II->getArgOperand(0); + + // Only lower element is used for operand 1 and 2. + DemandedElts = 1; TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts, UndefElts2, Depth + 1); if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; } @@ -1367,14 +1377,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, UndefElts3, Depth + 1); if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; } - // If lowest element of a scalar op isn't used then use Arg0. - if (DemandedElts.getLoBits(1) != 1) - return II->getArgOperand(0); + // Lower element is undefined if all three lower elements are undefined. + // Consider things like undef&0. The result is known zero, not undef. + if (!UndefElts2[0] || !UndefElts3[0]) + UndefElts.clearBit(0); - // Output elements are undefined if all three are undefined. Consider - // things like undef&0. The result is known zero, not undef. - UndefElts &= UndefElts2; - UndefElts &= UndefElts3; break; // SSE4A instructions leave the upper 64-bits of the 128-bit result |