diff options
| author | Philip Reames <listmail@philipreames.com> | 2019-03-19 20:10:00 +0000 |
|---|---|---|
| committer | Philip Reames <listmail@philipreames.com> | 2019-03-19 20:10:00 +0000 |
| commit | 70537abe525cd6499feae5f34e5ae3d107a0f940 (patch) | |
| tree | c43cc3fe1b5d556d9b8ad7061bf6a0e06a5a4612 /llvm/lib | |
| parent | 611d1f98c587b85b4eb60559ff774d73a34b102b (diff) | |
| download | bcm5719-llvm-70537abe525cd6499feae5f34e5ae3d107a0f940.tar.gz bcm5719-llvm-70537abe525cd6499feae5f34e5ae3d107a0f940.zip | |
Demanded elements support for masked.load and masked.gather
Teach instcombine to propagate demanded elements through a masked load or masked gather instruction. This is in the broader context of improving vector pointer instcombine under https://reviews.llvm.org/D57140.
Differential Revision: https://reviews.llvm.org/D57372
llvm-svn: 356510
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 94e2eed05f8..b79a4d78648 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1436,6 +1436,26 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); if (!II) break; switch (II->getIntrinsicID()) { + case Intrinsic::masked_gather: // fallthrough + case Intrinsic::masked_load: { + APInt DemandedPtrs(DemandedElts), DemandedPassThrough(DemandedElts); + if (auto *CV = dyn_cast<ConstantVector>(II->getOperand(2))) + for (unsigned i = 0; i < VWidth; i++) { + Constant *CElt = CV->getAggregateElement(i); + if (CElt->isNullValue()) + DemandedPtrs.clearBit(i); + else if (CElt->isAllOnesValue()) + DemandedPassThrough.clearBit(i); + } + if (II->getIntrinsicID() == Intrinsic::masked_gather) + simplifyAndSetOp(II, 0, DemandedPtrs, UndefElts2); + simplifyAndSetOp(II, 3, DemandedPassThrough, UndefElts3); + + // Output elements are undefined if the element from both sources are. + // TODO: can strengthen via mask as well. + UndefElts = UndefElts2 & UndefElts3; + break; + } case Intrinsic::x86_xop_vfrcz_ss: case Intrinsic::x86_xop_vfrcz_sd: // The instructions for these intrinsics are speced to zero upper bits not |

