summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Philip Reames <listmail@philipreames.com> 2019-03-19 20:10:00 +0000
committer: Philip Reames <listmail@philipreames.com> 2019-03-19 20:10:00 +0000
commit: 70537abe525cd6499feae5f34e5ae3d107a0f940 (patch)
tree: c43cc3fe1b5d556d9b8ad7061bf6a0e06a5a4612 /llvm/lib
parent: 611d1f98c587b85b4eb60559ff774d73a34b102b (diff)
download: bcm5719-llvm-70537abe525cd6499feae5f34e5ae3d107a0f940.tar.gz
bcm5719-llvm-70537abe525cd6499feae5f34e5ae3d107a0f940.zip
Demanded elements support for masked.load and masked.gather
Teach instcombine to propagate demanded elements through a masked load or masked gather instruction. This is in the broader context of improving vector pointer instcombine under https://reviews.llvm.org/D57140.

Differential Revision: https://reviews.llvm.org/D57372

llvm-svn: 356510
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp 20
1 files changed, 20 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 94e2eed05f8..b79a4d78648 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1436,6 +1436,26 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
if (!II) break;
switch (II->getIntrinsicID()) {
+ case Intrinsic::masked_gather: // fallthrough
+ case Intrinsic::masked_load: {
+ APInt DemandedPtrs(DemandedElts), DemandedPassThrough(DemandedElts);
+ if (auto *CV = dyn_cast<ConstantVector>(II->getOperand(2)))
+ for (unsigned i = 0; i < VWidth; i++) {
+ Constant *CElt = CV->getAggregateElement(i);
+ if (CElt->isNullValue())
+ DemandedPtrs.clearBit(i);
+ else if (CElt->isAllOnesValue())
+ DemandedPassThrough.clearBit(i);
+ }
+ if (II->getIntrinsicID() == Intrinsic::masked_gather)
+ simplifyAndSetOp(II, 0, DemandedPtrs, UndefElts2);
+ simplifyAndSetOp(II, 3, DemandedPassThrough, UndefElts3);
+
+ // Output elements are undefined if the element from both sources are.
+ // TODO: can strengthen via mask as well.
+ UndefElts = UndefElts2 & UndefElts3;
+ break;
+ }
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:
// The instructions for these intrinsics are speced to zero upper bits not
OpenPOWER on IntegriCloud