summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2019-08-13 10:51:39 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2019-08-13 10:51:39 +0000
commit1a8d790cf5f89c1df718844f13e934e39bef6ef5 (patch)
tree31817f539c43873765d44b88f6733dd72932e5d7 /llvm/lib/Target
parentdc06b0bc9ad055d06535462d91bfc2a744b2f589 (diff)
downloadbcm5719-llvm-1a8d790cf5f89c1df718844f13e934e39bef6ef5.tar.gz
bcm5719-llvm-1a8d790cf5f89c1df718844f13e934e39bef6ef5.zip
[X86] SimplifyDemandedVectorElts - attempt to recombine target shuffle using DemandedElts mask (reapplied)
If we don't demand all elements, then attempt to combine to a simpler shuffle. At the moment we can only do this if Depth == 0 as combineX86ShufflesRecursively uses Depth to track whether the shuffle has really changed or not - we'll need to change this before we can properly start merging combineX86ShufflesRecursively into SimplifyDemandedVectorElts. The insertps-combine.ll regression is because XFormVExtractWithShuffleIntoLoad can't see through shuffles of different widths - this will be fixed in a follow-up commit. Reapplying this as rL368307 had to be reverted as part of rL368660 to revert rL368276 llvm-svn: 368662
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp17
1 files changed, 17 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a82f2138562..66d7b76a98d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -33920,6 +33920,23 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return true;
}
+ // If we don't demand all elements, then attempt to combine to a simpler
+ // shuffle.
+ // TODO: Handle other depths, but first we need to handle the fact that
+ // it might combine to the same shuffle.
+ if (!DemandedElts.isAllOnesValue() && Depth == 0) {
+ SmallVector<int, 32> DemandedMask(NumElts, SM_SentinelUndef);
+ for (int i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ DemandedMask[i] = i;
+
+ SDValue NewShuffle = combineX86ShufflesRecursively(
+ {Op}, 0, Op, DemandedMask, {}, Depth, /*HasVarMask*/ false,
+ /*AllowVarMask*/ true, TLO.DAG, Subtarget);
+ if (NewShuffle)
+ return TLO.CombineTo(Op, NewShuffle);
+ }
+
// Extract known zero/undef elements.
// TODO - Propagate input undef/zero elts.
for (int i = 0; i != NumElts; ++i) {
OpenPOWER on IntegriCloud