diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-08-13 10:51:39 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-08-13 10:51:39 +0000 |
commit | 1a8d790cf5f89c1df718844f13e934e39bef6ef5 (patch) | |
tree | 31817f539c43873765d44b88f6733dd72932e5d7 /llvm/lib/Target | |
parent | dc06b0bc9ad055d06535462d91bfc2a744b2f589 (diff) | |
download | bcm5719-llvm-1a8d790cf5f89c1df718844f13e934e39bef6ef5.tar.gz bcm5719-llvm-1a8d790cf5f89c1df718844f13e934e39bef6ef5.zip |
[X86] SimplifyDemandedVectorElts - attempt to recombine target shuffle using DemandedElts mask (reapplied)
If we don't demand all elements, then attempt to combine to a simpler shuffle.
At the moment we can only do this if Depth == 0, because combineX86ShufflesRecursively uses Depth to track whether the shuffle has really changed or not. We'll need to change this before we can properly start merging combineX86ShufflesRecursively into SimplifyDemandedVectorElts.
The insertps-combine.ll regression is because XFormVExtractWithShuffleIntoLoad can't see through shuffles of different widths - this will be fixed in a follow-up commit.
Reapplying this because rL368307 had to be reverted as part of rL368660 in order to revert rL368276.
llvm-svn: 368662
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a82f2138562..66d7b76a98d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33920,6 +33920,23 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( return true; } + // If we don't demand all elements, then attempt to combine to a simpler + // shuffle. + // TODO: Handle other depths, but first we need to handle the fact that + // it might combine to the same shuffle. + if (!DemandedElts.isAllOnesValue() && Depth == 0) { + SmallVector<int, 32> DemandedMask(NumElts, SM_SentinelUndef); + for (int i = 0; i != NumElts; ++i) + if (DemandedElts[i]) + DemandedMask[i] = i; + + SDValue NewShuffle = combineX86ShufflesRecursively( + {Op}, 0, Op, DemandedMask, {}, Depth, /*HasVarMask*/ false, + /*AllowVarMask*/ true, TLO.DAG, Subtarget); + if (NewShuffle) + return TLO.CombineTo(Op, NewShuffle); + } + // Extract known zero/undef elements. // TODO - Propagate input undef/zero elts. for (int i = 0; i != NumElts; ++i) { |