diff options
| author | Craig Topper <craig.topper@intel.com> | 2017-08-11 16:20:05 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-08-11 16:20:05 +0000 |
| commit | 0f30fe963436694837e9ac4c9328c4261f9374bb (patch) | |
| tree | 255da5666bfda693c1c93e133b72bf108a613ccc /llvm/lib/Target | |
| parent | 9c52574886001d7216ec09bf7363bd62f4706a29 (diff) | |
| download | bcm5719-llvm-0f30fe963436694837e9ac4c9328c4261f9374bb.tar.gz bcm5719-llvm-0f30fe963436694837e9ac4c9328c4261f9374bb.zip | |
[x86] Enable some support for lowerVectorShuffleWithUndefHalf with AVX-512
Summary:
This teaches 512-bit shuffles to detect unused halfs in order to reduce shuffle size.
We may need to refine the 512-bit exit point. I couldn't remember if we had good cross lane shuffles for 8/16 bit with AVX-512 or not.
I believe this is step towards being able to handle D36454 without a special case.
From here we need to improve our ability to combine extract_subvector with insert_subvector and other extract_subvectors. And we need to support narrowing binary operations where we don't demand all elements. This may be improvements to DAGCombiner::narrowExtractedVectorBinOp(by recognizing an insert_subvector in addition to concat) or we may need a target specific combiner.
Reviewers: RKSimon, zvi, delena, jbhateja
Reviewed By: RKSimon, jbhateja
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D36601
llvm-svn: 310724
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 14 |
1 files changed, 12 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index be94cb6e494..8c5b6e62989 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -12256,7 +12256,7 @@ static SDValue lowerVectorShuffleByMerging128BitLanes( return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask); } -/// Lower shuffles where an entire half of a 256-bit vector is UNDEF. +/// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF. /// This allows for fast cases such as subvector extraction/insertion /// or shuffling smaller vector types which can lower more efficiently. static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT, @@ -12264,7 +12264,8 @@ static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT, ArrayRef<int> Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - assert(VT.is256BitVector() && "Expected 256-bit vector"); + assert((VT.is256BitVector() || VT.is512BitVector()) && + "Expected 256-bit or 512-bit vector"); unsigned NumElts = VT.getVectorNumElements(); unsigned HalfNumElts = NumElts / 2; @@ -12360,6 +12361,10 @@ static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT, } } + // AVX512 - XXXXuuuu - always extract lowers. + if (VT.is512BitVector() && !(UndefUpper && NumUpperHalves == 0)) + return SDValue(); + auto GetHalfVector = [&](int HalfIdx) { if (HalfIdx < 0) return DAG.getUNDEF(HalfVT); @@ -13703,6 +13708,11 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Insertion; + // Handle special cases where the lower or upper half is UNDEF. + if (SDValue V = + lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG)) + return V; + // Check for being able to broadcast a single element. if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG)) |

