diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-03-08 09:36:39 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-03-08 09:36:39 +0000 |
commit | 836bcc689f4e3aa39ec1788cd107c93e04804c66 (patch) | |
tree | d930ca1d0cc45e233273087e53472c1e4bc83b02 | |
parent | ed739d902d497273a60de44ae8132b4db599bfbb (diff) | |
download | bcm5719-llvm-836bcc689f4e3aa39ec1788cd107c93e04804c66.tar.gz bcm5719-llvm-836bcc689f4e3aa39ec1788cd107c93e04804c66.zip |
[X86][SSE] combineX86ShufflesRecursively can handle shuffle masks up to 64 elements wide
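By defining the mask types as SmallVector<int, 16>, we were causing a lot of unnecessary heap usage: any mask wider than 16 elements exceeds the vector's inline storage and falls back to a heap allocation. Sizing the inline storage for 64 elements covers the widest mask this function handles.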
llvm-svn: 297267
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 15 |
1 files changed, 7 insertions, 8 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c9087235ce8..533ee7c6a43 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27591,7 +27591,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
          "Can only combine shuffles of the same vector register size.");
 
   // Extract target shuffle mask and resolve sentinels and inputs.
-  SmallVector<int, 16> OpMask;
+  SmallVector<int, 64> OpMask;
   SmallVector<SDValue, 2> OpInputs;
   if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask))
     return false;
@@ -27634,8 +27634,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
           (RootRatio == 1) != (OpRatio == 1)) &&
          "Must not have a ratio for both incoming and op masks!");
 
-  SmallVector<int, 16> Mask;
-  Mask.reserve(MaskWidth);
+  SmallVector<int, 64> Mask((unsigned)MaskWidth, SM_SentinelUndef);
 
   // Merge this shuffle operation's mask into our accumulated mask. Note that
   // this shuffle's mask will be the first applied to the input, followed by the
@@ -27645,7 +27644,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
     int RootIdx = i / RootRatio;
     if (RootMask[RootIdx] < 0) {
       // This is a zero or undef lane, we're done.
-      Mask.push_back(RootMask[RootIdx]);
+      Mask[i] = RootMask[RootIdx];
       continue;
     }
 
@@ -27655,7 +27654,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
     // than the SrcOp we're currently inserting.
     if ((RootMaskedIdx < (SrcOpIndex * MaskWidth)) ||
         (((SrcOpIndex + 1) * MaskWidth) <= RootMaskedIdx)) {
-      Mask.push_back(RootMaskedIdx);
+      Mask[i] = RootMaskedIdx;
       continue;
     }
 
@@ -27665,7 +27664,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
     if (OpMask[OpIdx] < 0) {
       // The incoming lanes are zero or undef, it doesn't matter which ones we
       // are using.
-      Mask.push_back(OpMask[OpIdx]);
+      Mask[i] = OpMask[OpIdx];
       continue;
     }
 
@@ -27681,7 +27680,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
       OpMaskedIdx += InputIdx1 * MaskWidth;
     }
 
-    Mask.push_back(OpMaskedIdx);
+    Mask[i] = OpMaskedIdx;
   }
 
   // Handle the all undef/zero cases early.
@@ -27734,7 +27733,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
   // elements, and shrink them to the half-width mask. It does this in a loop
   // so it will reduce the size of the mask to the minimal width mask which
   // performs an equivalent shuffle.
-  SmallVector<int, 16> WidenedMask;
+  SmallVector<int, 64> WidenedMask;
   while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) {
     Mask = std::move(WidenedMask);
   }
```