author    Simon Pilgrim <llvm-dev@redking.me.uk>    2017-03-08 09:36:39 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk>    2017-03-08 09:36:39 +0000
commit    836bcc689f4e3aa39ec1788cd107c93e04804c66 (patch)
tree      d930ca1d0cc45e233273087e53472c1e4bc83b02
parent    ed739d902d497273a60de44ae8132b4db599bfbb (diff)
[X86][SSE] combineX86ShufflesRecursively can handle shuffle masks up to 64 elements wide
By defining the mask types as SmallVector<int, 16> we were causing a lot of unnecessary heap usage.

llvm-svn: 297267
 llvm/lib/Target/X86/X86ISelLowering.cpp | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)
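For context on the commit message above, here is a minimal standalone sketch (not part of this patch; it assumes a program built against LLVM's headers and linked with libLLVMSupport) showing why the inline element count matters: llvm::SmallVector<int, N> keeps up to N elements in inline storage and only reallocates onto the heap beyond that, so a 64-element inline buffer keeps a 64-lane (e.g. v64i8) shuffle mask allocation-free.

// Minimal sketch (assumption: built against LLVM headers and libLLVMSupport;
// not part of the patch). Shows inline storage vs. heap growth of SmallVector.
#include "llvm/ADT/SmallVector.h"
#include <cstdio>

int main() {
  llvm::SmallVector<int, 16> Small; // inline storage for 16 ints
  llvm::SmallVector<int, 64> Large; // inline storage for 64 ints

  for (int i = 0; i != 64; ++i) {   // a 64-lane shuffle mask, e.g. for v64i8
    Small.push_back(i);             // reallocates onto the heap past 16 lanes
    Large.push_back(i);             // stays entirely in the inline buffer
  }

  // Capacity exceeds the template parameter only once the heap is in play.
  std::printf("Small capacity: %zu\n", Small.capacity()); // > 16 (heap-backed)
  std::printf("Large capacity: %zu\n", Large.capacity()); // == 64 (still inline)
  return 0;
}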
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c9087235ce8..533ee7c6a43 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27591,7 +27591,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
"Can only combine shuffles of the same vector register size.");
// Extract target shuffle mask and resolve sentinels and inputs.
- SmallVector<int, 16> OpMask;
+ SmallVector<int, 64> OpMask;
SmallVector<SDValue, 2> OpInputs;
if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask))
return false;
@@ -27634,8 +27634,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
(RootRatio == 1) != (OpRatio == 1)) &&
"Must not have a ratio for both incoming and op masks!");
- SmallVector<int, 16> Mask;
- Mask.reserve(MaskWidth);
+ SmallVector<int, 64> Mask((unsigned)MaskWidth, SM_SentinelUndef);
// Merge this shuffle operation's mask into our accumulated mask. Note that
// this shuffle's mask will be the first applied to the input, followed by the
@@ -27645,7 +27644,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
int RootIdx = i / RootRatio;
if (RootMask[RootIdx] < 0) {
// This is a zero or undef lane, we're done.
- Mask.push_back(RootMask[RootIdx]);
+ Mask[i] = RootMask[RootIdx];
continue;
}
@@ -27655,7 +27654,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
// than the SrcOp we're currently inserting.
if ((RootMaskedIdx < (SrcOpIndex * MaskWidth)) ||
(((SrcOpIndex + 1) * MaskWidth) <= RootMaskedIdx)) {
- Mask.push_back(RootMaskedIdx);
+ Mask[i] = RootMaskedIdx;
continue;
}
@@ -27665,7 +27664,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
if (OpMask[OpIdx] < 0) {
// The incoming lanes are zero or undef, it doesn't matter which ones we
// are using.
- Mask.push_back(OpMask[OpIdx]);
+ Mask[i] = OpMask[OpIdx];
continue;
}
@@ -27681,7 +27680,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
OpMaskedIdx += InputIdx1 * MaskWidth;
}
- Mask.push_back(OpMaskedIdx);
+ Mask[i] = OpMaskedIdx;
}
// Handle the all undef/zero cases early.
@@ -27734,7 +27733,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
// elements, and shrink them to the half-width mask. It does this in a loop
// so it will reduce the size of the mask to the minimal width mask which
// performs an equivalent shuffle.
- SmallVector<int, 16> WidenedMask;
+ SmallVector<int, 64> WidenedMask;
while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) {
Mask = std::move(WidenedMask);
}
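For readers tracing the hunks above, a hedged, single-input sketch of the mask-merge pattern the patch moves to (hypothetical helper mergeShuffleMasks; -1 stands in for the SM_SentinelUndef/SM_SentinelZero values; the real function's multi-source SrcOpIndex handling is omitted): the merged mask is built at its final width up front and written by index, rather than grown with reserve() and push_back.

// Hypothetical simplification of the merge loop touched above, not the LLVM
// function itself. Negative lanes (-1) mean zero/undef and pass through.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include <cassert>

static llvm::SmallVector<int, 64>
mergeShuffleMasks(llvm::ArrayRef<int> RootMask, llvm::ArrayRef<int> OpMask,
                  unsigned RootRatio, unsigned OpRatio) {
  unsigned MaskWidth = RootMask.size() * RootRatio;
  assert(OpMask.size() * OpRatio == MaskWidth && "Masks must widen equally");

  // Pre-size the merged mask at its final width instead of reserve()+push_back,
  // so every lane is written exactly once by index in the loop below.
  llvm::SmallVector<int, 64> Mask(MaskWidth, -1);

  for (unsigned i = 0; i != MaskWidth; ++i) {
    unsigned RootIdx = i / RootRatio;
    if (RootMask[RootIdx] < 0) {
      Mask[i] = RootMask[RootIdx];            // zero/undef lane: done.
      continue;
    }
    unsigned RootMaskedIdx = RootMask[RootIdx] * RootRatio + i % RootRatio;
    unsigned OpIdx = RootMaskedIdx / OpRatio;
    if (OpMask[OpIdx] < 0) {
      Mask[i] = OpMask[OpIdx];                // incoming lane is zero/undef.
      continue;
    }
    // Chase the lane through the op's mask at the widened granularity.
    Mask[i] = int(OpMask[OpIdx] * OpRatio + RootMaskedIdx % OpRatio);
  }
  return Mask;
}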