author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-03-27 10:25:02 +0000
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-03-27 10:25:02 +0000
commit | ccb71b2985a2a5073cedd1d734fb0fc2712d68f0 (patch)
tree | ceece4e12f9af39814a7e1b5b2b0a380de5495c4 /llvm/lib/Target
parent | ab0f18076b11972429e7d04cb818582f949f03b5 (diff)
Revert rL356864 : [X86][SSE41] Start shuffle combining from ZERO_EXTEND_VECTOR_INREG (PR40685)
Enable SSE41 ZERO_EXTEND_VECTOR_INREG shuffle combines - for the PMOVZX(PSHUFD(V)) -> UNPCKH(V,0) pattern we reduce the number of shuffles (shuffles are a port5 bottleneck on Intel) at the expense of creating a zero (pxor v,v) and an extra register move. This is a good trade-off, as both are cheap and in most cases it doesn't increase register pressure.
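To make the pattern concrete, here is a minimal intrinsics sketch (function names are assumed for illustration; this is not code from the commit). Both routines zero-extend the upper four u16 lanes of a 128-bit vector to u32, but the unpack form issues one shuffle uop instead of two, and the pxor zero idiom is essentially free on recent Intel cores:

```cpp
#include <immintrin.h>

// PMOVZX(PSHUFD(V)): two shuffle-port uops.
__m128i zext_hi_u16_shuffle(__m128i v) {
  __m128i hi = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 2, 3, 2)); // pshufd: move high 64 bits down
  return _mm_cvtepu16_epi32(hi);                              // pmovzxwd: u16 -> u32
}

// UNPCKH(V, 0): one shuffle uop plus a dependency-breaking pxor.
__m128i zext_hi_u16_unpack(__m128i v) {
  return _mm_unpackhi_epi16(v, _mm_setzero_si128()); // interleave high words with zeros
}
```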
This also exposed a missed opportunity to combine to ZERO_EXTEND_VECTOR_INREG with folded loads, even when we're in the float domain (a load-folding sketch follows this message).
........
Causes PR41249
llvm-svn: 357057
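On the folded-load point: the PMOVZX* instructions accept a memory operand, so when the source value comes straight from a load, the zero-extend can fold the load and no separate domain-crossing move is needed. A minimal sketch under the same assumptions as above (hypothetical helper name, not commit code):

```cpp
#include <immintrin.h>

// With SSE4.1 enabled, compilers typically fold the 64-bit load into the
// extension, emitting a single pmovzxdq (%rdi), %xmm0 instead of movq + unpack.
__m128i zext_load_u32x2(const void *p) {
  __m128i lo = _mm_loadl_epi64(static_cast<const __m128i *>(p)); // movq
  return _mm_cvtepu32_epi64(lo);                                 // pmovzxdq
}
```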
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 61
1 file changed, 28 insertions, 33 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1bf029ac887..67631f72be4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30885,39 +30885,33 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
 
   // Match against a ZERO_EXTEND_VECTOR_INREG/VZEXT instruction.
   // TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
-  if ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
-      (MaskVT.is256BitVector() && Subtarget.hasInt256())) {
-    // Allow this with FloatDomain if we'll be able to fold the load.
-    SDValue BC1 = peekThroughOneUseBitcasts(V1);
-    if (AllowIntDomain ||
-        (BC1.hasOneUse() && BC1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
-         MayFoldLoad(BC1.getOperand(0)))) {
-      unsigned MaxScale = 64 / MaskEltSize;
-      for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
-        bool Match = true;
-        unsigned NumDstElts = NumMaskElts / Scale;
-        for (unsigned i = 0; i != NumDstElts && Match; ++i) {
-          Match &= isUndefOrEqual(Mask[i * Scale], (int)i);
-          Match &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
-        }
-        if (Match) {
-          unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
-          MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType()
-                                            : MVT::getIntegerVT(MaskEltSize);
-          SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize);
-
-          if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits())
-            V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
-
-          if (SrcVT.getVectorNumElements() == NumDstElts)
-            Shuffle = unsigned(ISD::ZERO_EXTEND);
-          else
-            Shuffle = unsigned(ISD::ZERO_EXTEND_VECTOR_INREG);
+  if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
+                         (MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
+    unsigned MaxScale = 64 / MaskEltSize;
+    for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
+      bool Match = true;
+      unsigned NumDstElts = NumMaskElts / Scale;
+      for (unsigned i = 0; i != NumDstElts && Match; ++i) {
+        Match &= isUndefOrEqual(Mask[i * Scale], (int)i);
+        Match &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
+      }
+      if (Match) {
+        unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
+        MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType() :
+                       MVT::getIntegerVT(MaskEltSize);
+        SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize);
+
+        if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits())
+          V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
+
+        if (SrcVT.getVectorNumElements() == NumDstElts)
+          Shuffle = unsigned(ISD::ZERO_EXTEND);
+        else
+          Shuffle = unsigned(ISD::ZERO_EXTEND_VECTOR_INREG);
 
-          DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
-          DstVT = MVT::getVectorVT(DstVT, NumDstElts);
-          return true;
-        }
+        DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
+        DstVT = MVT::getVectorVT(DstVT, NumDstElts);
+        return true;
       }
     }
   }
@@ -42622,7 +42616,8 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, In.getOperand(0));
 
   // Attempt to combine as a shuffle.
-  if (Subtarget.hasSSE41() && N->getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
+  // TODO: SSE41 support
+  if (Subtarget.hasAVX() && N->getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
     SDValue Op(N, 0);
     if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType()))
       if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
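For reference, the matching loop in the first hunk accepts a shuffle mask as a zero-extension by Scale when destination lane i*Scale selects source element i (or is undef) and the following Scale-1 padding lanes are undef or known zero. A simplified standalone re-implementation (not the LLVM code itself) using LLVM's shuffle-mask sentinel values:

```cpp
#include <cstdio>
#include <vector>

// Sentinel values as used in LLVM shuffle masks.
constexpr int SM_SentinelUndef = -1;
constexpr int SM_SentinelZero = -2;

// A mask models ZERO_EXTEND_VECTOR_INREG by Scale iff lane i*Scale picks
// source element i (or is undef) and the Scale-1 padding lanes that follow
// are undef or known zero.
static bool matchesZExtInReg(const std::vector<int> &Mask, unsigned Scale) {
  size_t NumDstElts = Mask.size() / Scale;
  for (size_t i = 0; i != NumDstElts; ++i) {
    if (Mask[i * Scale] != SM_SentinelUndef && Mask[i * Scale] != (int)i)
      return false; // lane must be undef or source element i
    for (unsigned j = 1; j != Scale; ++j)
      if (Mask[i * Scale + j] >= 0)
        return false; // padding lanes must be undef or zero
  }
  return true;
}

int main() {
  std::vector<int> PMovZxDQ = {0, SM_SentinelZero, 1, SM_SentinelZero};
  std::printf("{0,Z,1,Z} at Scale 2: %d\n", matchesZExtInReg(PMovZxDQ, 2)); // 1
  std::vector<int> Blend = {0, 5, 2, 7};
  std::printf("{0,5,2,7} at Scale 2: %d\n", matchesZExtInReg(Blend, 2));    // 0
}
```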