[X86][SSE] Add support for combining VZEXT_MOVL target shuffles

Includes adding more general support for the pattern: VZEXT_MOVL(VZEXT_LOAD(ptr)) -> VZEXT_LOAD(ptr). This has unearthed a couple of latent poor codegen issues (MINSS/MAXSS scalar load folding and MOVDDUP/BROADCAST load folding patterns), which will be fixed shortly. It has also reduced a couple of tests so that they no longer reach the instruction threshold necessary to be combined to PSHUFB (see PR26183). llvm-svn: 279646
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-08-24 18:07:53 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-08-24 18:07:53 +0000
commit: 941bd6bbae6ba1fbf3d46cfb365d1015703ef448 (patch)
tree: 8eddc2a47b1117c80aff16e57288bb09d505d194 /llvm/lib/Target/X86/X86ISelLowering.cpp
parent: 26d9c41ff6b863253e1e9b17bd5acffc30ead692 (diff)
download: bcm5719-llvm-941bd6bbae6ba1fbf3d46cfb365d1015703ef448.tar.gz
bcm5719-llvm-941bd6bbae6ba1fbf3d46cfb365d1015703ef448.zip
1 files changed, 31 insertions, 12 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 293d5a4ac14..a32c1526b68 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4236,6 +4236,21 @@ static bool isUndefOrInRange(ArrayRef<int> Mask,
   return true;
 }
 
+/// Return true if Val is undef or zero (per isUndefOrZero), or if its value
+/// falls within the half-open range [Low, Hi).
+static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
+  return isUndefOrZero(Val) || (Val >= Low && Val < Hi);
+}
+
+/// Return true if every element in Mask is undef or zero, or if its value
+/// falls within the half-open range [Low, Hi). An empty Mask yields true.
+static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
+  for (int M : Mask)
+    if (!isUndefOrZeroOrInRange(M, Low, Hi))
+      return false;
+  return true;
+}
+
 /// Return true if every element in Mask, beginning
 /// from position Pos and ending in Pos+Size, falls within the specified
 /// sequential range (Low, Low+Size]. or is undef.
@@ -25347,6 +25362,21 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
   unsigned Shuffle, PermuteImm;
 
   if (UnaryShuffle) {
+    // If we are shuffling an X86ISD::VZEXT_LOAD then we can use the load
+    // directly if we don't shuffle the lower element and we shuffle the upper
+    // (zero) elements within themselves.
+    if (V1.getOpcode() == X86ISD::VZEXT_LOAD &&
+        (V1.getScalarValueSizeInBits() % MaskEltSizeInBits) == 0) {
+      unsigned Scale = V1.getScalarValueSizeInBits() / MaskEltSizeInBits;
+      ArrayRef<int> HiMask(Mask.data() + Scale, NumMaskElts - Scale);
+      if (isSequentialOrUndefInRange(Mask, 0, Scale, 0) &&
+          isUndefOrZeroOrInRange(HiMask, Scale, NumMaskElts)) {
+        DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, V1),
+                      /*AddTo*/ true);
+        return true;
+      }
+    }
+
     if (matchUnaryVectorShuffle(MaskVT, Mask, Subtarget, Shuffle, ShuffleVT)) {
       if (Depth == 1 && Root.getOpcode() == Shuffle)
         return false; // Nothing to do!
@@ -30502,17 +30532,6 @@ static SDValue combineBT(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-static SDValue combineVZextMovl(SDNode *N, SelectionDAG &DAG) {
-  SDValue Op = peekThroughBitcasts(N->getOperand(0));
-  EVT VT = N->getValueType(0), OpVT = Op.getValueType();
-  if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
-      VT.getVectorElementType().getSizeInBits() ==
-      OpVT.getVectorElementType().getSizeInBits()) {
-    return DAG.getBitcast(VT, Op);
-  }
-  return SDValue();
-}
-
 static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget &Subtarget) {
   EVT VT = N->getValueType(0);
@@ -31498,7 +31517,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::FAND:        return combineFAnd(N, DAG, Subtarget);
   case X86ISD::FANDN:       return combineFAndn(N, DAG, Subtarget);
   case X86ISD::BT:          return combineBT(N, DAG, DCI);
-  case X86ISD::VZEXT_MOVL:  return combineVZextMovl(N, DAG);
   case ISD::ANY_EXTEND:
   case ISD::ZERO_EXTEND:    return combineZext(N, DAG, DCI, Subtarget);
   case ISD::SIGN_EXTEND:    return combineSext(N, DAG, DCI, Subtarget);
@@ -31534,6 +31552,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::VPERMILPI:
   case X86ISD::VPERMILPV:
   case X86ISD::VPERM2X128:
+  case X86ISD::VZEXT_MOVL:
   case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI,Subtarget);
   case ISD::FMA:            return combineFMA(N, DAG, Subtarget);
   case ISD::MGATHER:
author	Simon Pilgrim <llvm-dev@redking.me.uk>	2016-08-24 18:07:53 +0000
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	2016-08-24 18:07:53 +0000
commit	941bd6bbae6ba1fbf3d46cfb365d1015703ef448 (patch)
tree	8eddc2a47b1117c80aff16e57288bb09d505d194 /llvm/lib/Target/X86/X86ISelLowering.cpp
parent	26d9c41ff6b863253e1e9b17bd5acffc30ead692 (diff)
download	bcm5719-llvm-941bd6bbae6ba1fbf3d46cfb365d1015703ef448.tar.gz bcm5719-llvm-941bd6bbae6ba1fbf3d46cfb365d1015703ef448.zip