This commit contains a few changes that had to go in together.

1. Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) (and also scalar_to_vector). 2. Xor/and/or are indifferent to the swizzle operation (shuffle of one src). Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A, B)) 3. Optimize swizzles of shuffles: shuff(shuff(x, y), undef) -> shuff(x, y). 4. Fix an X86ISelLowering optimization which was very bitcast-sensitive. Code which was previously compiled to this: movd (%rsi), %xmm0 movdqa .LCPI0_0(%rip), %xmm2 pshufb %xmm2, %xmm0 movd (%rdi), %xmm1 pshufb %xmm2, %xmm1 pxor %xmm0, %xmm1 pshufb .LCPI0_1(%rip), %xmm1 movd %xmm1, (%rdi) ret Now compiles to this: movl (%rsi), %eax xorl %eax, (%rdi) ret llvm-svn: 153848
author: Nadav Rotem <nadav.rotem@intel.com> 2012-04-01 19:31:22 +0000
committer: Nadav Rotem <nadav.rotem@intel.com> 2012-04-01 19:31:22 +0000
commit: b078350872693f37726e78caa1c413dd736cff4e (patch)
tree: dee74084ef3a55b07056b0d49ab27e103f58d842 /llvm/lib
parent: ac19edd2b0b31077d4fc288491f7c19e7c9b7654 (diff)
download: bcm5719-llvm-b078350872693f37726e78caa1c413dd736cff4e.tar.gz
bcm5719-llvm-b078350872693f37726e78caa1c413dd736cff4e.zip
2 files changed, 99 insertions, 8 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bac644a42a8..5e88fcbb0e1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2336,6 +2336,68 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
                        ORNode, N0.getOperand(1));
   }
 
+  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
+  // Only perform this optimization after type legalization and before
+  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
+  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
+  // we don't want to undo this promotion.
+  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
+  // on scalars.
+  if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
+      && Level == AfterLegalizeVectorOps) {
+    SDValue In0 = N0.getOperand(0);
+    SDValue In1 = N1.getOperand(0);
+    EVT In0Ty = In0.getValueType();
+    EVT In1Ty = In1.getValueType();
+    // If both incoming values are integers, and the original types are the same.
+    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
+      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1);
+      SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op);
+      AddToWorkList(Op.getNode());
+      return BC;
+    }
+  }
+
+  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
+  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
+  // If both shuffles use the same mask, and both shuffle within a single
+  // vector, then it is worthwhile to move the swizzle after the operation.
+  // The type-legalizer generates this pattern when loading illegal
+  // vector types from memory. In many cases this allows additional shuffle
+  // optimizations.
+  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
+    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
+    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
+    SDValue In0 = SVN0->getOperand(0);
+    SDValue In1 = SVN1->getOperand(0);
+    EVT In0Ty = In0.getValueType();
+    EVT In1Ty = In1.getValueType();
+
+    unsigned NumElts = VT.getVectorNumElements();
+    // Check that both shuffles are swizzles.
+    bool SingleVecShuff = (N0.getOperand(1).getOpcode() == ISD::UNDEF &&
+                           N1.getOperand(1).getOpcode() == ISD::UNDEF);
+
+    // Check that both shuffles use the same mask. The masks are known to be of
+    // the same length because the result vector type is the same.
+    bool SameMask = true;
+    for (unsigned i = 0; i != NumElts; ++i) {
+      int Idx0 = SVN0->getMaskElt(i);
+      int Idx1 = SVN1->getMaskElt(i);
+      if (Idx0 != Idx1) {
+        SameMask = false;
+        break;
+      }
+    }
+
+    if (SameMask && SingleVecShuff && In0Ty == In1Ty) {
+      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, In0, In1);
+      SDValue Shuff = DAG.getVectorShuffle(VT, N->getDebugLoc(), Op,
+                                          DAG.getUNDEF(VT), &SVN0->getMask()[0]);
+      AddToWorkList(Op.getNode());
+      return Shuff;
+    }
+  }
   return SDValue();
 }
 
@@ -7721,6 +7783,36 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
         return N0;
     }
   }
+
+  // If this shuffle node is simply a swizzle of another shuffle node,
+  // optimize shuffle(shuffle(x, y), undef) -> shuffle(x, y).
+  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+      N1.getOpcode() == ISD::UNDEF) {
+
+    SmallVector<int, 8> NewMask;
+    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+
+    EVT InVT = N0.getValueType();
+    int InNumElts = InVT.getVectorNumElements();
+
+    for (unsigned i = 0; i != NumElts; ++i) {
+      int Idx = SVN->getMaskElt(i);
+      // If we access the second (undef) operand then this index can be
+      // canonicalized to undef as well.
+      if (Idx >= InNumElts)
+        Idx = -1;
+      // Next, this index comes from the first value, which is the incoming
+      // shuffle. Adopt the incoming index.
+      if (Idx >= 0)
+        Idx = OtherSV->getMaskElt(Idx);
+
+      NewMask.push_back(Idx);
+    }
+
+    return DAG.getVectorShuffle(VT, N->getDebugLoc(), OtherSV->getOperand(0),
+                                OtherSV->getOperand(1), &NewMask[0]);
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 88f38298db0..69a60361314 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14000,13 +14000,14 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
         return SDValue();
 
       // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them.
-      if (Mask.getOpcode() != ISD::BITCAST ||
-          X.getOpcode() != ISD::BITCAST ||
-          Y.getOpcode() != ISD::BITCAST)
-        return SDValue();
-
       // Look through mask bitcast.
-      Mask = Mask.getOperand(0);
+      if (Mask.getOpcode() == ISD::BITCAST)
+        Mask = Mask.getOperand(0);
+      if (X.getOpcode() == ISD::BITCAST)
+        X = X.getOperand(0);
+      if (Y.getOpcode() == ISD::BITCAST)
+        Y = Y.getOperand(0);
+
       EVT MaskVT = Mask.getValueType();
 
       // Validate that the Mask operand is a vector sra node.
@@ -14027,8 +14028,6 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
       // Now we know we at least have a plendvb with the mask val.  See if
       // we can form a psignb/w/d.
       // psign = x.type == y.type == mask.type && y = sub(0, x);
-      X = X.getOperand(0);
-      Y = Y.getOperand(0);
       if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
           ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
           X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
author	Nadav Rotem <nadav.rotem@intel.com>	2012-04-01 19:31:22 +0000
committer	Nadav Rotem <nadav.rotem@intel.com>	2012-04-01 19:31:22 +0000
commit	b078350872693f37726e78caa1c413dd736cff4e (patch)
tree	dee74084ef3a55b07056b0d49ab27e103f58d842 /llvm/lib
parent	ac19edd2b0b31077d4fc288491f7c19e7c9b7654 (diff)
download	bcm5719-llvm-b078350872693f37726e78caa1c413dd736cff4e.tar.gz bcm5719-llvm-b078350872693f37726e78caa1c413dd736cff4e.zip