diff options
| author | Nadav Rotem <nadav.rotem@intel.com> | 2012-04-01 19:31:22 +0000 | 
|---|---|---|
| committer | Nadav Rotem <nadav.rotem@intel.com> | 2012-04-01 19:31:22 +0000 | 
| commit | b078350872693f37726e78caa1c413dd736cff4e (patch) | |
| tree | dee74084ef3a55b07056b0d49ab27e103f58d842 /llvm/lib | |
| parent | ac19edd2b0b31077d4fc288491f7c19e7c9b7654 (diff) | |
| download | bcm5719-llvm-b078350872693f37726e78caa1c413dd736cff4e.tar.gz bcm5719-llvm-b078350872693f37726e78caa1c413dd736cff4e.zip | |
This commit contains a few changes that had to go in together.
1. Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
   (the same fold is applied when both operands are scalar_to_vector).
2. Xor/and/or are indifferent to the swizzle operation (a shuffle of a single source vector).
   Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A, B)) when both shuffles use the same mask.
3. Optimize swizzles of shuffles: shuff(shuff(x, y), undef) -> shuff(x, y), composing the two masks.
4. Fix an X86ISelLowering optimization (PerformOrCombine) that only fired when X, Y, and Mask were all bitcasts; bitcasts are now looked through only when present.
Code which was previously compiled to this:
movd    (%rsi), %xmm0
movdqa  .LCPI0_0(%rip), %xmm2
pshufb  %xmm2, %xmm0
movd    (%rdi), %xmm1
pshufb  %xmm2, %xmm1
pxor    %xmm0, %xmm1
pshufb  .LCPI0_1(%rip), %xmm1
movd    %xmm1, (%rdi)
ret
Now compiles to this:
movl    (%rsi), %eax
xorl    %eax, (%rdi)
ret
llvm-svn: 153848
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 92 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 15 | 
2 files changed, 99 insertions, 8 deletions
| diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index bac644a42a8..5e88fcbb0e1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2336,6 +2336,68 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {                         ORNode, N0.getOperand(1));    } +  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) +  // Only perform this optimization after type legalization and before +  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by +  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and +  // we don't want to undo this promotion. +  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper +  // on scalars. +  if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR) +      && Level == AfterLegalizeVectorOps) { +    SDValue In0 = N0.getOperand(0); +    SDValue In1 = N1.getOperand(0); +    EVT In0Ty = In0.getValueType(); +    EVT In1Ty = In1.getValueType(); +    // If both incoming values are integers, and the original types are the same. +    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { +      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1); +      SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op); +      AddToWorkList(Op.getNode()); +      return BC; +    } +  } + +  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). +  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) +  // If both shuffles use the same mask, and both shuffle within a single +  // vector, then it is worthwhile to move the swizzle after the operation. +  // The type-legalizer generates this pattern when loading illegal +  // vector types from memory. In many cases this allows additional shuffle +  // optimizations. 
+  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { +    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); +    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); +    SDValue In0 = SVN0->getOperand(0); +    SDValue In1 = SVN1->getOperand(0); +    EVT In0Ty = In0.getValueType(); +    EVT In1Ty = In1.getValueType(); + +    unsigned NumElts = VT.getVectorNumElements(); +    // Check that both shuffles are swizzles. +    bool SingleVecShuff = (N0.getOperand(1).getOpcode() == ISD::UNDEF && +                           N1.getOperand(1).getOpcode() == ISD::UNDEF); + +    // Check that both shuffles use the same mask. The masks are known to be of +    // the same length because the result vector type is the same. +    bool SameMask = true; +    for (unsigned i = 0; i != NumElts; ++i) { +      int Idx0 = SVN0->getMaskElt(i); +      int Idx1 = SVN1->getMaskElt(i); +      if (Idx0 != Idx1) { +        SameMask = false; +        break; +      } +    } + +    if (SameMask && SingleVecShuff && In0Ty == In1Ty) { +      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, In0, In1); +      SDValue Shuff = DAG.getVectorShuffle(VT, N->getDebugLoc(), Op, +                                          DAG.getUNDEF(VT), &SVN0->getMask()[0]); +      AddToWorkList(Op.getNode()); +      return Shuff; +    } +  }    return SDValue();  } @@ -7721,6 +7783,36 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {          return N0;      }    } + +  // If this shuffle node is simply a swizzle of another shuffle node, +  // optimize shuffle(shuffle(x, y), undef) -> shuffle(x, y). 
+  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && +      N1.getOpcode() == ISD::UNDEF) { + +    SmallVector<int, 8> NewMask; +    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); + +    EVT InVT = N0.getValueType(); +    int InNumElts = InVT.getVectorNumElements(); + +    for (unsigned i = 0; i != NumElts; ++i) { +      int Idx = SVN->getMaskElt(i); +      // If we access the second (undef) operand then this index can be +      // canonicalized to undef as well. +      if (Idx >= InNumElts) +        Idx = -1; +      // Next, this index comes from the first value, which is the incoming +      // shuffle. Adopt the incoming index. +      if (Idx >= 0) +        Idx = OtherSV->getMaskElt(Idx); + +      NewMask.push_back(Idx); +    } + +    return DAG.getVectorShuffle(VT, N->getDebugLoc(), OtherSV->getOperand(0), +                                OtherSV->getOperand(1), &NewMask[0]); +  } +    return SDValue();  } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 88f38298db0..69a60361314 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -14000,13 +14000,14 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,          return SDValue();        // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them. -      if (Mask.getOpcode() != ISD::BITCAST || -          X.getOpcode() != ISD::BITCAST || -          Y.getOpcode() != ISD::BITCAST) -        return SDValue(); -        // Look through mask bitcast. -      Mask = Mask.getOperand(0); +      if (Mask.getOpcode() == ISD::BITCAST) +        Mask = Mask.getOperand(0); +      if (X.getOpcode() == ISD::BITCAST) +        X = X.getOperand(0); +      if (Y.getOpcode() == ISD::BITCAST) +        Y = Y.getOperand(0); +        EVT MaskVT = Mask.getValueType();        // Validate that the Mask operand is a vector sra node. 
@@ -14027,8 +14028,6 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,        // Now we know we at least have a plendvb with the mask val.  See if        // we can form a psignb/w/d.        // psign = x.type == y.type == mask.type && y = sub(0, x); -      X = X.getOperand(0); -      Y = Y.getOperand(0);        if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&            ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&            X.getValueType() == MaskVT && Y.getValueType() == MaskVT) { | 

