diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 92 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 15 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/reg_sequence.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/CellSPU/rotate_ops.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/2011-10-27-tstore.ll | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/SwizzleShuff.ll | 14 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_shuffle-37.ll | 10 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/widen_shuffle-1.ll | 2 | 
8 files changed, 127 insertions, 22 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index bac644a42a8..5e88fcbb0e1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2336,6 +2336,68 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {                         ORNode, N0.getOperand(1));    } +  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) +  // Only perform this optimization after type legalization and before +  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by +  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and +  // we don't want to undo this promotion. +  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper +  // on scalars. +  if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR) +      && Level == AfterLegalizeVectorOps) { +    SDValue In0 = N0.getOperand(0); +    SDValue In1 = N1.getOperand(0); +    EVT In0Ty = In0.getValueType(); +    EVT In1Ty = In1.getValueType(); +    // If both incoming values are integers, and the original types are the same. +    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { +      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1); +      SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op); +      AddToWorkList(Op.getNode()); +      return BC; +    } +  } + +  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). +  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) +  // If both shuffles use the same mask, and both shuffle within a single +  // vector, then it is worthwhile to move the swizzle after the operation. +  // The type-legalizer generates this pattern when loading illegal +  // vector types from memory. In many cases this allows additional shuffle +  // optimizations. 
+  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { +    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); +    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); +    SDValue In0 = SVN0->getOperand(0); +    SDValue In1 = SVN1->getOperand(0); +    EVT In0Ty = In0.getValueType(); +    EVT In1Ty = In1.getValueType(); + +    unsigned NumElts = VT.getVectorNumElements(); +    // Check that both shuffles are swizzles. +    bool SingleVecShuff = (N0.getOperand(1).getOpcode() == ISD::UNDEF && +                           N1.getOperand(1).getOpcode() == ISD::UNDEF); + +    // Check that both shuffles use the same mask. The masks are known to be of +    // the same length because the result vector type is the same. +    bool SameMask = true; +    for (unsigned i = 0; i != NumElts; ++i) { +      int Idx0 = SVN0->getMaskElt(i); +      int Idx1 = SVN1->getMaskElt(i); +      if (Idx0 != Idx1) { +        SameMask = false; +        break; +      } +    } + +    if (SameMask && SingleVecShuff && In0Ty == In1Ty) { +      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, In0, In1); +      SDValue Shuff = DAG.getVectorShuffle(VT, N->getDebugLoc(), Op, +                                          DAG.getUNDEF(VT), &SVN0->getMask()[0]); +      AddToWorkList(Op.getNode()); +      return Shuff; +    } +  }    return SDValue();  } @@ -7721,6 +7783,36 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {          return N0;      }    } + +  // If this shuffle node is simply a swizzle of another shuffle node, +  // optimize shuffle(shuffle(x, y), undef) -> shuffle(x, y). 
+  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && +      N1.getOpcode() == ISD::UNDEF) { + +    SmallVector<int, 8> NewMask; +    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); + +    EVT InVT = N0.getValueType(); +    int InNumElts = InVT.getVectorNumElements(); + +    for (unsigned i = 0; i != NumElts; ++i) { +      int Idx = SVN->getMaskElt(i); +      // If we access the second (undef) operand then this index can be +      // canonicalized to undef as well. +      if (Idx >= InNumElts) +        Idx = -1; +      // Next, this index comes from the first value, which is the incoming +      // shuffle. Adopt the incoming index. +      if (Idx >= 0) +        Idx = OtherSV->getMaskElt(Idx); + +      NewMask.push_back(Idx); +    } + +    return DAG.getVectorShuffle(VT, N->getDebugLoc(), OtherSV->getOperand(0), +                                OtherSV->getOperand(1), &NewMask[0]); +  } +    return SDValue();  } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 88f38298db0..69a60361314 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -14000,13 +14000,14 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,          return SDValue();        // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them. -      if (Mask.getOpcode() != ISD::BITCAST || -          X.getOpcode() != ISD::BITCAST || -          Y.getOpcode() != ISD::BITCAST) -        return SDValue(); -        // Look through mask bitcast. -      Mask = Mask.getOperand(0); +      if (Mask.getOpcode() == ISD::BITCAST) +        Mask = Mask.getOperand(0); +      if (X.getOpcode() == ISD::BITCAST) +        X = X.getOperand(0); +      if (Y.getOpcode() == ISD::BITCAST) +        Y = Y.getOperand(0); +        EVT MaskVT = Mask.getValueType();        // Validate that the Mask operand is a vector sra node. 
@@ -14027,8 +14028,6 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,        // Now we know we at least have a plendvb with the mask val.  See if        // we can form a psignb/w/d.        // psign = x.type == y.type == mask.type && y = sub(0, x); -      X = X.getOperand(0); -      Y = Y.getOperand(0);        if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&            ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&            X.getValueType() == MaskVT && Y.getValueType() == MaskVT) { diff --git a/llvm/test/CodeGen/ARM/reg_sequence.ll b/llvm/test/CodeGen/ARM/reg_sequence.ll index 78b4e7ea84c..05794e4ebdd 100644 --- a/llvm/test/CodeGen/ARM/reg_sequence.ll +++ b/llvm/test/CodeGen/ARM/reg_sequence.ll @@ -273,7 +273,7 @@ define arm_aapcs_vfpcc i32 @t10() nounwind {  entry:  ; CHECK: t10:  ; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3f000000 -; CHECK: vmul.f32 q8, q8, d0[0] +; CHECK: vmul.f32 q8, q8, d[[DREG:[0-1]+]]  ; CHECK: vadd.f32 q8, q8, q8    %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]    %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1] diff --git a/llvm/test/CodeGen/CellSPU/rotate_ops.ll b/llvm/test/CodeGen/CellSPU/rotate_ops.ll index 97709352760..8b7af20b4a9 100644 --- a/llvm/test/CodeGen/CellSPU/rotate_ops.ll +++ b/llvm/test/CodeGen/CellSPU/rotate_ops.ll @@ -1,5 +1,5 @@  ; RUN: llc < %s -march=cellspu -o %t1.s -; RUN: grep rot          %t1.s | count 86 +; RUN: grep rot          %t1.s | count 85  ; RUN: grep roth         %t1.s | count 8  ; RUN: grep roti.*5      %t1.s | count 1  ; RUN: grep roti.*27     %t1.s | count 1 diff --git a/llvm/test/CodeGen/X86/2011-10-27-tstore.ll b/llvm/test/CodeGen/X86/2011-10-27-tstore.ll index 016e02c3d5d..1712f345653 100644 --- a/llvm/test/CodeGen/X86/2011-10-27-tstore.ll +++ b/llvm/test/CodeGen/X86/2011-10-27-tstore.ll @@ -3,14 +3,14 @@  target triple = "x86_64-unknown-linux-gnu"  ;CHECK: ltstore 
-;CHECK: pshufd -;CHECK: pshufd -;CHECK: ret -define void @ltstore() { +;CHECK: movq +;CHECK-NEXT: movq +;CHECK-NEXT: ret +define void @ltstore(<4 x i32>* %pIn, <2 x i32>* %pOut) {  entry: -  %in = load <4 x i32>* undef +  %in = load <4 x i32>* %pIn    %j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1> -  store <2 x i32> %j, <2 x i32>* undef +  store <2 x i32> %j, <2 x i32>* %pOut    ret void  } diff --git a/llvm/test/CodeGen/X86/SwizzleShuff.ll b/llvm/test/CodeGen/X86/SwizzleShuff.ll new file mode 100644 index 00000000000..11b702e3d1b --- /dev/null +++ b/llvm/test/CodeGen/X86/SwizzleShuff.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; Check that we perform a scalar XOR on i32. + +; CHECK: pull_bitcast +; CHECK: xorl +; CHECK: ret +define void @pull_bitcast (<4 x i8>* %pA, <4 x i8>* %pB) { +  %A = load <4 x i8>* %pA +  %B = load <4 x i8>* %pB +  %C = xor <4 x i8> %A, %B +  store <4 x i8> %C, <4 x i8>* %pA +  ret void +} diff --git a/llvm/test/CodeGen/X86/vec_shuffle-37.ll b/llvm/test/CodeGen/X86/vec_shuffle-37.ll index 06083989382..65486cb80c9 100644 --- a/llvm/test/CodeGen/X86/vec_shuffle-37.ll +++ b/llvm/test/CodeGen/X86/vec_shuffle-37.ll @@ -27,11 +27,11 @@ entry:  define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline {  entry:  ; CHECK: t02 -; CHECK: movaps -; CHECK: shufps -; CHECK: pshufd -; CHECK: movq -; CHECK: ret +; CHECK: mov +; CHECK-NEXT: mov +; CHECK-NEXT: mov +; CHECK-NEXT: mov +; CHECK-NEXT: ret    %0 = bitcast <8 x i32>* %source to <4 x i32>*    %arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3    %tmp2 = load <4 x i32>* %arrayidx, align 16 diff --git a/llvm/test/CodeGen/X86/widen_shuffle-1.ll b/llvm/test/CodeGen/X86/widen_shuffle-1.ll index 7bebb274f6e..94200537168 100644 --- a/llvm/test/CodeGen/X86/widen_shuffle-1.ll +++ b/llvm/test/CodeGen/X86/widen_shuffle-1.ll @@ -33,7 +33,7 @@ entry:  define void @shuf3(<4 x float> %tmp10, <4 
x float> %vecinit15, <4 x float>* %dst) nounwind {  entry:  ; CHECK: shuf3: -; CHECK: shufps +; CHECK: shufd    %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>    %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>     %tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>  | 

