|  |  |  |
|---|---|---|
| author | Evan Cheng <evan.cheng@apple.com> | 2008-09-25 20:50:48 +0000 |
| committer | Evan Cheng <evan.cheng@apple.com> | 2008-09-25 20:50:48 +0000 |
| commit | 74c9ed91b0b34ea79189adad5e4b41e6a012be07 (patch) | |
| tree | 2e77c3fbb7789b7c7815f560442edd5a490a4f73 | |
| parent | c50ada2f5678d8745b115d9a5de51309094845f0 (diff) | |
| download | bcm5719-llvm-74c9ed91b0b34ea79189adad5e4b41e6a012be07.tar.gz, bcm5719-llvm-74c9ed91b0b34ea79189adad5e4b41e6a012be07.zip | |
With SSE3, when the source is a load or has multiple uses, favor movddup over shufp*, pshufd, etc. Without SSE3, or when the source comes from a register, make use of movlhps.
llvm-svn: 56620
|  |  |  |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 77 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 4 |
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 33 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_shuffle-22.ll | 16 |
4 files changed, 113 insertions, 17 deletions
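For background on the two instructions this change chooses between (not part of the commit itself): both duplicate the low 64 bits of the source into both halves of the result, which is exactly the <0,1,0,1> (v4f32) or <0,0> (v2f64) shuffle being canonicalized here. A minimal C++ sketch using the standard SSE intrinsic equivalents:

```cpp
// Background sketch only; these intrinsics are the C-level equivalents of
// the instructions involved, not code from this patch.
#include <xmmintrin.h>   // SSE:  _mm_movelh_ps  -> movlhps
#include <pmmintrin.h>   // SSE3: _mm_movedup_pd -> movddup

// SSE3 form: a single movddup, which can also fold a 64-bit load.
static __m128d dup_lo_sse3(__m128d v) {
  return _mm_movedup_pd(v);       // movddup: result = { v[0], v[0] }
}

// Pre-SSE3 form: movlhps with the same register as both operands.
static __m128 dup_lo_sse1(__m128 v) {
  return _mm_movelh_ps(v, v);     // movlhps: result = { v[0], v[1], v[0], v[1] }
}
```

The patch prefers the movddup form when SSE3 is available and the shuffle has more than one use or its input is a load (so the load folds into movddup); otherwise it falls back to the movlhps form.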
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 55238eb863e..d0b726f13f2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2516,6 +2516,21 @@ bool X86::isSplatLoMask(SDNode *N) {
   return true;
 }
 
+/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
+bool X86::isMOVDDUPMask(SDNode *N) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+  unsigned e = N->getNumOperands() / 2;
+  for (unsigned i = 0; i < e; ++i)
+    if (!isUndefOrEqual(N->getOperand(i), i))
+      return false;
+  for (unsigned i = 0; i < e; ++i)
+    if (!isUndefOrEqual(N->getOperand(e+i), i))
+      return false;
+  return true;
+}
+
 /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
 /// instructions.
@@ -2683,15 +2698,14 @@ static bool ShouldXformToMOVHLPS(SDNode *Mask) {
 /// is promoted to a vector. It also returns the LoadSDNode by reference if
 /// required.
 static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
-  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
-    N = N->getOperand(0).getNode();
-    if (ISD::isNON_EXTLoad(N)) {
-      if (LD)
-        *LD = cast<LoadSDNode>(N);
-      return true;
-    }
-  }
-  return false;
+  if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
+    return false;
+  N = N->getOperand(0).getNode();
+  if (!ISD::isNON_EXTLoad(N))
+    return false;
+  if (LD)
+    *LD = cast<LoadSDNode>(N);
+  return true;
 }
 
 /// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
@@ -2943,6 +2957,46 @@ static SDValue PromoteSplat(SDValue Op, SelectionDAG &DAG, bool HasSSE2) {
   return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
 }
 
+/// isVectorLoad - Returns true if the node is a vector load, a scalar
+/// load that's promoted to vector, or a load bitcasted.
+static bool isVectorLoad(SDValue Op) {
+  assert(Op.getValueType().isVector() && "Expected a vector type");
+  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR ||
+      Op.getOpcode() == ISD::BIT_CONVERT) {
+    return isa<LoadSDNode>(Op.getOperand(0));
+  }
+  return isa<LoadSDNode>(Op);
+}
+
+
+/// CanonicalizeMovddup - Cannonicalize movddup shuffle to v2f64.
+///
+static SDValue CanonicalizeMovddup(SDValue Op, SDValue V1, SDValue Mask,
+                                   SelectionDAG &DAG, bool HasSSE3) {
+  // If we have sse3 and shuffle has more than one use or input is a load, then
+  // use movddup. Otherwise, use movlhps.
+  bool UseMovddup = HasSSE3 && (!Op.hasOneUse() || isVectorLoad(V1));
+  MVT PVT = UseMovddup ? MVT::v2f64 : MVT::v4f32;
+  MVT VT = Op.getValueType();
+  if (VT == PVT)
+    return Op;
+  unsigned NumElems = PVT.getVectorNumElements();
+  if (NumElems == 2) {
+    SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+    Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
+  } else {
+    assert(NumElems == 4);
+    SDValue Cst0 = DAG.getTargetConstant(0, MVT::i32);
+    SDValue Cst1 = DAG.getTargetConstant(1, MVT::i32);
+    Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst0, Cst1, Cst0, Cst1);
+  }
+
+  V1 = DAG.getNode(ISD::BIT_CONVERT, PVT, V1);
+  SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, PVT, V1,
+                                DAG.getNode(ISD::UNDEF, PVT), Mask);
+  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
+}
+
 /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
 /// vector of zero or undef vector.  This produces a shuffle where the low
 /// element of V2 is swizzled into the zero/undef vector, landing at element
@@ -3894,6 +3948,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
     return PromoteSplat(Op, DAG, Subtarget->hasSSE2());
   }
 
+  // Canonicalize movddup shuffles.
+  if (V2IsUndef && Subtarget->hasSSE2() &&
+      X86::isMOVDDUPMask(PermMask.getNode()))
+    return CanonicalizeMovddup(Op, V1, PermMask, DAG, Subtarget->hasSSE3());
+
   // If the shuffle can be profitably rewritten as a narrower shuffle, then
   // do it!
   if (VT == MVT::v8i16 || VT == MVT::v16i8) {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index a2d8a11dc3f..784069b4d20 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -295,6 +295,10 @@ namespace llvm {
     /// specifies a splat of zero element.
     bool isSplatLoMask(SDNode *N);
 
+    /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+    /// specifies a shuffle of elements that is suitable for input to MOVDDUP.
+    bool isMOVDDUPMask(SDNode *N);
+
     /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
     /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
     /// instructions.
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index d67beac3e95..f9de8e74758 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -208,6 +208,10 @@ def SSE_splat_lo_mask : PatLeaf<(build_vector), [{
   return X86::isSplatLoMask(N);
 }]>;
 
+def MOVDDUP_shuffle_mask : PatLeaf<(build_vector), [{
+  return X86::isMOVDDUPMask(N);
+}]>;
+
 def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
   return X86::isMOVHLPSMask(N);
 }]>;
@@ -755,6 +759,11 @@ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:
 } // AddedComplexity
 } // Constraints = "$src1 = $dst"
 
+let AddedComplexity = 15 in
+def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), MOVDDUP_shuffle_mask)),
+          (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+
+
 
 // Arithmetic
 
@@ -2452,16 +2461,24 @@ def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
 
 def MOVDDUPrr  : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "movddup\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst, (v2f64 (vector_shuffle
-                                                VR128:$src, (undef),
-                                                SSE_splat_lo_mask)))]>;
+                      [(set VR128:$dst,
+                         (v2f64 (vector_shuffle VR128:$src, (undef),
+                                                MOVDDUP_shuffle_mask)))]>;
 def MOVDDUPrm  : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                       "movddup\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst,
-                        (v2f64 (vector_shuffle
-                                (scalar_to_vector (loadf64 addr:$src)),
-                                (undef),
-                                SSE_splat_lo_mask)))]>;
+                    [(set VR128:$dst,
+                      (v2f64 (vector_shuffle
+                              (scalar_to_vector (loadf64 addr:$src)),
+                              (undef), MOVDDUP_shuffle_mask)))]>;
+
+def : Pat<(vector_shuffle
+             (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
+             (undef), MOVDDUP_shuffle_mask),
+          (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+def : Pat<(vector_shuffle
+            (memopv2f64 addr:$src), (undef), MOVDDUP_shuffle_mask),
+          (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
 
 // Arithmetic
 let Constraints = "$src1 = $dst" in {
diff --git a/llvm/test/CodeGen/X86/vec_shuffle-22.ll b/llvm/test/CodeGen/X86/vec_shuffle-22.ll
new file mode 100644
index 00000000000..5648356333b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec_shuffle-22.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2       | not grep shuf
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse3 | grep movlhps | count 2
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse3       | grep movddup | count 1
+
+define <4 x float> @t1(<4 x float> %a) nounwind {
+entry:
+        %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 >       ; <<4 x float>> [#uses=1]
+        ret <4 x float> %tmp1
+}
+
+define <4 x i32> @t2(<4 x i32>* %a) nounwind {
+entry:
+        %tmp1 = load <4 x i32>* %a;
+        %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 >       ; <<4 x i32>> [#uses=1]
+        ret <4 x i32> %tmp2
+}
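For readers skimming the patch, the mask shape the new isMOVDDUPMask predicate accepts is simply one whose two halves both count 0..e-1, with undef elements allowed. A standalone sketch over plain integer arrays (the helper name is hypothetical and -1 stands in for an undef element; this is not LLVM code):

```cpp
#include <cstddef>

// Mask shape accepted by the predicate: for an n-element shuffle mask,
// both halves must read 0, 1, ..., n/2-1, with undef (-1) allowed anywhere.
// Matching examples: <0,0> for v2f64, <0,1,0,1> for v4f32.
static bool looksLikeMovddupMask(const int *mask, std::size_t n) {
  std::size_t e = n / 2;
  for (std::size_t i = 0; i < e; ++i)
    if (mask[i] != -1 && mask[i] != static_cast<int>(i))
      return false;                       // low half must be 0..e-1
  for (std::size_t i = 0; i < e; ++i)
    if (mask[e + i] != -1 && mask[e + i] != static_cast<int>(i))
      return false;                       // high half repeats 0..e-1
  return true;
}
```

Any shuffle matching this shape (with an undef second operand) is canonicalized by CanonicalizeMovddup above to v2f64 for movddup or to v4f32 for movlhps, and the new vec_shuffle-22.ll test checks both outcomes.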

