diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 143 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_insert-4.ll (renamed from llvm/test/CodeGen/X86/vec_insert_4.ll) | 0 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_insert-9.ll | 9 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_set.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_shuffle.ll | 3 | 
5 files changed, 80 insertions, 77 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 704f9c65a59..960655806b7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3613,6 +3613,54 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,    return SDValue();  } +static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, +                                        DebugLoc &dl, SelectionDAG &DAG) { +  EVT EltVT = VT.getVectorElementType(); +  unsigned NumElems = Elts.size(); +   +  // FIXME: check for zeroes +  LoadSDNode *LDBase = NULL; +  unsigned LastLoadedElt = -1U; +  for (unsigned i = 0; i < NumElems; ++i) { +    SDValue Elt = Elts[i]; +     +    if (!Elt.getNode() || +        (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode()))) +      return SDValue(); +    if (!LDBase) { +      if (Elt.getNode()->getOpcode() == ISD::UNDEF) +        return SDValue(); +      LDBase = cast<LoadSDNode>(Elt.getNode()); +      LastLoadedElt = i; +      continue; +    } +    if (Elt.getOpcode() == ISD::UNDEF) +      continue; + +    LoadSDNode *LD = cast<LoadSDNode>(Elt); +    if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i)) +      return SDValue(); +    LastLoadedElt = i; +  } +                                        +  if (LastLoadedElt == NumElems - 1) { +    if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16) +      return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(), +                         LDBase->getSrcValue(), LDBase->getSrcValueOffset(), +                         LDBase->isVolatile(), LDBase->isNonTemporal(), 0); +    return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(), +                       LDBase->getSrcValue(), LDBase->getSrcValueOffset(), +                       LDBase->isVolatile(), LDBase->isNonTemporal(), +                       LDBase->getAlignment()); +  } else if (NumElems == 4 && LastLoadedElt == 1) { +    SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); +    SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; +    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2); +    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode); +  } +  return SDValue(); +} +  SDValue  X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {    DebugLoc dl = Op.getDebugLoc(); @@ -3841,14 +3889,18 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {      return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);    } -  if (Values.size() > 2) { -    // If we have SSE 4.1, Expand into a number of inserts unless the number of -    // values to be inserted is equal to the number of elements, in which case -    // use the unpack code below in the hopes of matching the consecutive elts -    // load merge pattern for shuffles. -    // FIXME: We could probably just check that here directly. -    if (Values.size() < NumElems && VT.getSizeInBits() == 128 && -        getSubtarget()->hasSSE41()) { +  if (Values.size() > 1 && VT.getSizeInBits() == 128) { +    // Check for a build vector of consecutive loads. +    for (unsigned i = 0; i < NumElems; ++i) +      V[i] = Op.getOperand(i); +     +    // Check for elements which are consecutive loads. +    SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG); +    if (LD.getNode()) +      return LD; +     +    // For SSE 4.1, use inserts into undef.   +    if (getSubtarget()->hasSSE41()) {        V[0] = DAG.getUNDEF(VT);        for (unsigned i = 0; i < NumElems; ++i)          if (Op.getOperand(i).getOpcode() != ISD::UNDEF) @@ -3856,7 +3908,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {                               Op.getOperand(i), DAG.getIntPtrConstant(i));        return V[0];      } -    // Expand into a number of unpckl*. +     +    // Otherwise, expand into a number of unpckl*      // e.g. for v4f32      //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>      //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> @@ -3871,7 +3924,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {      }      return V[0];    } -    return SDValue();  } @@ -8797,83 +8849,24 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,    return TargetLowering::isGAPlusOffset(N, GA, Offset);  } -static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, -                                     EVT EltVT, LoadSDNode *&LDBase, -                                     unsigned &LastLoadedElt, -                                     SelectionDAG &DAG, MachineFrameInfo *MFI, -                                     const TargetLowering &TLI) { -  LDBase = NULL; -  LastLoadedElt = -1U; -  for (unsigned i = 0; i < NumElems; ++i) { -    if (N->getMaskElt(i) < 0) { -      if (!LDBase) -        return false; -      continue; -    } - -    SDValue Elt = DAG.getShuffleScalarElt(N, i); -    if (!Elt.getNode() || -        (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode()))) -      return false; -    if (!LDBase) { -      if (Elt.getNode()->getOpcode() == ISD::UNDEF) -        return false; -      LDBase = cast<LoadSDNode>(Elt.getNode()); -      LastLoadedElt = i; -      continue; -    } -    if (Elt.getOpcode() == ISD::UNDEF) -      continue; - -    LoadSDNode *LD = cast<LoadSDNode>(Elt); -    if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i)) -      return false; -    LastLoadedElt = i; -  } -  return true; -} -  /// PerformShuffleCombine - Combine a vector_shuffle that is equal to  /// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load  /// if the load addresses are consecutive, non-overlapping, and in the right -/// order.  In the case of v2i64, it will see if it can rewrite the -/// shuffle to be an appropriate build vector so it can take advantage of -// performBuildVectorCombine. +/// order.  static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,                                       const TargetLowering &TLI) {    DebugLoc dl = N->getDebugLoc();    EVT VT = N->getValueType(0); -  EVT EltVT = VT.getVectorElementType();    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); -  unsigned NumElems = VT.getVectorNumElements();    if (VT.getSizeInBits() != 128)      return SDValue(); -  // Try to combine a vector_shuffle into a 128-bit load. -  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); -  LoadSDNode *LD = NULL; -  unsigned LastLoadedElt; -  if (!EltsFromConsecutiveLoads(SVN, NumElems, EltVT, LD, LastLoadedElt, DAG, -                                MFI, TLI)) -    return SDValue(); - -  if (LastLoadedElt == NumElems - 1) { -    if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16) -      return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), -                         LD->getSrcValue(), LD->getSrcValueOffset(), -                         LD->isVolatile(), LD->isNonTemporal(), 0); -    return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), -                       LD->getSrcValue(), LD->getSrcValueOffset(), -                       LD->isVolatile(), LD->isNonTemporal(), -                       LD->getAlignment()); -  } else if (NumElems == 4 && LastLoadedElt == 1) { -    SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); -    SDValue Ops[] = { LD->getChain(), LD->getBasePtr() }; -    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2); -    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode); -  } -  return SDValue(); +  SmallVector<SDValue, 16> Elts; +  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) +    Elts.push_back(DAG.getShuffleScalarElt(SVN, i)); +   +  return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);  }  /// PerformShuffleCombine - Detect vector gather/scatter index generation diff --git a/llvm/test/CodeGen/X86/vec_insert_4.ll b/llvm/test/CodeGen/X86/vec_insert-4.ll index 2c31e56b4af..2c31e56b4af 100644 --- a/llvm/test/CodeGen/X86/vec_insert_4.ll +++ b/llvm/test/CodeGen/X86/vec_insert-4.ll diff --git a/llvm/test/CodeGen/X86/vec_insert-9.ll b/llvm/test/CodeGen/X86/vec_insert-9.ll new file mode 100644 index 00000000000..2e829df1f8d --- /dev/null +++ b/llvm/test/CodeGen/X86/vec_insert-9.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86 -mattr=+sse41 > %t +; RUN: grep pinsrd %t | count 2 + +define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind  { +entry: +	%tmp3 = insertelement <4 x i32> undef, i32 %val, i32 0		; <<4 x i32>> [#uses=1] +	%tmp4 = insertelement <4 x i32> %tmp3, i32 %idx, i32 3		; <<4 x i32>> [#uses=1] +	ret <4 x i32> %tmp4 +} diff --git a/llvm/test/CodeGen/X86/vec_set.ll b/llvm/test/CodeGen/X86/vec_set.ll index c316df887c1..7f5f8dd213a 100644 --- a/llvm/test/CodeGen/X86/vec_set.ll +++ b/llvm/test/CodeGen/X86/vec_set.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | grep punpckl | count 7 +; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep punpckl | count 7  define void @test(<8 x i16>* %b, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {          %tmp = insertelement <8 x i16> zeroinitializer, i16 %a0, i32 0          ; <<8 x i16>> [#uses=1] diff --git a/llvm/test/CodeGen/X86/vec_shuffle.ll b/llvm/test/CodeGen/X86/vec_shuffle.ll index c05b79a54a1..2a48de22098 100644 --- a/llvm/test/CodeGen/X86/vec_shuffle.ll +++ b/llvm/test/CodeGen/X86/vec_shuffle.ll @@ -1,5 +1,6 @@  ; RUN: llc < %s -march=x86 -mcpu=core2 -o %t -; RUN: grep shufp   %t | count 1 +; RUN: grep movq    %t | count 1 +; RUN: grep pshufd  %t | count 1  ; RUN: grep movupd  %t | count 1  ; RUN: grep pshufhw %t | count 1  | 

