diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 29 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 76 | 
3 files changed, 104 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 01951e636ae..823d0709d3c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2376,7 +2376,9 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
     abort();
   }
   case ISD::BUILD_VECTOR: {
+    std::set<SDOperand> Values;
     SDOperand Elt0 = Op.getOperand(0);
+    Values.insert(Elt0);
     bool Elt0IsZero = (isa<ConstantSDNode>(Elt0) &&
                        cast<ConstantSDNode>(Elt0)->getValue() == 0) ||
       (isa<ConstantFPSDNode>(Elt0) &&
@@ -2384,15 +2386,16 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
     bool RestAreZero = true;
     unsigned NumElems = Op.getNumOperands();
     for (unsigned i = 1; i < NumElems; ++i) {
-      SDOperand V = Op.getOperand(i);
-      if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(V)) {
+      SDOperand Elt = Op.getOperand(i);
+      if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Elt)) {
         if (!FPC->isExactlyValue(+0.0))
           RestAreZero = false;
-      } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V)) {
+      } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
         if (!C->isNullValue())
           RestAreZero = false;
       } else
         RestAreZero = false;
+      Values.insert(Elt);
     }
 
     if (RestAreZero) {
@@ -2402,6 +2405,25 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
       return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0);
     }
 
+    if (Values.size() > 2) {
+      // Expand into a number of unpckl*.
+      // e.g. for v4f32
+      //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
+      //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
+      //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
+      MVT::ValueType VT = Op.getValueType();
+      std::vector<SDOperand> V(NumElems);
+      for (unsigned i = 0; i < NumElems; ++i)
+        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
+      NumElems >>= 1;
+      while (NumElems != 0) {
+        for (unsigned i = 0; i < NumElems; ++i)
+          V[i] = DAG.getNode(X86ISD::UNPCKL, VT, V[i], V[i + NumElems]);
+        NumElems >>= 1;
+      }
+      return V[0];
+    }
+
     return SDOperand();
   }
   }
@@ -2439,6 +2461,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::Wrapper:            return "X86ISD::Wrapper";
   case X86ISD::S2VEC:              return "X86ISD::S2VEC";
   case X86ISD::ZEXT_S2VEC:         return "X86ISD::ZEXT_S2VEC";
+  case X86ISD::UNPCKL:             return "X86ISD::UNPCKL";
   }
 }
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 1dc90e536e1..71d7751e48d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -153,6 +153,10 @@ namespace llvm {
       /// ZEXT_S2VEC - SCALAR_TO_VECTOR with zero extension. The destination base
       /// does not have to match the operand type.
       ZEXT_S2VEC,
+
+      /// UNPCKL - Unpack and interleave low. This corresponds to X86::UNPCKLPS,
+      /// X86::PUNPCKL*.
+      UNPCKL,
     };
 
     // X86 specific condition code. These correspond to X86_*_COND in
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index bdd43fbc397..a1946aae278 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -28,6 +28,11 @@ def X86s2vec   : SDNode<"X86ISD::S2VEC",
 def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC",
                           SDTypeProfile<1, 1, []>, []>;
 
+def SDTUnpckl : SDTypeProfile<1, 2,
+                              [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
+def X86unpckl  : SDNode<"X86ISD::UNPCKL", SDTUnpckl,
+                        []>;
+
 //===----------------------------------------------------------------------===//
 // SSE pattern fragments
 //===----------------------------------------------------------------------===//
@@ -787,10 +792,14 @@ def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
                     "unpckhpd {$src2, $dst|$dst, $src2}", []>;
 def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
-                    "unpcklps {$src2, $dst|$dst, $src2}", []>;
+                    "unpcklps {$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (v4f32 (X86unpckl VR128:$src1,
+                                              VR128:$src2)))]>;
 def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, f128mem:$src2),
-                    "unpcklps {$src2, $dst|$dst, $src2}", []>;
+                    "unpcklps {$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (v4f32 (X86unpckl VR128:$src1,
+                                              (load addr:$src2))))]>;
 def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpcklpd {$src2, $dst|$dst, $src2}", []>;
@@ -885,6 +894,69 @@ def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                   "psubd {$src2, $dst|$dst, $src2}",
               [(set VR128:$dst, (v4i32 (sub VR128:$src1,
                                         (load addr:$src2))))]>;
+
+// Unpack and interleave. Only the low bw/wd/dq forms are pattern-matched.
+def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
+                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                      "punpcklbw {$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst, (v16i8 (X86unpckl VR128:$src1,
+                                                VR128:$src2)))]>;
+def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
+                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                      "punpcklbw {$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst, (v16i8 (X86unpckl VR128:$src1,
+                                                (load addr:$src2))))]>;
+def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
+                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                      "punpcklwd {$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst, (v8i16 (X86unpckl VR128:$src1,
+                                                VR128:$src2)))]>;
+def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
+                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                      "punpcklwd {$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst, (v8i16 (X86unpckl VR128:$src1,
+                                                (load addr:$src2))))]>;
+def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
+                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                      "punpckldq {$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst, (v4i32 (X86unpckl VR128:$src1,
+                                                VR128:$src2)))]>;
+def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
+                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                      "punpckldq {$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst, (v4i32 (X86unpckl VR128:$src1,
+                                                (load addr:$src2))))]>;
+def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
+                       (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                       "punpcklqdq {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
+                       (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                       "punpcklqdq {$src2, $dst|$dst, $src2}", []>;
+
+def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
+                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                      "punpckhbw {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
+                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                      "punpckhbw {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
+                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                      "punpckhwd {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
+                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                      "punpckhwd {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
+                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                      "punpckhdq {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
+                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                      "punpckhdq {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
+                       (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                       "punpckhqdq {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
+                       (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                       "punpckhqdq {$src2, $dst|$dst, $src2}", []>;
 }
 
 //===----------------------------------------------------------------------===//

