diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 128 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 16 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 18 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 132 | 
4 files changed, 116 insertions, 178 deletions
| diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 96c6f410719..6ebba0e4566 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2843,18 +2843,10 @@ static bool isTargetShuffle(unsigned Opcode) {    case X86ISD::MOVDDUP:    case X86ISD::MOVSS:    case X86ISD::MOVSD: -  case X86ISD::UNPCKLPS: -  case X86ISD::UNPCKLPD: -  case X86ISD::PUNPCKLWD: -  case X86ISD::PUNPCKLBW: -  case X86ISD::PUNPCKLDQ: -  case X86ISD::PUNPCKLQDQ: -  case X86ISD::UNPCKHPS: -  case X86ISD::UNPCKHPD: -  case X86ISD::PUNPCKHWD: -  case X86ISD::PUNPCKHBW: -  case X86ISD::PUNPCKHDQ: -  case X86ISD::PUNPCKHQDQ: +  case X86ISD::UNPCKLP: +  case X86ISD::PUNPCKL: +  case X86ISD::UNPCKHP: +  case X86ISD::PUNPCKH:    case X86ISD::VPERMILPS:    case X86ISD::VPERMILPSY:    case X86ISD::VPERMILPD: @@ -2920,18 +2912,10 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,    case X86ISD::MOVLPD:    case X86ISD::MOVSS:    case X86ISD::MOVSD: -  case X86ISD::UNPCKLPS: -  case X86ISD::UNPCKLPD: -  case X86ISD::PUNPCKLWD: -  case X86ISD::PUNPCKLBW: -  case X86ISD::PUNPCKLDQ: -  case X86ISD::PUNPCKLQDQ: -  case X86ISD::UNPCKHPS: -  case X86ISD::UNPCKHPD: -  case X86ISD::PUNPCKHWD: -  case X86ISD::PUNPCKHBW: -  case X86ISD::PUNPCKHDQ: -  case X86ISD::PUNPCKHQDQ: +  case X86ISD::UNPCKLP: +  case X86ISD::PUNPCKL: +  case X86ISD::UNPCKHP: +  case X86ISD::PUNPCKH:      return DAG.getNode(Opc, dl, VT, V1, V2);    }    return SDValue(); @@ -4635,24 +4619,16 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,                         cast<ConstantSDNode>(ImmN)->getZExtValue(),                         ShuffleMask);        break; -    case X86ISD::PUNPCKHBW: -    case X86ISD::PUNPCKHWD: -    case X86ISD::PUNPCKHDQ: -    case X86ISD::PUNPCKHQDQ: +    case X86ISD::PUNPCKH:        DecodePUNPCKHMask(NumElems, ShuffleMask);        break; -    case X86ISD::UNPCKHPS: -    case X86ISD::UNPCKHPD: +    case X86ISD::UNPCKHP:        DecodeUNPCKHPMask(VT, ShuffleMask);        break; -    case X86ISD::PUNPCKLBW: -    case X86ISD::PUNPCKLWD: -    case X86ISD::PUNPCKLDQ: -    case X86ISD::PUNPCKLQDQ: +    case X86ISD::PUNPCKL:        DecodePUNPCKLMask(VT, ShuffleMask);        break; -    case X86ISD::UNPCKLPS: -    case X86ISD::UNPCKLPD: +    case X86ISD::UNPCKLP:        DecodeUNPCKLPMask(VT, ShuffleMask);        break;      case X86ISD::MOVHLPS: @@ -6568,22 +6544,20 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {  static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) {    switch(VT.getSimpleVT().SimpleTy) { -  case MVT::v4i32: return X86ISD::PUNPCKLDQ; -  case MVT::v2i64: return X86ISD::PUNPCKLQDQ; +  case MVT::v32i8: +  case MVT::v16i8: +  case MVT::v16i16: +  case MVT::v8i16: +  case MVT::v4i32: +  case MVT::v2i64: return X86ISD::PUNPCKL;    case MVT::v8i32: -    if (HasAVX2)   return X86ISD::PUNPCKLDQ; -    // else use fp unit for int unpack. -  case MVT::v8f32: -  case MVT::v4f32: return X86ISD::UNPCKLPS;    case MVT::v4i64: -    if (HasAVX2)   return X86ISD::PUNPCKLQDQ; +    if (HasAVX2)   return X86ISD::PUNPCKL;      // else use fp unit for int unpack. +  case MVT::v8f32: +  case MVT::v4f32:    case MVT::v4f64: -  case MVT::v2f64: return X86ISD::UNPCKLPD; -  case MVT::v32i8: -  case MVT::v16i8: return X86ISD::PUNPCKLBW; -  case MVT::v16i16: -  case MVT::v8i16: return X86ISD::PUNPCKLWD; +  case MVT::v2f64: return X86ISD::UNPCKLP;    default:      llvm_unreachable("Unknown type for unpckl");    } @@ -6592,22 +6566,20 @@ static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) {  static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) {    switch(VT.getSimpleVT().SimpleTy) { -  case MVT::v4i32: return X86ISD::PUNPCKHDQ; -  case MVT::v2i64: return X86ISD::PUNPCKHQDQ; +  case MVT::v32i8: +  case MVT::v16i8: +  case MVT::v16i16: +  case MVT::v8i16: +  case MVT::v4i32: +  case MVT::v2i64: return X86ISD::PUNPCKH; +  case MVT::v4i64:    case MVT::v8i32: -    if (HasAVX2)   return X86ISD::PUNPCKHDQ; +    if (HasAVX2)   return X86ISD::PUNPCKH;      // else use fp unit for int unpack.    case MVT::v8f32: -  case MVT::v4f32: return X86ISD::UNPCKHPS; -  case MVT::v4i64: -    if (HasAVX2)   return X86ISD::PUNPCKHQDQ; -    // else use fp unit for int unpack. +  case MVT::v4f32:    case MVT::v4f64: -  case MVT::v2f64: return X86ISD::UNPCKHPD; -  case MVT::v32i8: -  case MVT::v16i8: return X86ISD::PUNPCKHBW; -  case MVT::v16i16: -  case MVT::v8i16: return X86ISD::PUNPCKHWD; +  case MVT::v2f64: return X86ISD::UNPCKHP;    default:      llvm_unreachable("Unknown type for unpckh");    } @@ -6910,9 +6882,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {    if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&        SVOp->getSplatIndex() == 0 && V2IsUndef) {      if (VT == MVT::v2f64) -      return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG); +      return getTargetShuffleNode(X86ISD::UNPCKLP, dl, VT, V1, V1, DAG);      if (VT == MVT::v2i64) -      return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG); +      return getTargetShuffleNode(X86ISD::PUNPCKL, dl, VT, V1, V1, DAG);    }    if (isPSHUFHWMask(M, VT)) @@ -11266,18 +11238,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {    case X86ISD::MOVSLDUP_LD:        return "X86ISD::MOVSLDUP_LD";    case X86ISD::MOVSD:              return "X86ISD::MOVSD";    case X86ISD::MOVSS:              return "X86ISD::MOVSS"; -  case X86ISD::UNPCKLPS:           return "X86ISD::UNPCKLPS"; -  case X86ISD::UNPCKLPD:           return "X86ISD::UNPCKLPD"; -  case X86ISD::UNPCKHPS:           return "X86ISD::UNPCKHPS"; -  case X86ISD::UNPCKHPD:           return "X86ISD::UNPCKHPD"; -  case X86ISD::PUNPCKLBW:          return "X86ISD::PUNPCKLBW"; -  case X86ISD::PUNPCKLWD:          return "X86ISD::PUNPCKLWD"; -  case X86ISD::PUNPCKLDQ:          return "X86ISD::PUNPCKLDQ"; -  case X86ISD::PUNPCKLQDQ:         return "X86ISD::PUNPCKLQDQ"; -  case X86ISD::PUNPCKHBW:          return "X86ISD::PUNPCKHBW"; -  case X86ISD::PUNPCKHWD:          return "X86ISD::PUNPCKHWD"; -  case X86ISD::PUNPCKHDQ:          return "X86ISD::PUNPCKHDQ"; -  case X86ISD::PUNPCKHQDQ:         return "X86ISD::PUNPCKHQDQ"; +  case X86ISD::UNPCKLP:            return "X86ISD::UNPCKLP"; +  case X86ISD::UNPCKHP:            return "X86ISD::UNPCKHP"; +  case X86ISD::PUNPCKL:            return "X86ISD::PUNPCKL"; +  case X86ISD::PUNPCKH:            return "X86ISD::PUNPCKH";    case X86ISD::VBROADCAST:         return "X86ISD::VBROADCAST";    case X86ISD::VPERMILPS:          return "X86ISD::VPERMILPS";    case X86ISD::VPERMILPSY:         return "X86ISD::VPERMILPSY"; @@ -14857,18 +14821,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,    case X86ISD::SHUFPS:      // Handle all target specific shuffles    case X86ISD::SHUFPD:    case X86ISD::PALIGN: -  case X86ISD::PUNPCKHBW: -  case X86ISD::PUNPCKHWD: -  case X86ISD::PUNPCKHDQ: -  case X86ISD::PUNPCKHQDQ: -  case X86ISD::UNPCKHPS: -  case X86ISD::UNPCKHPD: -  case X86ISD::PUNPCKLBW: -  case X86ISD::PUNPCKLWD: -  case X86ISD::PUNPCKLDQ: -  case X86ISD::PUNPCKLQDQ: -  case X86ISD::UNPCKLPS: -  case X86ISD::UNPCKLPD: +  case X86ISD::PUNPCKH: +  case X86ISD::UNPCKHP: +  case X86ISD::PUNPCKL: +  case X86ISD::UNPCKLP:    case X86ISD::MOVHLPS:    case X86ISD::MOVLHPS:    case X86ISD::PSHUFD: diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index ccff3a5ea69..582b6b522c8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -273,18 +273,10 @@ namespace llvm {        MOVLPD,        MOVSD,        MOVSS, -      UNPCKLPS, -      UNPCKLPD, -      UNPCKHPS, -      UNPCKHPD, -      PUNPCKLBW, -      PUNPCKLWD, -      PUNPCKLDQ, -      PUNPCKLQDQ, -      PUNPCKHBW, -      PUNPCKHWD, -      PUNPCKHDQ, -      PUNPCKHQDQ, +      UNPCKLP, +      UNPCKHP, +      PUNPCKL, +      PUNPCKH,        VPERMILPS,        VPERMILPSY,        VPERMILPD, diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 791bbe6566c..32392dd5529 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -130,21 +130,11 @@ def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;  def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;  def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; -def X86Unpcklps  : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>; -def X86Unpcklpd  : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>; +def X86Unpcklp : SDNode<"X86ISD::UNPCKLP", SDTShuff2Op>; +def X86Unpckhp : SDNode<"X86ISD::UNPCKHP", SDTShuff2Op>; -def X86Unpckhps  : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>; -def X86Unpckhpd  : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>; - -def X86Punpcklbw  : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>; -def X86Punpcklwd  : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>; -def X86Punpckldq  : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>; -def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>; - -def X86Punpckhbw  : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>; -def X86Punpckhwd  : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>; -def X86Punpckhdq  : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>; -def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>; +def X86Punpckl : SDNode<"X86ISD::PUNPCKL", SDTShuff2Op>; +def X86Punpckh : SDNode<"X86ISD::PUNPCKH", SDTShuff2Op>;  def X86VPermilps  : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>;  def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 7cadac16d7d..87df492121b 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1159,11 +1159,11 @@ let Predicates = [HasAVX] in {                   (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),              (VMOVHPSrm VR128:$src1, addr:$src2)>; -  // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem +  // FIXME: Instead of X86Unpcklp, there should be a X86Movlhpd here, the problem    // is during lowering, where it's not possible to recognize the load fold cause    // it has two uses through a bitcast. One use disappears at isel time and the    // fold opportunity reappears. -  def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, +  def : Pat<(v2f64 (X86Unpcklp VR128:$src1,                        (scalar_to_vector (loadf64 addr:$src2)))),              (VMOVHPDrm VR128:$src1, addr:$src2)>; @@ -1174,10 +1174,10 @@ let Predicates = [HasAVX] in {    // Store patterns    def : Pat<(store (f64 (vector_extract -            (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst), +            (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst),              (VMOVHPSmr addr:$dst, VR128:$src)>;    def : Pat<(store (f64 (vector_extract -            (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))), addr:$dst), +            (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst),              (VMOVHPDmr addr:$dst, VR128:$src)>;  } @@ -1194,16 +1194,16 @@ let Predicates = [HasSSE1] in {    // Store patterns    def : Pat<(store (f64 (vector_extract -            (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst), +            (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst),              (MOVHPSmr addr:$dst, VR128:$src)>;  }  let Predicates = [HasSSE2] in { -  // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem +  // FIXME: Instead of X86Unpcklp, there should be a X86Movlhpd here, the problem    // is during lowering, where it's not possible to recognize the load fold cause    // it has two uses through a bitcast. One use disappears at isel time and the    // fold opportunity reappears. -  def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, +  def : Pat<(v2f64 (X86Unpcklp VR128:$src1,                        (scalar_to_vector (loadf64 addr:$src2)))),              (MOVHPDrm VR128:$src1, addr:$src2)>; @@ -1214,7 +1214,7 @@ let Predicates = [HasSSE2] in {    // Store patterns    def : Pat<(store (f64 (vector_extract -            (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst), +            (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))),addr:$dst),              (MOVHPDmr addr:$dst, VR128:$src)>;  } @@ -2430,27 +2430,27 @@ let AddedComplexity = 10 in {  } // AddedComplexity  let Predicates = [HasSSE1] in { -  def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), +  def : Pat<(v4f32 (X86Unpcklp VR128:$src1, (memopv4f32 addr:$src2))),              (UNPCKLPSrm VR128:$src1, addr:$src2)>; -  def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), +  def : Pat<(v4f32 (X86Unpcklp VR128:$src1, VR128:$src2)),              (UNPCKLPSrr VR128:$src1, VR128:$src2)>; -  def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), +  def : Pat<(v4f32 (X86Unpckhp VR128:$src1, (memopv4f32 addr:$src2))),              (UNPCKHPSrm VR128:$src1, addr:$src2)>; -  def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), +  def : Pat<(v4f32 (X86Unpckhp VR128:$src1, VR128:$src2)),              (UNPCKHPSrr VR128:$src1, VR128:$src2)>;  }  let Predicates = [HasSSE2] in { -  def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), +  def : Pat<(v2f64 (X86Unpcklp VR128:$src1, (memopv2f64 addr:$src2))),              (UNPCKLPDrm VR128:$src1, addr:$src2)>; -  def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), +  def : Pat<(v2f64 (X86Unpcklp VR128:$src1, VR128:$src2)),              (UNPCKLPDrr VR128:$src1, VR128:$src2)>; -  def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), +  def : Pat<(v2f64 (X86Unpckhp VR128:$src1, (memopv2f64 addr:$src2))),              (UNPCKHPDrm VR128:$src1, addr:$src2)>; -  def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), +  def : Pat<(v2f64 (X86Unpckhp VR128:$src1, VR128:$src2)),              (UNPCKHPDrr VR128:$src1, VR128:$src2)>; -  // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the +  // FIXME: Instead of X86Movddup, there should be a X86Unpcklp here, the    // problem is during lowering, where it's not possible to recognize the load    // fold cause it has two uses through a bitcast. One use disappears at isel    // time and the fold opportunity reappears. @@ -2463,59 +2463,59 @@ let Predicates = [HasSSE2] in {  }  let Predicates = [HasAVX] in { -  def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), +  def : Pat<(v4f32 (X86Unpcklp VR128:$src1, (memopv4f32 addr:$src2))),              (VUNPCKLPSrm VR128:$src1, addr:$src2)>; -  def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), +  def : Pat<(v4f32 (X86Unpcklp VR128:$src1, VR128:$src2)),              (VUNPCKLPSrr VR128:$src1, VR128:$src2)>; -  def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), +  def : Pat<(v4f32 (X86Unpckhp VR128:$src1, (memopv4f32 addr:$src2))),              (VUNPCKHPSrm VR128:$src1, addr:$src2)>; -  def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), +  def : Pat<(v4f32 (X86Unpckhp VR128:$src1, VR128:$src2)),              (VUNPCKHPSrr VR128:$src1, VR128:$src2)>; -  def : Pat<(v8f32 (X86Unpcklps VR256:$src1, (memopv8f32 addr:$src2))), +  def : Pat<(v8f32 (X86Unpcklp VR256:$src1, (memopv8f32 addr:$src2))),              (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; -  def : Pat<(v8f32 (X86Unpcklps VR256:$src1, VR256:$src2)), +  def : Pat<(v8f32 (X86Unpcklp VR256:$src1, VR256:$src2)),              (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; -  def : Pat<(v8i32 (X86Unpcklps VR256:$src1, VR256:$src2)), +  def : Pat<(v8i32 (X86Unpcklp VR256:$src1, VR256:$src2)),              (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; -  def : Pat<(v8i32 (X86Unpcklps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), +  def : Pat<(v8i32 (X86Unpcklp VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),              (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; -  def : Pat<(v8f32 (X86Unpckhps VR256:$src1, (memopv8f32 addr:$src2))), +  def : Pat<(v8f32 (X86Unpckhp VR256:$src1, (memopv8f32 addr:$src2))),              (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; -  def : Pat<(v8f32 (X86Unpckhps VR256:$src1, VR256:$src2)), +  def : Pat<(v8f32 (X86Unpckhp VR256:$src1, VR256:$src2)),              (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; -  def : Pat<(v8i32 (X86Unpckhps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), +  def : Pat<(v8i32 (X86Unpckhp VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),              (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; -  def : Pat<(v8i32 (X86Unpckhps VR256:$src1, VR256:$src2)), +  def : Pat<(v8i32 (X86Unpckhp VR256:$src1, VR256:$src2)),              (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; -  def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), +  def : Pat<(v2f64 (X86Unpcklp VR128:$src1, (memopv2f64 addr:$src2))),              (VUNPCKLPDrm VR128:$src1, addr:$src2)>; -  def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), +  def : Pat<(v2f64 (X86Unpcklp VR128:$src1, VR128:$src2)),              (VUNPCKLPDrr VR128:$src1, VR128:$src2)>; -  def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), +  def : Pat<(v2f64 (X86Unpckhp VR128:$src1, (memopv2f64 addr:$src2))),              (VUNPCKHPDrm VR128:$src1, addr:$src2)>; -  def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), +  def : Pat<(v2f64 (X86Unpckhp VR128:$src1, VR128:$src2)),              (VUNPCKHPDrr VR128:$src1, VR128:$src2)>; -  def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, (memopv4f64 addr:$src2))), +  def : Pat<(v4f64 (X86Unpcklp VR256:$src1, (memopv4f64 addr:$src2))),              (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; -  def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, VR256:$src2)), +  def : Pat<(v4f64 (X86Unpcklp VR256:$src1, VR256:$src2)),              (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; -  def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, (memopv4i64 addr:$src2))), +  def : Pat<(v4i64 (X86Unpcklp VR256:$src1, (memopv4i64 addr:$src2))),              (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; -  def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, VR256:$src2)), +  def : Pat<(v4i64 (X86Unpcklp VR256:$src1, VR256:$src2)),              (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; -  def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, (memopv4f64 addr:$src2))), +  def : Pat<(v4f64 (X86Unpckhp VR256:$src1, (memopv4f64 addr:$src2))),              (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; -  def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, VR256:$src2)), +  def : Pat<(v4f64 (X86Unpckhp VR256:$src1, VR256:$src2)),              (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; -  def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, (memopv4i64 addr:$src2))), +  def : Pat<(v4i64 (X86Unpckhp VR256:$src1, (memopv4i64 addr:$src2))),              (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; -  def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, VR256:$src2)), +  def : Pat<(v4i64 (X86Unpckhp VR256:$src1, VR256:$src2)),              (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; -  // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the +  // FIXME: Instead of X86Movddup, there should be a X86Unpcklp here, the    // problem is during lowering, where it's not possible to recognize the load    // fold cause it has two uses through a bitcast. One use disappears at isel    // time and the fold opportunity reappears. @@ -4198,62 +4198,62 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,  }  let Predicates = [HasAVX] in { -  defm VPUNPCKLBW  : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw, +  defm VPUNPCKLBW  : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpckl,                                   bc_v16i8, 0>, VEX_4V; -  defm VPUNPCKLWD  : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpcklwd, +  defm VPUNPCKLWD  : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpckl,                                   bc_v8i16, 0>, VEX_4V; -  defm VPUNPCKLDQ  : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq, +  defm VPUNPCKLDQ  : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckl,                                   bc_v4i32, 0>, VEX_4V; -  defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpcklqdq, +  defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpckl,                                   bc_v2i64, 0>, VEX_4V; -  defm VPUNPCKHBW  : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw, +  defm VPUNPCKHBW  : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckh,                                   bc_v16i8, 0>, VEX_4V; -  defm VPUNPCKHWD  : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckhwd, +  defm VPUNPCKHWD  : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckh,                                   bc_v8i16, 0>, VEX_4V; -  defm VPUNPCKHDQ  : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq, +  defm VPUNPCKHDQ  : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckh,                                   bc_v4i32, 0>, VEX_4V; -  defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckhqdq, +  defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckh,                                   bc_v2i64, 0>, VEX_4V;  }  let Predicates = [HasAVX2] in { -  defm VPUNPCKLBW  : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw, +  defm VPUNPCKLBW  : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpckl,                                     bc_v32i8>, VEX_4V; -  defm VPUNPCKLWD  : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwd, +  defm VPUNPCKLWD  : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpckl,                                     bc_v16i16>, VEX_4V; -  defm VPUNPCKLDQ  : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldq, +  defm VPUNPCKLDQ  : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckl,                                     bc_v8i32>, VEX_4V; -  defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpcklqdq, +  defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpckl,                                     bc_v4i64>, VEX_4V; -  defm VPUNPCKHBW  : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw, +  defm VPUNPCKHBW  : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckh,                                     bc_v32i8>, VEX_4V; -  defm VPUNPCKHWD  : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwd, +  defm VPUNPCKHWD  : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckh,                                     bc_v16i16>, VEX_4V; -  defm VPUNPCKHDQ  : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdq, +  defm VPUNPCKHDQ  : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckh,                                     bc_v8i32>, VEX_4V; -  defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckhqdq, +  defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckh,                                     bc_v4i64>, VEX_4V;  }  let Constraints = "$src1 = $dst" in { -  defm PUNPCKLBW  : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, +  defm PUNPCKLBW  : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpckl,                                  bc_v16i8>; -  defm PUNPCKLWD  : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, +  defm PUNPCKLWD  : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpckl,                                  bc_v8i16>; -  defm PUNPCKLDQ  : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, +  defm PUNPCKLDQ  : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckl,                                  bc_v4i32>; -  defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpcklqdq, +  defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpckl,                                  bc_v2i64>; -  defm PUNPCKHBW  : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, +  defm PUNPCKHBW  : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckh,                                  bc_v16i8>; -  defm PUNPCKHWD  : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, +  defm PUNPCKHWD  : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckh,                                  bc_v8i16>; -  defm PUNPCKHDQ  : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, +  defm PUNPCKHDQ  : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckh,                                  bc_v4i32>; -  defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckhqdq, +  defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckh,                                  bc_v2i64>;  }  } // ExeDomain = SSEPackedInt | 

