diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 89 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 21 | 
4 files changed, 36 insertions, 87 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d662d12b7a1..4f14a0e20b4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5391,75 +5391,59 @@ static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op,    SDValue V1 = SVOp->getOperand(0);    SDValue V2 = SVOp->getOperand(1);    DebugLoc dl = SVOp->getDebugLoc(); +  LLVMContext *Context = DAG.getContext();    EVT VT = Op.getValueType();    EVT InVT = V1.getValueType(); +  EVT EltVT = VT.getVectorElementType(); +  unsigned EltSize = EltVT.getSizeInBits();    int MaskSize = VT.getVectorNumElements();    int InSize = InVT.getVectorNumElements(); -  if (!Subtarget->hasSSE41()) +  // TODO: At the moment we only use AVX blends. We could also use SSE4 blends. +  if (!Subtarget->hasAVX())      return SDValue();    if (MaskSize != InSize)      return SDValue(); -  int ISDNo = 0; -  MVT OpTy; - -  switch (VT.getSimpleVT().SimpleTy) { -  default: return SDValue(); -  case MVT::v8i16: -           ISDNo = X86ISD::BLENDPW; -           OpTy = MVT::v8i16; -           break; -  case MVT::v4i32: -  case MVT::v4f32: -           ISDNo = X86ISD::BLENDPS; -           OpTy = MVT::v4f32; -           break; -  case MVT::v2i64: -  case MVT::v2f64: -           ISDNo = X86ISD::BLENDPD; -           OpTy = MVT::v2f64; -           break; -  case MVT::v8i32: -  case MVT::v8f32: -           if (!Subtarget->hasAVX()) -             return SDValue(); -           ISDNo = X86ISD::BLENDPS; -           OpTy = MVT::v8f32; -           break; -  case MVT::v4i64: -  case MVT::v4f64: -           if (!Subtarget->hasAVX()) -             return SDValue(); -           ISDNo = X86ISD::BLENDPD; -           OpTy = MVT::v4f64; -           break; -  case MVT::v16i16: -           if (!Subtarget->hasAVX2()) -             return SDValue(); -           ISDNo = X86ISD::BLENDPW; -           OpTy = MVT::v16i16; -           break; -  } -  assert(ISDNo && "Invalid Op Number"); - -  unsigned MaskVals = 0; +  SmallVector<Constant*,2> MaskVals; +  ConstantInt *Zero = ConstantInt::get(*Context, APInt(EltSize, 0)); +  ConstantInt *NegOne = ConstantInt::get(*Context, APInt(EltSize, -1));    for (int i = 0; i < MaskSize; ++i) {      int EltIdx = SVOp->getMaskElt(i);      if (EltIdx == i || EltIdx == -1) -      MaskVals |= (1<<i); +      MaskVals.push_back(NegOne);      else if (EltIdx == (i + MaskSize)) -      continue; // Bit is set to zero; +      MaskVals.push_back(Zero);      else return SDValue();    } -  V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1); -  V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2); -  SDValue Ret =  DAG.getNode(ISDNo, dl, OpTy, V1, V2, -                             DAG.getConstant(MaskVals, MVT::i32)); -  return DAG.getNode(ISD::BITCAST, dl, VT, Ret); +  Constant *MaskC = ConstantVector::get(MaskVals); +  EVT MaskTy = EVT::getEVT(MaskC->getType()); +  assert(MaskTy.getSizeInBits() == VT.getSizeInBits() && "Invalid mask size"); +  SDValue MaskIdx = DAG.getConstantPool(MaskC, PtrTy); +  unsigned Alignment = cast<ConstantPoolSDNode>(MaskIdx)->getAlignment(); +  SDValue Mask = DAG.getLoad(MaskTy, dl, DAG.getEntryNode(), MaskIdx, +                             MachinePointerInfo::getConstantPool(), +                             false, false, false, Alignment); + +  if (Subtarget->hasAVX2() && MaskTy == MVT::v32i8) +    return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2); + +  if (Subtarget->hasAVX()) { +    switch (MaskTy.getSimpleVT().SimpleTy) { +    default: return SDValue(); +    case MVT::v16i8: +    case MVT::v4i32: +    case MVT::v2i64: +    case MVT::v8i32: +    case MVT::v4i64: +             return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2); +    } +  } + +  return SDValue();  }  // v8i16 shuffles - Prefer shuffles in the following order: @@ -11066,9 +11050,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {    case X86ISD::ANDNP:              return "X86ISD::ANDNP";    case X86ISD::PSIGN:              return "X86ISD::PSIGN";    case X86ISD::BLENDV:             return "X86ISD::BLENDV"; -  case X86ISD::BLENDPW:            return "X86ISD::BLENDPW"; -  case X86ISD::BLENDPS:            return "X86ISD::BLENDPS"; -  case X86ISD::BLENDPD:            return "X86ISD::BLENDPD";    case X86ISD::HADD:               return "X86ISD::HADD";    case X86ISD::HSUB:               return "X86ISD::HSUB";    case X86ISD::FHADD:              return "X86ISD::FHADD"; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 4e0073365a7..ca8efe64dd4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -175,14 +175,9 @@ namespace llvm {        /// PSIGN - Copy integer sign.        PSIGN, -      /// BLENDV - Blend where the selector is an XMM. +      /// BLEND family of opcodes        BLENDV, -      /// BLENDxx - Blend where the selector is an immediate. -      BLENDPW, -      BLENDPS, -      BLENDPD, -        /// HADD - Integer horizontal add.        HADD, diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 041a64f336f..ae3ed1bcb32 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -126,8 +126,6 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,                                   SDTCisSameAs<0,2>, SDTCisInt<3>]>;  def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; -def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, -SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;  def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; @@ -160,10 +158,6 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;  def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; -def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>; -def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>; -def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>; -  //===----------------------------------------------------------------------===//  // SSE Complex Patterns  //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 7741f409db0..f4e44184915 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -6735,22 +6735,12 @@ let Predicates = [HasAVX] in {    def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),                              (v4f64 VR256:$src2))),              (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>; - -  def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2), -                               (imm:$mask))), -            (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>; -  def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2), -                               (imm:$mask))), -            (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>;  }  let Predicates = [HasAVX2] in {    def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),                              (v32i8 VR256:$src2))),              (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>; -  def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2), -                               (imm:$mask))), -            (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>;  }  /// SS41I_ternary_int - SSE 4.1 ternary operator @@ -6799,17 +6789,6 @@ let Predicates = [HasSSE41] in {    def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),                              (v2f64 VR128:$src2))),              (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; - -  def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2), -                               (imm:$mask))), -            (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>; -  def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2), -                               (imm:$mask))), -            (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>; -  def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2), -                               (imm:$mask))), -            (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>; -  }  let Predicates = [HasAVX] in  | 

