diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 89 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 7 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 21 |
4 files changed, 36 insertions, 87 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d662d12b7a1..4f14a0e20b4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5391,75 +5391,59 @@ static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op, SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); + LLVMContext *Context = DAG.getContext(); EVT VT = Op.getValueType(); EVT InVT = V1.getValueType(); + EVT EltVT = VT.getVectorElementType(); + unsigned EltSize = EltVT.getSizeInBits(); int MaskSize = VT.getVectorNumElements(); int InSize = InVT.getVectorNumElements(); - if (!Subtarget->hasSSE41()) + // TODO: At the moment we only use AVX blends. We could also use SSE4 blends. + if (!Subtarget->hasAVX()) return SDValue(); if (MaskSize != InSize) return SDValue(); - int ISDNo = 0; - MVT OpTy; - - switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v8i16: - ISDNo = X86ISD::BLENDPW; - OpTy = MVT::v8i16; - break; - case MVT::v4i32: - case MVT::v4f32: - ISDNo = X86ISD::BLENDPS; - OpTy = MVT::v4f32; - break; - case MVT::v2i64: - case MVT::v2f64: - ISDNo = X86ISD::BLENDPD; - OpTy = MVT::v2f64; - break; - case MVT::v8i32: - case MVT::v8f32: - if (!Subtarget->hasAVX()) - return SDValue(); - ISDNo = X86ISD::BLENDPS; - OpTy = MVT::v8f32; - break; - case MVT::v4i64: - case MVT::v4f64: - if (!Subtarget->hasAVX()) - return SDValue(); - ISDNo = X86ISD::BLENDPD; - OpTy = MVT::v4f64; - break; - case MVT::v16i16: - if (!Subtarget->hasAVX2()) - return SDValue(); - ISDNo = X86ISD::BLENDPW; - OpTy = MVT::v16i16; - break; - } - assert(ISDNo && "Invalid Op Number"); - - unsigned MaskVals = 0; + SmallVector<Constant*,2> MaskVals; + ConstantInt *Zero = ConstantInt::get(*Context, APInt(EltSize, 0)); + ConstantInt *NegOne = ConstantInt::get(*Context, APInt(EltSize, -1)); for (int i = 0; i < MaskSize; ++i) { int EltIdx = SVOp->getMaskElt(i); if (EltIdx == i || EltIdx == -1) - MaskVals |= (1<<i); + MaskVals.push_back(NegOne); else if (EltIdx == (i + MaskSize)) - continue; // Bit is set to zero; + MaskVals.push_back(Zero); else return SDValue(); } - V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1); - V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2); - SDValue Ret = DAG.getNode(ISDNo, dl, OpTy, V1, V2, - DAG.getConstant(MaskVals, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Ret); + Constant *MaskC = ConstantVector::get(MaskVals); + EVT MaskTy = EVT::getEVT(MaskC->getType()); + assert(MaskTy.getSizeInBits() == VT.getSizeInBits() && "Invalid mask size"); + SDValue MaskIdx = DAG.getConstantPool(MaskC, PtrTy); + unsigned Alignment = cast<ConstantPoolSDNode>(MaskIdx)->getAlignment(); + SDValue Mask = DAG.getLoad(MaskTy, dl, DAG.getEntryNode(), MaskIdx, + MachinePointerInfo::getConstantPool(), + false, false, false, Alignment); + + if (Subtarget->hasAVX2() && MaskTy == MVT::v32i8) + return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2); + + if (Subtarget->hasAVX()) { + switch (MaskTy.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v16i8: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v8i32: + case MVT::v4i64: + return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2); + } + } + + return SDValue(); } // v8i16 shuffles - Prefer shuffles in the following order: @@ -11066,9 +11050,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::ANDNP: return "X86ISD::ANDNP"; case X86ISD::PSIGN: return "X86ISD::PSIGN"; case X86ISD::BLENDV: return "X86ISD::BLENDV"; - case X86ISD::BLENDPW: return "X86ISD::BLENDPW"; - case X86ISD::BLENDPS: return "X86ISD::BLENDPS"; - case X86ISD::BLENDPD: return "X86ISD::BLENDPD"; case X86ISD::HADD: return "X86ISD::HADD"; case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 4e0073365a7..ca8efe64dd4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -175,14 +175,9 @@ namespace llvm { /// PSIGN - Copy integer sign. PSIGN, - /// BLENDV - Blend where the selector is an XMM. + /// BLEND family of opcodes BLENDV, - /// BLENDxx - Blend where the selector is an immediate. - BLENDPW, - BLENDPS, - BLENDPD, - /// HADD - Integer horizontal add. HADD, diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 041a64f336f..ae3ed1bcb32 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -126,8 +126,6 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<3>]>; def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; -def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, -SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>; def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; @@ -160,10 +158,6 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; -def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>; -def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>; -def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>; - //===----------------------------------------------------------------------===// // SSE Complex Patterns //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 7741f409db0..f4e44184915 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -6735,22 +6735,12 @@ let Predicates = [HasAVX] in { def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1), (v4f64 VR256:$src2))), (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>; - - def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2), - (imm:$mask))), - (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>; - def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2), - (imm:$mask))), - (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>; } let Predicates = [HasAVX2] in { def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1), (v32i8 VR256:$src2))), (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>; - def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2), - (imm:$mask))), - (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>; } /// SS41I_ternary_int - SSE 4.1 ternary operator @@ -6799,17 +6789,6 @@ let Predicates = [HasSSE41] in { def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1), (v2f64 VR128:$src2))), (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; - - def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2), - (imm:$mask))), - (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>; - def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2), - (imm:$mask))), - (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>; - def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2), - (imm:$mask))), - (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>; - } let Predicates = [HasAVX] in |