-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 124
-rw-r--r--  llvm/test/CodeGen/X86/avx-blend.ll      |  12
-rw-r--r--  llvm/test/CodeGen/X86/blend-msb.ll      |  12
-rw-r--r--  llvm/test/CodeGen/X86/sse41-blend.ll    |   6
4 files changed, 17 insertions, 137 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 552d420b805..8ea1790e52b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -11797,43 +11797,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
-// This function assumes its argument is a BUILD_VECTOR of constants or
-// undef SDNodes. i.e: ISD::isBuildVectorOfConstantSDNodes(BuildVector) is
-// true.
-static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
-                                    unsigned &MaskValue) {
-  MaskValue = 0;
-  unsigned NumElems = BuildVector->getNumOperands();
-  // There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
-  unsigned NumLanes = (NumElems - 1) / 8 + 1;
-  unsigned NumElemsInLane = NumElems / NumLanes;
-
-  // Blend for v16i16 should be symetric for the both lanes.
-  for (unsigned i = 0; i < NumElemsInLane; ++i) {
-    SDValue EltCond = BuildVector->getOperand(i);
-    SDValue SndLaneEltCond =
-        (NumLanes == 2) ? BuildVector->getOperand(i + NumElemsInLane) : EltCond;
-
-    int Lane1Cond = -1, Lane2Cond = -1;
-    if (isa<ConstantSDNode>(EltCond))
-      Lane1Cond = !isZero(EltCond);
-    if (isa<ConstantSDNode>(SndLaneEltCond))
-      Lane2Cond = !isZero(SndLaneEltCond);
-
-    if (Lane1Cond == Lane2Cond || Lane2Cond < 0)
-      // Lane1Cond != 0, means we want the first argument.
-      // Lane1Cond == 0, means we want the second argument.
-      // The encoding of this argument is 0 for the first argument, 1
-      // for the second. Therefore, invert the condition.
-      MaskValue |= !Lane1Cond << i;
-    else if (Lane1Cond < 0)
-      MaskValue |= !Lane2Cond << i;
-    else
-      return false;
-  }
-  return true;
-}
-
 /// \brief Try to lower a VSELECT instruction to an immediate-controlled blend
 /// instruction.
 static SDValue lowerVSELECTtoBLENDI(SDValue Op, const X86Subtarget *Subtarget,
@@ -11883,17 +11846,18 @@ static SDValue lowerVSELECTtoBLENDI(SDValue Op, const X86Subtarget *Subtarget,
   } else {
     // Everything else uses a generic blend mask computation with a custom type.
     if (VT.isInteger()) {
-      if (VT.is256BitVector()) {
-        // The 256-bit integer blend instructions are only available on AVX2.
-        if (!Subtarget->hasAVX2())
-          return SDValue();
-
-        // We do the blend on v8i32 for 256-bit integer types.
-        BlendVT = MVT::v8i32;
-      } else {
+      if (VT.is256BitVector())
+        // We cast to floating point types if integer blends aren't available,
+        // and we coerce integer blends when available to occur on the v8i32
+        // type.
+        BlendVT = Subtarget->hasAVX2()
+                      ? MVT::v8i32
+                      : MVT::getVectorVT(
+                            MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
+                            VT.getVectorNumElements());
+      else
         // For 128-bit vectors we do the blend on v8i16 types.
         BlendVT = MVT::v8i16;
-      }
     }
     assert(BlendVT.getVectorNumElements() <= 8 &&
            "Cannot blend more than 8 elements with an immediate!");
@@ -21718,57 +21682,6 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
   return std::make_pair(Opc, NeedSplit);
 }
 
-static SDValue
-TransformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG,
-                                      const X86Subtarget *Subtarget) {
-  SDLoc dl(N);
-  SDValue Cond = N->getOperand(0);
-  SDValue LHS = N->getOperand(1);
-  SDValue RHS = N->getOperand(2);
-
-  if (Cond.getOpcode() == ISD::SIGN_EXTEND) {
-    SDValue CondSrc = Cond->getOperand(0);
-    if (CondSrc->getOpcode() == ISD::SIGN_EXTEND_INREG)
-      Cond = CondSrc->getOperand(0);
-  }
-
-  MVT VT = N->getSimpleValueType(0);
-  MVT EltVT = VT.getVectorElementType();
-  unsigned NumElems = VT.getVectorNumElements();
-  // There is no blend with immediate in AVX-512.
-  if (VT.is512BitVector())
-    return SDValue();
-
-  if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
-    return SDValue();
-  if (!Subtarget->hasInt256() && VT == MVT::v16i16)
-    return SDValue();
-
-  if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
-    return SDValue();
-
-  // A vselect where all conditions and data are constants can be optimized into
-  // a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
-  if (ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) &&
-      ISD::isBuildVectorOfConstantSDNodes(RHS.getNode()))
-    return SDValue();
-
-  unsigned MaskValue = 0;
-  if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue))
-    return SDValue();
-
-  SmallVector<int, 8> ShuffleMask(NumElems, -1);
-  for (unsigned i = 0; i < NumElems; ++i) {
-    // Be sure we emit undef where we can.
-    if (Cond.getOperand(i)->getOpcode() == ISD::UNDEF)
-      ShuffleMask[i] = -1;
-    else
-      ShuffleMask[i] = i + NumElems * ((MaskValue >> i) & 1);
-  }
-
-  return DAG.getVectorShuffle(VT, dl, LHS, RHS, &ShuffleMask[0]);
-}
-
 /// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
 /// nodes.
 static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
@@ -22318,23 +22231,6 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
       DCI.CommitTargetLoweringOpt(TLO);
   }
 
-  // We should generate an X86ISD::BLENDI from a vselect if its argument
-  // is a sign_extend_inreg of an any_extend of a BUILD_VECTOR of
-  // constants. This specific pattern gets generated when we split a
-  // selector for a 512 bit vector in a machine without AVX512 (but with
-  // 256-bit vectors), during legalization:
-  //
-  // (vselect (sign_extend (any_extend (BUILD_VECTOR)) i1) LHS RHS)
-  //
-  // Iff we find this pattern and the build_vectors are built from
-  // constants, we translate the vselect into a shuffle_vector that we
-  // know will be matched by LowerVECTOR_SHUFFLEtoBlend.
-  if (N->getOpcode() == ISD::VSELECT && !DCI.isBeforeLegalize()) {
-    SDValue Shuffle = TransformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
-    if (Shuffle.getNode())
-      return Shuffle;
-  }
-
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/X86/avx-blend.ll b/llvm/test/CodeGen/X86/avx-blend.ll
index d2a22d70947..17a4f71e48e 100644
--- a/llvm/test/CodeGen/X86/avx-blend.ll
+++ b/llvm/test/CodeGen/X86/avx-blend.ll
@@ -21,7 +21,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
 
 
 ;CHECK-LABEL: vsel_i32:
-;CHECK: vblendps $10, %xmm1, %xmm0, %xmm0
+;CHECK: vpblendw {{.*}} ## xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ;CHECK: ret
 define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
   %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
@@ -61,13 +61,7 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
 
 ;CHECK-LABEL: vsel_float8:
 ;CHECK-NOT: vinsertf128
-; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
-; which translates into the boolean mask (big endian representation):
-; 00010001 = 17.
-; '1' means takes the first argument, '0' means takes the second argument.
-; This is the opposite of the intel syntax, thus we expect
-; the inverted mask: 11101110 = 238.
-;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0
+;CHECK: vblendps {{.*}} ## ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
 ;CHECK: ret
 define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
   %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
@@ -76,7 +70,7 @@ define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
 
 ;CHECK-LABEL: vsel_i328:
 ;CHECK-NOT: vinsertf128
-;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0
+;CHECK: vblendps {{.*}} ## ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
 ;CHECK-NEXT: ret
 define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
   %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
diff --git a/llvm/test/CodeGen/X86/blend-msb.ll b/llvm/test/CodeGen/X86/blend-msb.ll
index 34aaf2c31ac..c4a6d32ae5a 100644
--- a/llvm/test/CodeGen/X86/blend-msb.ll
+++ b/llvm/test/CodeGen/X86/blend-msb.ll
@@ -22,17 +22,7 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
 }
 
 ;CHECK-LABEL: vsel_8xi16:
-; The select mask is
-; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
-; which translates into the boolean mask (big endian representation):
-; 00010001 = 17.
-; '1' means takes the first argument, '0' means takes the second argument.
-; This is the opposite of the intel syntax, thus we expect
-; the inverted mask: 11101110 = 238.
-; According to the ABI:
-; v1 is in xmm0 => first argument is xmm0.
-; v2 is in xmm1 => second argument is xmm1.
-;CHECK: pblendw $238, %xmm1, %xmm0
+;CHECK: pblendw {{.*}} ## xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
 ;CHECK: ret
 define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
   %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
diff --git a/llvm/test/CodeGen/X86/sse41-blend.ll b/llvm/test/CodeGen/X86/sse41-blend.ll
index 3992da0b512..4e1722da8d0 100644
--- a/llvm/test/CodeGen/X86/sse41-blend.ll
+++ b/llvm/test/CodeGen/X86/sse41-blend.ll
@@ -10,7 +10,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
 
 
 ;CHECK-LABEL: vsel_4xi8:
-;CHECK: blendps
+;CHECK: blendw
 ;CHECK: ret
 define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
   %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
@@ -18,7 +18,7 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
 }
 
 ;CHECK-LABEL: vsel_4xi16:
-;CHECK: blendps
+;CHECK: blendw
 ;CHECK: ret
 define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
   %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
@@ -27,7 +27,7 @@ define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
 
 
 ;CHECK-LABEL: vsel_i32:
-;CHECK: blendps
+;CHECK: blendw
 ;CHECK: ret
 define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
   %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %v1, <4 x i32> %v2
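
The deleted comments in blend-msb.ll and avx-blend.ll spell out how a constant vselect mask maps onto the x86 blend immediate: a select-mask bit of 1 keeps the first operand, while in the Intel encoding an immediate bit of 1 selects the second operand, so each bit is inverted (00010001 becomes 11101110 = 238). The updated CHECK lines express the same choice as an element-mapping comment rather than a raw immediate. The standalone C++ sketch below (not LLVM code; blendImmediate is a hypothetical helper written purely for illustration) reproduces that encoding:

#include <cstdio>

// Compute the 8-bit blend immediate for a constant select mask, inverting
// each bit because an immediate bit of 1 selects the second source operand.
static unsigned blendImmediate(const bool (&selectFirst)[8]) {
  unsigned Imm = 0;
  for (unsigned i = 0; i < 8; ++i)
    Imm |= (selectFirst[i] ? 0u : 1u) << i;
  return Imm;
}

int main() {
  // Select mask <i1 true, i1 false, i1 false, i1 false,
  //              i1 true, i1 false, i1 false, i1 false> from the tests above.
  bool SelectFirst[8] = {true, false, false, false, true, false, false, false};
  // Prints 238, the immediate the old "pblendw $238, %xmm1, %xmm0" check expected.
  std::printf("%u\n", blendImmediate(SelectFirst));
  return 0;
}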