summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Michael Kuperstein <mkuper@google.com> 2016-05-16 18:27:00 +0000
committer: Michael Kuperstein <mkuper@google.com> 2016-05-16 18:27:00 +0000
commit: ac2088d122204325e30e85bb62ec0ab204f04f1b (patch)
tree: 273dd4378ec2ea597ec981428d4410be8c4ee29d
parent: 379a1952b37247975d2df8d23498675c9c8cc730 (diff)
download: bcm5719-llvm-ac2088d122204325e30e85bb62ec0ab204f04f1b.tar.gz
          bcm5719-llvm-ac2088d122204325e30e85bb62ec0ab204f04f1b.zip
[X86] Remove transformVSELECTtoBlendVECTOR_SHUFFLE

The new X86 shuffle lowering can do just fine without transforming vselects into vector_shuffles. It looks like the only thing this code does right now is cause trouble - in particular, it can lead to combine/legalization infinite loops.

Note that it's not completely NFC, since some of the shuffle masks get inverted, which may cause slight differences further down the line. We may want to find a way to invert those masks, but that's orthogonal to this commit.

This fixes the hang in PR27689.

llvm-svn: 269676

-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp  110
-rw-r--r--  llvm/test/CodeGen/X86/vector-blend.ll     19
2 files changed, 10 insertions, 119 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 81fa97f583d..1e3b5225bf6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -11973,53 +11973,6 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
llvm_unreachable("Unimplemented!");
}
-// This function assumes its argument is a BUILD_VECTOR of constants or
-// undef SDNodes. i.e: ISD::isBuildVectorOfConstantSDNodes(BuildVector) is
-// true.
-static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
- unsigned &MaskValue) {
- MaskValue = 0;
- unsigned NumElems = BuildVector->getNumOperands();
-
- // There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
- // We don't handle the >2 lanes case right now.
- unsigned NumLanes = (NumElems - 1) / 8 + 1;
- if (NumLanes > 2)
- return false;
-
- unsigned NumElemsInLane = NumElems / NumLanes;
-
- // Blend for v16i16 should be symmetric for the both lanes.
- for (unsigned i = 0; i < NumElemsInLane; ++i) {
- SDValue EltCond = BuildVector->getOperand(i);
- SDValue SndLaneEltCond =
- (NumLanes == 2) ? BuildVector->getOperand(i + NumElemsInLane) : EltCond;
-
- int Lane1Cond = -1, Lane2Cond = -1;
- if (isa<ConstantSDNode>(EltCond))
- Lane1Cond = !isNullConstant(EltCond);
- if (isa<ConstantSDNode>(SndLaneEltCond))
- Lane2Cond = !isNullConstant(SndLaneEltCond);
-
- unsigned LaneMask = 0;
- if (Lane1Cond == Lane2Cond || Lane2Cond < 0)
- // Lane1Cond != 0, means we want the first argument.
- // Lane1Cond == 0, means we want the second argument.
- // The encoding of this argument is 0 for the first argument, 1
- // for the second. Therefore, invert the condition.
- LaneMask = !Lane1Cond << i;
- else if (Lane1Cond < 0)
- LaneMask = !Lane2Cond << i;
- else
- return false;
-
- MaskValue |= LaneMask;
- if (NumLanes == 2)
- MaskValue |= LaneMask << NumElemsInLane;
- }
- return true;
-}
-
/// \brief Try to lower a VSELECT instruction to a vector shuffle.
static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
const X86Subtarget &Subtarget,
@@ -25539,50 +25492,6 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue
-transformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- SDLoc dl(N);
- SDValue Cond = N->getOperand(0);
- SDValue LHS = N->getOperand(1);
- SDValue RHS = N->getOperand(2);
-
- if (Cond.getOpcode() == ISD::SIGN_EXTEND) {
- SDValue CondSrc = Cond->getOperand(0);
- if (CondSrc->getOpcode() == ISD::SIGN_EXTEND_INREG)
- Cond = CondSrc->getOperand(0);
- }
-
- if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
- return SDValue();
-
- // A vselect where all conditions and data are constants can be optimized into
- // a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
- if (ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) &&
- ISD::isBuildVectorOfConstantSDNodes(RHS.getNode()))
- return SDValue();
-
- unsigned MaskValue = 0;
- if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue))
- return SDValue();
-
- MVT VT = N->getSimpleValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
- SmallVector<int, 8> ShuffleMask(NumElems, -1);
- for (unsigned i = 0; i < NumElems; ++i) {
- // Be sure we emit undef where we can.
- if (Cond.getOperand(i)->isUndef())
- ShuffleMask[i] = -1;
- else
- ShuffleMask[i] = i + NumElems * ((MaskValue >> i) & 1);
- }
-
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!TLI.isShuffleMaskLegal(ShuffleMask, VT))
- return SDValue();
- return DAG.getVectorShuffle(VT, dl, LHS, RHS, &ShuffleMask[0]);
-}
-
/// Do target-specific dag combines on SELECT and VSELECT nodes.
static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -25996,25 +25905,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}
}
- // We should generate an X86ISD::BLENDI from a vselect if its argument
- // is a sign_extend_inreg of an any_extend of a BUILD_VECTOR of
- // constants. This specific pattern gets generated when we split a
- // selector for a 512 bit vector in a machine without AVX512 (but with
- // 256-bit vectors), during legalization:
- //
- // (vselect (sign_extend (any_extend (BUILD_VECTOR)) i1) LHS RHS)
- //
- // Iff we find this pattern and the build_vectors are built from
- // constants, we translate the vselect into a shuffle_vector that we
- // know will be matched by LowerVECTOR_SHUFFLEtoBlend.
- if ((N->getOpcode() == ISD::VSELECT ||
- N->getOpcode() == X86ISD::SHRUNKBLEND) &&
- !DCI.isBeforeLegalize() && !VT.is512BitVector()) {
- if (SDValue Shuffle =
- transformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget))
- return Shuffle;
- }
-
// If this is a *dynamic* select (non-constant condition) and we can match
// this node with one of the variable blend instructions, restructure the
// condition so that the blends can use the high bit of each element and use
diff --git a/llvm/test/CodeGen/X86/vector-blend.ll b/llvm/test/CodeGen/X86/vector-blend.ll
index 3e00612d430..309fa98145c 100644
--- a/llvm/test/CodeGen/X86/vector-blend.ll
+++ b/llvm/test/CodeGen/X86/vector-blend.ll
@@ -273,15 +273,15 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
; SSE41-LABEL: vsel_i8:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
-; SSE41-NEXT: pblendvb %xmm1, %xmm2
-; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE41-NEXT: pblendvb %xmm2, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_i8:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
-; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
entry:
%vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
@@ -652,10 +652,11 @@ define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
; SSE41-LABEL: constant_pblendvb_avx2:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm4
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
-; SSE41-NEXT: pblendvb %xmm2, %xmm4
-; SSE41-NEXT: pblendvb %xmm3, %xmm1
-; SSE41-NEXT: movdqa %xmm4, %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
+; SSE41-NEXT: pblendvb %xmm4, %xmm2
+; SSE41-NEXT: pblendvb %xmm1, %xmm3
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm3, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: constant_pblendvb_avx2:
OpenPOWER on IntegriCloud