From fe3fac805accd43dbfbc5ee87a065cbfe0a1d80e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 27 Jan 2018 19:48:13 +0000 Subject: [X86][SSE] Simplify demanded elements from BROADCAST shuffle source. If broadcasting from another shuffle, attempt to simplify it. We can probably generalize this a lot more (embedding in combineX86ShufflesRecursively), but BROADCAST is one of the more troublesome as it accepts inputs of different sizes to the result. llvm-svn: 323602 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'llvm/lib') diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 58246c976ce..ad06e996f36 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -28242,6 +28242,14 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, unsigned NumMaskElts = Mask.size(); unsigned MaskEltSize = MaskVT.getScalarSizeInBits(); + // Match against a VZEXT_MOVL vXi32 zero-extending instruction. + if (MaskEltSize == 32 && isUndefOrEqual(Mask[0], 0) && + isUndefOrZero(Mask[1]) && isUndefInRange(Mask, 2, NumMaskElts - 2)) { + Shuffle = X86ISD::VZEXT_MOVL; + SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT; + return true; + } + // Match against a ZERO_EXTEND_VECTOR_INREG/VZEXT instruction. // TODO: Add 512-bit vector support (split AVX512F and AVX512BW). if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) || @@ -29790,6 +29798,28 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, } switch (Opcode) { + case X86ISD::VBROADCAST: { + // If broadcasting from another shuffle, attempt to simplify it. + // TODO - we really need a general SimplifyDemandedVectorElts mechanism. 
+ SDValue Src = N.getOperand(0); + SDValue BC = peekThroughBitcasts(Src); + EVT SrcVT = Src.getValueType(); + EVT BCVT = BC.getValueType(); + if (isTargetShuffle(BC.getOpcode()) && + VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits() == 0) { + unsigned Scale = VT.getScalarSizeInBits() / BCVT.getScalarSizeInBits(); + SmallVector<int, 16> DemandedMask(BCVT.getVectorNumElements(), + SM_SentinelUndef); + for (unsigned i = 0; i != Scale; ++i) + DemandedMask[i] = i; + if (SDValue Res = combineX86ShufflesRecursively( + {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1, + /*HasVarMask*/ false, DAG, DCI, Subtarget)) + return DAG.getNode(X86ISD::VBROADCAST, DL, VT, + DAG.getBitcast(SrcVT, Res)); + } + return SDValue(); + } case X86ISD::PSHUFD: case X86ISD::PSHUFLW: case X86ISD::PSHUFHW: -- cgit v1.2.3