summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2018-01-27 19:48:13 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2018-01-27 19:48:13 +0000
commitfe3fac805accd43dbfbc5ee87a065cbfe0a1d80e (patch)
tree6cc7ecf300604257259ea6a7c6f79ff7a010b3ff /llvm/lib
parent73e88d394b5e98c5dcc4127f52e0af5bbaa196fe (diff)
downloadbcm5719-llvm-fe3fac805accd43dbfbc5ee87a065cbfe0a1d80e.tar.gz
bcm5719-llvm-fe3fac805accd43dbfbc5ee87a065cbfe0a1d80e.zip
[X86][SSE] Simplify demanded elements from BROADCAST shuffle source.
If broadcasting from another shuffle, attempt to simplify it. We can probably generalize this a lot more (embedding in combineX86ShufflesRecursively), but BROADCAST is one of the more troublesome as it accepts inputs of different sizes to the result. llvm-svn: 323602
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp30
1 files changed, 30 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 58246c976ce..ad06e996f36 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28242,6 +28242,14 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
+ // Match against a VZEXT_MOVL vXi32 zero-extending instruction.
+ if (MaskEltSize == 32 && isUndefOrEqual(Mask[0], 0) &&
+ isUndefOrZero(Mask[1]) && isUndefInRange(Mask, 2, NumMaskElts - 2)) {
+ Shuffle = X86ISD::VZEXT_MOVL;
+ SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
+ return true;
+ }
+
// Match against a ZERO_EXTEND_VECTOR_INREG/VZEXT instruction.
// TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
@@ -29790,6 +29798,28 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
}
switch (Opcode) {
+ case X86ISD::VBROADCAST: {
+ // If broadcasting from another shuffle, attempt to simplify it.
+ // TODO - we really need a general SimplifyDemandedVectorElts mechanism.
+ SDValue Src = N.getOperand(0);
+ SDValue BC = peekThroughBitcasts(Src);
+ EVT SrcVT = Src.getValueType();
+ EVT BCVT = BC.getValueType();
+ if (isTargetShuffle(BC.getOpcode()) &&
+ VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits() == 0) {
+ unsigned Scale = VT.getScalarSizeInBits() / BCVT.getScalarSizeInBits();
+ SmallVector<int, 16> DemandedMask(BCVT.getVectorNumElements(),
+ SM_SentinelUndef);
+ for (unsigned i = 0; i != Scale; ++i)
+ DemandedMask[i] = i;
+ if (SDValue Res = combineX86ShufflesRecursively(
+ {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1,
+ /*HasVarMask*/ false, DAG, DCI, Subtarget))
+ return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
+ DAG.getBitcast(SrcVT, Res));
+ }
+ return SDValue();
+ }
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
OpenPOWER on IntegriCloud