author    Simon Pilgrim <llvm-dev@redking.me.uk>    2016-11-29 14:18:51 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk>    2016-11-29 14:18:51 +0000
commit    35c47c494d58af1ed41934f49ea40484238f73eb (patch)
tree      4213c83e81db4da50fa38752a138233ca91b7047 /llvm/lib
parent    da22b3cb8a679e0ca2677d14dec54ed004768021 (diff)
[X86][SSE] Add initial support for combining target shuffles to (V)PMOVZX.
We can only handle 128-bit vectors until we support target shuffle inputs of a different size than the output.

llvm-svn: 288140
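To illustrate the pattern the new matching code looks for, here is a small standalone sketch (not the LLVM helper itself; matchesZeroExtend, kUndef and kZero are made-up names for this example). A shuffle mask corresponds to a (V)PMOVZX when, for each destination element i, lane i*Scale carries source element i and the remaining Scale-1 lanes are zero or undef:

// Standalone sketch of the zero-extend mask check, assuming -1 marks an
// undef lane and -2 marks a known-zero lane (hypothetical sentinels).
#include <cstdio>
#include <vector>

constexpr int kUndef = -1;
constexpr int kZero  = -2;

static bool matchesZeroExtend(const std::vector<int> &Mask, unsigned Scale) {
  unsigned NumDstElts = Mask.size() / Scale;
  for (unsigned i = 0; i != NumDstElts; ++i) {
    int M = Mask[i * Scale];
    if (M != kUndef && M != (int)i)            // lane must be source element i (or undef)
      return false;
    for (unsigned j = 1; j != Scale; ++j) {    // padding lanes must be zero or undef
      int P = Mask[i * Scale + j];
      if (P != kUndef && P != kZero)
        return false;
    }
  }
  return true;
}

int main() {
  // v8i16 mask equivalent to PMOVZXWD: {0, Z, 1, Z, 2, Z, 3, Z}.
  std::vector<int> PMovZXWD   = {0, kZero, 1, kZero, 2, kZero, 3, kZero};
  // Not a zero-extend: the second lane of each pair is a real element.
  std::vector<int> Interleave = {0, 4, 1, 5, 2, 6, 3, 7};
  std::printf("pmovzxwd-like: %d\n", matchesZeroExtend(PMovZXWD, 2));
  std::printf("interleave:    %d\n", matchesZeroExtend(Interleave, 2));
}

With Scale = 2 on a v8i16 mask this corresponds to PMOVZXWD; Scale = 4 would correspond to PMOVZXWQ. The patch applies the same idea per element size, hence MaxScale = 64 / MaskEltSize in the loop below.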
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp  |  52
1 file changed, 37 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 02e06572422..c902cb2a36d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25445,7 +25445,7 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
- unsigned &Shuffle, MVT &ShuffleVT) {
+ unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
bool FloatDomain = MaskVT.isFloatingPoint() ||
@@ -25456,27 +25456,48 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
isUndefOrEqual(Mask[0], 0) &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
Shuffle = X86ISD::VZEXT_MOVL;
- ShuffleVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
+ SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
return true;
}
+ // Match against a VZEXT instruction.
+ // TODO: Add 256/512-bit vector support.
+ if (!FloatDomain && MaskVT.is128BitVector() && Subtarget.hasSSE41()) {
+ unsigned MaxScale = 64 / MaskEltSize;
+ for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
+ bool Match = true;
+ unsigned NumDstElts = NumMaskElts / Scale;
+ for (unsigned i = 0; i != NumDstElts && Match; ++i) {
+ Match &= isUndefOrEqual(Mask[i * Scale], (int)i);
+ Match &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
+ }
+ if (Match) {
+ SrcVT = MaskVT;
+ DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
+ DstVT = MVT::getVectorVT(DstVT, NumDstElts);
+ Shuffle = X86ISD::VZEXT;
+ return true;
+ }
+ }
+ }
+
// Check if we have SSE3 which will let us use MOVDDUP etc. The
// instructions are no slower than UNPCKLPD but has the option to
// fold the input operand into even an unaligned memory load.
if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && FloatDomain) {
if (isTargetShuffleEquivalent(Mask, {0, 0})) {
Shuffle = X86ISD::MOVDDUP;
- ShuffleVT = MVT::v2f64;
+ SrcVT = DstVT = MVT::v2f64;
return true;
}
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2})) {
Shuffle = X86ISD::MOVSLDUP;
- ShuffleVT = MVT::v4f32;
+ SrcVT = DstVT = MVT::v4f32;
return true;
}
if (isTargetShuffleEquivalent(Mask, {1, 1, 3, 3})) {
Shuffle = X86ISD::MOVSHDUP;
- ShuffleVT = MVT::v4f32;
+ SrcVT = DstVT = MVT::v4f32;
return true;
}
}
@@ -25485,17 +25506,17 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles");
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2})) {
Shuffle = X86ISD::MOVDDUP;
- ShuffleVT = MVT::v4f64;
+ SrcVT = DstVT = MVT::v4f64;
return true;
}
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6})) {
Shuffle = X86ISD::MOVSLDUP;
- ShuffleVT = MVT::v8f32;
+ SrcVT = DstVT = MVT::v8f32;
return true;
}
if (isTargetShuffleEquivalent(Mask, {1, 1, 3, 3, 5, 5, 7, 7})) {
Shuffle = X86ISD::MOVSHDUP;
- ShuffleVT = MVT::v8f32;
+ SrcVT = DstVT = MVT::v8f32;
return true;
}
}
@@ -25505,19 +25526,19 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
"AVX512 required for 512-bit vector shuffles");
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6})) {
Shuffle = X86ISD::MOVDDUP;
- ShuffleVT = MVT::v8f64;
+ SrcVT = DstVT = MVT::v8f64;
return true;
}
if (isTargetShuffleEquivalent(
Mask, {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14})) {
Shuffle = X86ISD::MOVSLDUP;
- ShuffleVT = MVT::v16f32;
+ SrcVT = DstVT = MVT::v16f32;
return true;
}
if (isTargetShuffleEquivalent(
Mask, {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15})) {
Shuffle = X86ISD::MOVSHDUP;
- ShuffleVT = MVT::v16f32;
+ SrcVT = DstVT = MVT::v16f32;
return true;
}
}
@@ -25526,7 +25547,7 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
if (Subtarget.hasAVX2()) {
SmallVector<int, 64> BroadcastMask(NumMaskElts, 0);
if (isTargetShuffleEquivalent(Mask, BroadcastMask)) {
- ShuffleVT = MaskVT;
+ SrcVT = DstVT = MaskVT;
Shuffle = X86ISD::VBROADCAST;
return true;
}
@@ -25954,7 +25975,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
MaskVT = MVT::getVectorVT(MaskVT, NumMaskElts);
// Attempt to match the mask against known shuffle patterns.
- MVT ShuffleVT;
+ MVT ShuffleSrcVT, ShuffleVT;
unsigned Shuffle, PermuteImm;
if (UnaryShuffle) {
@@ -25973,12 +25994,13 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
- if (matchUnaryVectorShuffle(MaskVT, Mask, Subtarget, Shuffle, ShuffleVT)) {
+ if (matchUnaryVectorShuffle(MaskVT, Mask, Subtarget, Shuffle, ShuffleSrcVT,
+ ShuffleVT)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
return false; // AVX512 Writemask clash.
- Res = DAG.getBitcast(ShuffleVT, V1);
+ Res = DAG.getBitcast(ShuffleSrcVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
DCI.AddToWorklist(Res.getNode());