summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2016-07-13 15:10:43 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2016-07-13 15:10:43 +0000
commit48d83407602b15f80310bba0bdf5731ed194e092 (patch)
tree39b37e1cf162524943af11f168ef0f7e189a87a6 /llvm/lib/Target
parentbd69903692dd3e919f7b3f4b1b4cf70ca2c1f376 (diff)
downloadbcm5719-llvm-48d83407602b15f80310bba0bdf5731ed194e092.tar.gz
bcm5719-llvm-48d83407602b15f80310bba0bdf5731ed194e092.zip
[X86][AVX] Add support for target shuffle combining to VPERMILPS variable shuffle mask
Added AVX512F VPERMILPS shuffle decoding support llvm-svn: 275270
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp32
-rw-r--r--llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp9
2 files changed, 37 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c779414cd0f..990671b9159 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3829,6 +3829,7 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) {
switch (Opcode) {
default: return false;
case X86ISD::PSHUFB:
+ case X86ISD::VPERMILPV:
return true;
}
}
@@ -25211,13 +25212,42 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
if (Depth < 2)
return false;
+ if (is128BitLaneCrossingShuffleMask(MaskVT, Mask))
+ return false;
+
+ bool MaskContainsZeros =
+ llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
+
+ // If we have a single input shuffle with different shuffle patterns in the
+ // the 128-bit lanes use the variable mask to VPERMILPS.
+ // TODO Combine other mask types at higher depths.
+ if (HasVariableMask && !MaskContainsZeros &&
+ ((MaskVT == MVT::v8f32 && Subtarget.hasAVX()) ||
+ (MaskVT == MVT::v16f32 && Subtarget.hasAVX512()))) {
+ SmallVector<SDValue, 16> VPermIdx;
+ for (int M : Mask) {
+ SDValue Idx =
+ M < 0 ? DAG.getUNDEF(MVT::i32) : DAG.getConstant(M % 4, DL, MVT::i32);
+ VPermIdx.push_back(Idx);
+ }
+ MVT VPermMaskVT = MVT::getVectorVT(MVT::i32, NumMaskElts);
+ SDValue VPermMask = DAG.getBuildVector(VPermMaskVT, DL, VPermIdx);
+ DCI.AddToWorklist(VPermMask.getNode());
+ Res = DAG.getBitcast(MaskVT, Input);
+ DCI.AddToWorklist(Res.getNode());
+ Res = DAG.getNode(X86ISD::VPERMILPV, DL, MaskVT, Res, VPermMask);
+ DCI.AddToWorklist(Res.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+ /*AddTo*/ true);
+ return true;
+ }
+
// If we have 3 or more shuffle instructions or a chain involving a variable
// mask, we can replace them with a single PSHUFB instruction profitably.
// Intel's manuals suggest only using PSHUFB if doing so replacing 5
// instructions, but in practice PSHUFB tends to be *very* fast so we're
// more aggressive.
if ((Depth >= 3 || HasVariableMask) &&
- !is128BitLaneCrossingShuffleMask(MaskVT, Mask) &&
((VT.is128BitVector() && Subtarget.hasSSSE3()) ||
(VT.is256BitVector() && Subtarget.hasAVX2()) ||
(VT.is512BitVector() && Subtarget.hasBWI()))) {
diff --git a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
index 8c700e3f5cd..1adc92cfda6 100644
--- a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -104,9 +104,11 @@ void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
// <4 x i32> <i32 -2147483648, i32 -2147483648,
// i32 -2147483648, i32 -2147483648>
- unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
+ if (ElSize != 32 && ElSize != 64)
+ return;
- if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512.
+ unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
+ if (MaskTySize != 128 && MaskTySize != 256 && MaskTySize != 512)
return;
// Only support vector types.
@@ -126,7 +128,8 @@ void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
return;
unsigned NumElements = MaskTySize / ElSize;
- assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
+ assert((NumElements == 2 || NumElements == 4 || NumElements == 8 ||
+ NumElements == 16) &&
"Unexpected number of vector elements.");
ShuffleMask.reserve(NumElements);
unsigned NumElementsPerLane = 128 / ElSize;
OpenPOWER on IntegriCloud