diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-13 15:10:43 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-13 15:10:43 +0000 |
| commit | 48d83407602b15f80310bba0bdf5731ed194e092 (patch) | |
| tree | 39b37e1cf162524943af11f168ef0f7e189a87a6 /llvm/lib/Target | |
| parent | bd69903692dd3e919f7b3f4b1b4cf70ca2c1f376 (diff) | |
| download | bcm5719-llvm-48d83407602b15f80310bba0bdf5731ed194e092.tar.gz bcm5719-llvm-48d83407602b15f80310bba0bdf5731ed194e092.zip | |
[X86][AVX] Add support for target shuffle combining to VPERMILPS variable shuffle mask
Added AVX512F VPERMILPS shuffle decoding support
llvm-svn: 275270
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 32 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp | 9 |
2 files changed, 37 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c779414cd0f..990671b9159 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3829,6 +3829,7 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) { switch (Opcode) { default: return false; case X86ISD::PSHUFB: + case X86ISD::VPERMILPV: return true; } } @@ -25211,13 +25212,42 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root, if (Depth < 2) return false; + if (is128BitLaneCrossingShuffleMask(MaskVT, Mask)) + return false; + + bool MaskContainsZeros = + llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; }); + + // If we have a single input shuffle with different shuffle patterns in the + // 128-bit lanes use the variable mask to VPERMILPS. + // TODO Combine other mask types at higher depths. + if (HasVariableMask && !MaskContainsZeros && + ((MaskVT == MVT::v8f32 && Subtarget.hasAVX()) || + (MaskVT == MVT::v16f32 && Subtarget.hasAVX512()))) { + SmallVector<SDValue, 16> VPermIdx; + for (int M : Mask) { + SDValue Idx = + M < 0 ? DAG.getUNDEF(MVT::i32) : DAG.getConstant(M % 4, DL, MVT::i32); + VPermIdx.push_back(Idx); + } + MVT VPermMaskVT = MVT::getVectorVT(MVT::i32, NumMaskElts); + SDValue VPermMask = DAG.getBuildVector(VPermMaskVT, DL, VPermIdx); + DCI.AddToWorklist(VPermMask.getNode()); + Res = DAG.getBitcast(MaskVT, Input); + DCI.AddToWorklist(Res.getNode()); + Res = DAG.getNode(X86ISD::VPERMILPV, DL, MaskVT, Res, VPermMask); + DCI.AddToWorklist(Res.getNode()); + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), + /*AddTo*/ true); + return true; + } + // If we have 3 or more shuffle instructions or a chain involving a variable // mask, we can replace them with a single PSHUFB instruction profitably. // Intel's manuals suggest only using PSHUFB if doing so replaces 5 // instructions, but in practice PSHUFB tends to be *very* fast so we're // more aggressive. 
if ((Depth >= 3 || HasVariableMask) && - !is128BitLaneCrossingShuffleMask(MaskVT, Mask) && ((VT.is128BitVector() && Subtarget.hasSSSE3()) || (VT.is256BitVector() && Subtarget.hasAVX2()) || (VT.is512BitVector() && Subtarget.hasBWI()))) { diff --git a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp index 8c700e3f5cd..1adc92cfda6 100644 --- a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp +++ b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp @@ -104,9 +104,11 @@ void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, // <4 x i32> <i32 -2147483648, i32 -2147483648, // i32 -2147483648, i32 -2147483648> - unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits(); + if (ElSize != 32 && ElSize != 64) + return; - if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512. + unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits(); + if (MaskTySize != 128 && MaskTySize != 256 && MaskTySize != 512) return; // Only support vector types. @@ -126,7 +128,8 @@ void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, return; unsigned NumElements = MaskTySize / ElSize; - assert((NumElements == 2 || NumElements == 4 || NumElements == 8) && + assert((NumElements == 2 || NumElements == 4 || NumElements == 8 || + NumElements == 16) && "Unexpected number of vector elements."); ShuffleMask.reserve(NumElements); unsigned NumElementsPerLane = 128 / ElSize; |

