diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-03-06 21:54:52 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-03-06 21:54:52 +0000 |
commit | 253ca348b2ea5fbde89377dfbbedab100cef4e7a (patch) | |
tree | 545634a99ba3b24f8074b422395e6d2024242bea /llvm/lib | |
parent | dc11054f20a3dc532319217ddc5a9f32a2ccaa07 (diff) | |
download | bcm5719-llvm-253ca348b2ea5fbde89377dfbbedab100cef4e7a.tar.gz bcm5719-llvm-253ca348b2ea5fbde89377dfbbedab100cef4e7a.zip |
[X86][AVX512] Fixed VPERMT2* shuffle mask decoding and enabled target shuffle combining.
Patch to add support for target shuffle combining of X86ISD::VPERMV3 nodes, including support for detecting unary shuffles.
This uncovered several issues with the X86ISD::VPERMV3 shuffle mask decoding of non-64-bit shuffle mask elements - the bit masking wasn't being computed correctly.
Removed the non-constant-pool mask decode path, as we have no way of testing it right now.
Differential Revision: http://reviews.llvm.org/D17916
llvm-svn: 262809
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | 5 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp | 7 |
3 files changed, 9 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp index b7b444a80e4..f007349d184 100644 --- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -482,8 +482,9 @@ void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, SmallVectorImpl<int> &ShuffleMask) { - for (int i = 0, e = RawMask.size(); i < e; ++i) { - uint64_t M = RawMask[i]; + uint64_t EltMaskSize = (RawMask.size() * 2) - 1; + for (auto M : RawMask) { + M &= EltMaskSize; ShuffleMask.push_back((int)M); } } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4608df1281d..c3c6cf331ac 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5138,16 +5138,11 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, return false; } case X86ISD::VPERMV3: { + IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(2); // Unlike most shuffle nodes, VPERMV3's mask operand is the middle one. 
Ops.push_back(N->getOperand(0)); Ops.push_back(N->getOperand(2)); SDValue MaskNode = N->getOperand(1); - SmallVector<uint64_t, 32> RawMask; - unsigned MaskLoBits = Log2_64(VT.getVectorNumElements() * 2); - if (getTargetShuffleMaskIndices(MaskNode, MaskLoBits, RawMask)) { - DecodeVPERMV3Mask(RawMask, Mask); - break; - } if (auto *C = getTargetShuffleMaskConstant(MaskNode)) { DecodeVPERMV3Mask(C, VT, Mask); break; @@ -29202,6 +29197,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::MOVDDUP: case X86ISD::MOVSS: case X86ISD::MOVSD: + case X86ISD::VPERMV3: case X86ISD::VPERMILPI: case X86ISD::VPERMILPV: case X86ISD::VPERM2X128: diff --git a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp index e84d76adff4..4e7714eeb2e 100644 --- a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp +++ b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp @@ -191,6 +191,7 @@ void DecodeVPERMV3Mask(const Constant *C, MVT VT, Type *MaskTy = C->getType(); unsigned NumElements = MaskTy->getVectorNumElements(); if (NumElements == VT.getVectorNumElements()) { + unsigned EltMaskSize = Log2_64(NumElements * 2); for (unsigned i = 0; i < NumElements; ++i) { Constant *COp = C->getAggregateElement(i); if (!COp) { @@ -200,9 +201,9 @@ void DecodeVPERMV3Mask(const Constant *C, MVT VT, if (isa<UndefValue>(COp)) ShuffleMask.push_back(SM_SentinelUndef); else { - uint64_t Element = cast<ConstantInt>(COp)->getZExtValue(); - Element &= (1 << NumElements*2) - 1; - ShuffleMask.push_back(Element); + APInt Element = cast<ConstantInt>(COp)->getValue(); + Element = Element.getLoBits(EltMaskSize); + ShuffleMask.push_back(Element.getZExtValue()); } } } |