summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-03-06 21:54:52 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-03-06 21:54:52 +0000
commit: 253ca348b2ea5fbde89377dfbbedab100cef4e7a (patch)
tree: 545634a99ba3b24f8074b422395e6d2024242bea /llvm/lib
parent: dc11054f20a3dc532319217ddc5a9f32a2ccaa07 (diff)
download: bcm5719-llvm-253ca348b2ea5fbde89377dfbbedab100cef4e7a.tar.gz
download: bcm5719-llvm-253ca348b2ea5fbde89377dfbbedab100cef4e7a.zip
[X86][AVX512] Fixed VPERMT2* shuffle mask decoding and enabled target shuffle combining.

Patch to add support for target shuffle combining of X86ISD::VPERMV3 nodes, including support for detecting unary shuffles.

This uncovered several issues with the X86ISD::VPERMV3 shuffle mask decoding of non-64 bit shuffle mask elements - the bit masking wasn't being correctly computed.

Removed non-constant pool mask decode path as we have no way of testing it right now.

Differential Revision: http://reviews.llvm.org/D17916

llvm-svn: 262809

Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | 5
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 8
-rw-r--r-- llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp | 7
3 files changed, 9 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index b7b444a80e4..f007349d184 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -482,8 +482,9 @@ void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
SmallVectorImpl<int> &ShuffleMask) {
- for (int i = 0, e = RawMask.size(); i < e; ++i) {
- uint64_t M = RawMask[i];
+ uint64_t EltMaskSize = (RawMask.size() * 2) - 1;
+ for (auto M : RawMask) {
+ M &= EltMaskSize;
ShuffleMask.push_back((int)M);
}
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4608df1281d..c3c6cf331ac 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5138,16 +5138,11 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
return false;
}
case X86ISD::VPERMV3: {
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(2);
// Unlike most shuffle nodes, VPERMV3's mask operand is the middle one.
Ops.push_back(N->getOperand(0));
Ops.push_back(N->getOperand(2));
SDValue MaskNode = N->getOperand(1);
- SmallVector<uint64_t, 32> RawMask;
- unsigned MaskLoBits = Log2_64(VT.getVectorNumElements() * 2);
- if (getTargetShuffleMaskIndices(MaskNode, MaskLoBits, RawMask)) {
- DecodeVPERMV3Mask(RawMask, Mask);
- break;
- }
if (auto *C = getTargetShuffleMaskConstant(MaskNode)) {
DecodeVPERMV3Mask(C, VT, Mask);
break;
@@ -29202,6 +29197,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
+ case X86ISD::VPERMV3:
case X86ISD::VPERMILPI:
case X86ISD::VPERMILPV:
case X86ISD::VPERM2X128:
diff --git a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
index e84d76adff4..4e7714eeb2e 100644
--- a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -191,6 +191,7 @@ void DecodeVPERMV3Mask(const Constant *C, MVT VT,
Type *MaskTy = C->getType();
unsigned NumElements = MaskTy->getVectorNumElements();
if (NumElements == VT.getVectorNumElements()) {
+ unsigned EltMaskSize = Log2_64(NumElements * 2);
for (unsigned i = 0; i < NumElements; ++i) {
Constant *COp = C->getAggregateElement(i);
if (!COp) {
@@ -200,9 +201,9 @@ void DecodeVPERMV3Mask(const Constant *C, MVT VT,
if (isa<UndefValue>(COp))
ShuffleMask.push_back(SM_SentinelUndef);
else {
- uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
- Element &= (1 << NumElements*2) - 1;
- ShuffleMask.push_back(Element);
+ APInt Element = cast<ConstantInt>(COp)->getValue();
+ Element = Element.getLoBits(EltMaskSize);
+ ShuffleMask.push_back(Element.getZExtValue());
}
}
}
OpenPOWER on IntegriCloud