diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 62 |
1 file changed, 57 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 25ad59f919b..2863ec39421 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5395,8 +5395,9 @@ static bool setTargetShuffleZeroElements(SDValue N,
   bool IsUnary;
   if (!isTargetShuffle(N.getOpcode()))
     return false;
-  if (!getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), true, Ops,
-                            Mask, IsUnary))
+
+  MVT VT = N.getSimpleValueType();
+  if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary))
     return false;
 
   SDValue V1 = Ops[0];
@@ -5458,9 +5459,61 @@
     }
   }
 
+  assert(VT.getVectorNumElements() == Mask.size() &&
+         "Different mask size from vector size!");
   return true;
 }
 
+// Attempt to decode ops that could be represented as a shuffle mask.
+// The decoded shuffle mask may contain a different number of elements to the
+// destination value type.
+static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
+                               SmallVectorImpl<SDValue> &Ops) {
+  Mask.clear();
+  Ops.clear();
+
+  MVT VT = N.getSimpleValueType();
+  unsigned NumElts = VT.getVectorNumElements();
+
+  unsigned Opcode = N.getOpcode();
+  switch (Opcode) {
+  case X86ISD::VSHLI:
+  case X86ISD::VSRLI: {
+    uint64_t ShiftVal = N.getConstantOperandVal(1);
+    // Out of range bit shifts are guaranteed to be zero.
+    if (VT.getScalarSizeInBits() <= ShiftVal) {
+      Mask.append(NumElts, SM_SentinelZero);
+      return true;
+    }
+
+    // We can only decode 'whole byte' bit shifts as shuffles.
+    if ((ShiftVal % 8) != 0)
+      break;
+
+    uint64_t ByteShift = ShiftVal / 8;
+    unsigned NumBytes = VT.getSizeInBits() / 8;
+    unsigned NumBytesPerElt = VT.getScalarSizeInBits() / 8;
+    Ops.push_back(N.getOperand(0));
+
+    // Clear mask to all zeros and insert the shifted byte indices.
+    Mask.append(NumBytes, SM_SentinelZero);
+
+    if (X86ISD::VSHLI == Opcode) {
+      for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
+        for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
+          Mask[i + j] = i + j - ByteShift;
+    } else {
+      for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
+        for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
+          Mask[i + j - ByteShift] = i + j;
+    }
+    return true;
+  }
+  }
+
+  return false;
+}
+
 /// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs
 /// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the
 /// remaining input indices in case we now have a unary shuffle and adjust the
@@ -5470,7 +5523,8 @@ static bool resolveTargetShuffleInputs(SDValue Op, SDValue &Op0, SDValue &Op1,
                                        SmallVectorImpl<int> &Mask) {
   SmallVector<SDValue, 2> Ops;
   if (!setTargetShuffleZeroElements(Op, Mask, Ops))
-    return false;
+    if (!getFauxShuffleMask(Op, Mask, Ops))
+      return false;
 
   int NumElts = Mask.size();
   bool Op0InUse = any_of(Mask, [NumElts](int Idx) {
@@ -26299,8 +26353,6 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
       Ops.push_back(Input1);
   }
 
-  assert(VT.getVectorNumElements() == OpMask.size() &&
-         "Different mask size from vector size!");
   assert(((RootMask.size() > OpMask.size() &&
            RootMask.size() % OpMask.size() == 0) ||
           (OpMask.size() > RootMask.size() &&