diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-11-28 16:25:01 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-11-28 16:25:01 +0000 |
| commit | 3f10e669817f174cb99da771e1cd6ecbbfb44fa1 (patch) | |
| tree | 3944b493e1be9c23aee12fb82b3b5fd81dc18b95 /llvm/lib | |
| parent | 7fcacd8e0e9fd5dc937da96c77b7b2239e022f69 (diff) | |
| download | bcm5719-llvm-3f10e669817f174cb99da771e1cd6ecbbfb44fa1.tar.gz bcm5719-llvm-3f10e669817f174cb99da771e1cd6ecbbfb44fa1.zip | |
[X86][SSE] Added support for combining bit-shifts with shuffles.
Bit-shifts by a whole number of bytes can be represented as a shuffle mask suitable for combining.
Added a 'getFauxShuffleMask' function to allow us to create shuffle masks from other suitable operations.
llvm-svn: 288040
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 62 |
1 file changed, 57 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 25ad59f919b..2863ec39421 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5395,8 +5395,9 @@ static bool setTargetShuffleZeroElements(SDValue N, bool IsUnary; if (!isTargetShuffle(N.getOpcode())) return false; - if (!getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), true, Ops, - Mask, IsUnary)) + + MVT VT = N.getSimpleValueType(); + if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary)) return false; SDValue V1 = Ops[0]; @@ -5458,9 +5459,61 @@ static bool setTargetShuffleZeroElements(SDValue N, } } + assert(VT.getVectorNumElements() == Mask.size() && + "Different mask size from vector size!"); return true; } +// Attempt to decode ops that could be represented as a shuffle mask. +// The decoded shuffle mask may contain a different number of elements to the +// destination value type. +static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask, + SmallVectorImpl<SDValue> &Ops) { + Mask.clear(); + Ops.clear(); + + MVT VT = N.getSimpleValueType(); + unsigned NumElts = VT.getVectorNumElements(); + + unsigned Opcode = N.getOpcode(); + switch (Opcode) { + case X86ISD::VSHLI: + case X86ISD::VSRLI: { + uint64_t ShiftVal = N.getConstantOperandVal(1); + // Out of range bit shifts are guaranteed to be zero. + if (VT.getScalarSizeInBits() <= ShiftVal) { + Mask.append(NumElts, SM_SentinelZero); + return true; + } + + // We can only decode 'whole byte' bit shifts as shuffles. + if ((ShiftVal % 8) != 0) + break; + + uint64_t ByteShift = ShiftVal / 8; + unsigned NumBytes = VT.getSizeInBits() / 8; + unsigned NumBytesPerElt = VT.getScalarSizeInBits() / 8; + Ops.push_back(N.getOperand(0)); + + // Clear mask to all zeros and insert the shifted byte indices. 
+ Mask.append(NumBytes, SM_SentinelZero); + + if (X86ISD::VSHLI == Opcode) { + for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt) + for (unsigned j = ByteShift; j != NumBytesPerElt; ++j) + Mask[i + j] = i + j - ByteShift; + } else { + for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt) + for (unsigned j = ByteShift; j != NumBytesPerElt; ++j) + Mask[i + j - ByteShift] = i + j; + } + return true; + } + } + + return false; +} + /// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs /// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the /// remaining input indices in case we now have a unary shuffle and adjust the @@ -5470,7 +5523,8 @@ static bool resolveTargetShuffleInputs(SDValue Op, SDValue &Op0, SDValue &Op1, SmallVectorImpl<int> &Mask) { SmallVector<SDValue, 2> Ops; if (!setTargetShuffleZeroElements(Op, Mask, Ops)) - return false; + if (!getFauxShuffleMask(Op, Mask, Ops)) + return false; int NumElts = Mask.size(); bool Op0InUse = any_of(Mask, [NumElts](int Idx) { @@ -26299,8 +26353,6 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps, Ops.push_back(Input1); } - assert(VT.getVectorNumElements() == OpMask.size() && - "Different mask size from vector size!"); assert(((RootMask.size() > OpMask.size() && RootMask.size() % OpMask.size() == 0) || (OpMask.size() > RootMask.size() && |

