diff options
| author | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2016-05-28 14:38:04 +0000 | 
|---|---|---|
| committer | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2016-05-28 14:38:04 +0000 | 
| commit | a3dc1ba142c5c1f8e4b53e501597904c23e5c61f (patch) | |
| tree | 3e05fc945d1446b6a9c528d9d461c9ea29ac8c34 /llvm/lib/Target | |
| parent | 2d39bb3c6a45a699e83d544647e714575879b50c (diff) | |
| download | bcm5719-llvm-a3dc1ba142c5c1f8e4b53e501597904c23e5c61f.tar.gz bcm5719-llvm-a3dc1ba142c5c1f8e4b53e501597904c23e5c61f.zip | |
[X86] Try to zero elts when lowering 256-bit shuffle with PSHUFB.
Otherwise we fall back to a blend of PSHUFBs later on.
Differential Revision: http://reviews.llvm.org/D19661
llvm-svn: 271113
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 101 | 
1 files changed, 66 insertions, 35 deletions
| diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1d92d7b3392..f3b4c70b712 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7170,6 +7170,59 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,    return Zeroable;  } +/// Mutate a shuffle mask, replacing zeroable elements with SM_SentinelZero. +static void computeZeroableShuffleMask(MutableArrayRef<int> Mask, +                                       SDValue V1, SDValue V2) { +  SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); +  for (int i = 0, Size = Mask.size(); i < Size; ++i) { +    if (Mask[i] != SM_SentinelUndef && Zeroable[i]) +      Mask[i] = SM_SentinelZero; +  } +} + +/// Try to lower a shuffle with a single PSHUFB of V1. +/// This is only possible if V2 is unused (at all, or only for zero elements). +static SDValue lowerVectorShuffleWithPSHUFB(SDLoc DL, MVT VT, +                                            ArrayRef<int> Mask, SDValue V1, +                                            SDValue V2, +                                            const X86Subtarget &Subtarget, +                                            SelectionDAG &DAG) { +  const int NumBytes = VT.is128BitVector() ? 16 : 32; +  const int NumEltBytes = VT.getScalarSizeInBits() / 8; + +  assert((Subtarget.hasSSSE3() && VT.is128BitVector()) || +         (Subtarget.hasAVX2() && VT.is256BitVector())); + +  SmallVector<int, 32> ZeroableMask(Mask.begin(), Mask.end()); +  computeZeroableShuffleMask(ZeroableMask, V1, V2); + +  if (!isSingleInputShuffleMask(ZeroableMask) || +      is128BitLaneCrossingShuffleMask(VT, Mask)) +    return SDValue(); + +  SmallVector<SDValue, 32> PSHUFBMask(NumBytes); +  // Sign bit set in i8 mask means zero element. 
+  SDValue ZeroMask = DAG.getConstant(0x80, DL, MVT::i8); + +  for (int i = 0; i < NumBytes; ++i) { +    int M = ZeroableMask[i / NumEltBytes]; +    if (M == SM_SentinelUndef) { +      PSHUFBMask[i] = DAG.getUNDEF(MVT::i8); +    } else if (M == SM_SentinelZero) { +      PSHUFBMask[i] = ZeroMask; +    } else { +      M = M * NumEltBytes + (i % NumEltBytes); +      M = i < 16 ? M : M - 16; +      PSHUFBMask[i] = DAG.getConstant(M, DL, MVT::i8); +    } +  } + +  MVT I8VT = MVT::getVectorVT(MVT::i8, NumBytes); +  return DAG.getBitcast( +      VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V1), +                      DAG.getBuildVector(I8VT, DL, PSHUFBMask))); +} +  // X86 has dedicated unpack instructions that can handle specific blend  // operations: UNPCKH and UNPCKL.  static SDValue lowerVectorShuffleWithUNPCK(SDLoc DL, MVT VT, ArrayRef<int> Mask, @@ -11389,26 +11442,12 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,        return lowerV8I16GeneralSingleInputVectorShuffle(            DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG);      } - -    SDValue PSHUFBMask[32]; -    for (int i = 0; i < 16; ++i) { -      if (Mask[i] == -1) { -        PSHUFBMask[2 * i] = PSHUFBMask[2 * i + 1] = DAG.getUNDEF(MVT::i8); -        continue; -      } - -      int M = i < 8 ? 
Mask[i] : Mask[i] - 8; -      assert(M >= 0 && M < 8 && "Invalid single-input mask!"); -      PSHUFBMask[2 * i] = DAG.getConstant(2 * M, DL, MVT::i8); -      PSHUFBMask[2 * i + 1] = DAG.getConstant(2 * M + 1, DL, MVT::i8); -    } -    return DAG.getBitcast( -        MVT::v16i16, -        DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, -                    DAG.getBitcast(MVT::v32i8, V1), -                    DAG.getBuildVector(MVT::v32i8, DL, PSHUFBMask)));    } +  if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(DL, MVT::v16i16, Mask, V1, +                                                    V2, Subtarget, DAG)) +    return PSHUFB; +    // Try to simplify this by merging 128-bit lanes to enable a lane-based    // shuffle.    if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( @@ -11471,24 +11510,16 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,            DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))      return V; -  if (isSingleInputShuffleMask(Mask)) { -    // There are no generalized cross-lane shuffle operations available on i8 -    // element types. -    if (is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) -      return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, -                                                     Mask, DAG); - -    SDValue PSHUFBMask[32]; -    for (int i = 0; i < 32; ++i) -      PSHUFBMask[i] = -          Mask[i] < 0 -              ? DAG.getUNDEF(MVT::i8) -              : DAG.getConstant(Mask[i] < 16 ? Mask[i] : Mask[i] - 16, DL, -                                MVT::i8); +  // There are no generalized cross-lane shuffle operations available on i8 +  // element types. 
+  if (isSingleInputShuffleMask(Mask) && +      is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) +    return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask, +                                                   DAG); -    return DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, V1, -                       DAG.getBuildVector(MVT::v32i8, DL, PSHUFBMask)); -  } +  if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(DL, MVT::v32i8, Mask, V1, +                                                    V2, Subtarget, DAG)) +    return PSHUFB;    // Try to simplify this by merging 128-bit lanes to enable a lane-based    // shuffle. | 

