diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 113 |
1 files changed, 91 insertions, 22 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b6a692ee187..fd00185a11a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -11629,6 +11629,81 @@ static SDValue lowerVectorShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT, DAG.getNode(ISD::TRUNCATE, DL, EltVT, Scalar)); } +/// Test whether this can be lowered with a single SHUFPS instruction. +/// +/// This is used to disable more specialized lowerings when the shufps lowering +/// will happen to be efficient. +static bool isSingleSHUFPSMask(ArrayRef<int> Mask) { + // This routine only handles 128-bit shufps. + assert(Mask.size() == 4 && "Unsupported mask size!"); + assert(Mask[0] >= -1 && Mask[0] < 8 && "Out of bound mask element!"); + assert(Mask[1] >= -1 && Mask[1] < 8 && "Out of bound mask element!"); + assert(Mask[2] >= -1 && Mask[2] < 8 && "Out of bound mask element!"); + assert(Mask[3] >= -1 && Mask[3] < 8 && "Out of bound mask element!"); + + // To lower with a single SHUFPS we need to have the low half and high half + // each requiring a single input. + if (Mask[0] >= 0 && Mask[1] >= 0 && (Mask[0] < 4) != (Mask[1] < 4)) + return false; + if (Mask[2] >= 0 && Mask[3] >= 0 && (Mask[2] < 4) != (Mask[3] < 4)) + return false; + + return true; +} + +/// If we are extracting two 128-bit halves of a vector and shuffling the +/// result, match that to a 256-bit AVX2 vperm* instruction to avoid a +/// multi-shuffle lowering. +static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0, + SDValue N1, ArrayRef<int> Mask, + SelectionDAG &DAG) { + EVT VT = N0.getValueType(); + assert((VT.is128BitVector() && + (VT.getScalarSizeInBits() == 32 || VT.getScalarSizeInBits() == 64)) && + "VPERM* family of shuffles requires 32-bit or 64-bit elements"); + + // Check that both sources are extracts of the same source vector. + if (!N0.hasOneUse() || !N1.hasOneUse() || + N0.getOpcode() != ISD::EXTRACT_SUBVECTOR || + N1.getOpcode() != ISD::EXTRACT_SUBVECTOR || + N0.getOperand(0) != N1.getOperand(0)) + return SDValue(); + + SDValue WideVec = N0.getOperand(0); + EVT WideVT = WideVec.getValueType(); + if (!WideVT.is256BitVector() || !isa<ConstantSDNode>(N0.getOperand(1)) || + !isa<ConstantSDNode>(N1.getOperand(1))) + return SDValue(); + + // Match extracts of each half of the wide source vector. Commute the shuffle + // if the extract of the low half is N1. + unsigned NumElts = VT.getVectorNumElements(); + SmallVector<int, 4> NewMask(Mask.begin(), Mask.end()); + APInt ExtIndex0 = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + APInt ExtIndex1 = cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue(); + if (ExtIndex1 == 0 && ExtIndex0 == NumElts) { + std::swap(ExtIndex0, ExtIndex1); + ShuffleVectorSDNode::commuteMask(NewMask); + } + if (ExtIndex0 != 0 || ExtIndex1 != NumElts) + return SDValue(); + + // Final bailout: if the mask is simple, we are better off using an extract + // and a simple narrow shuffle. + if (NumElts == 4 && isSingleSHUFPSMask(NewMask)) + return SDValue(); + + // Extend the shuffle mask with undef elements. + NewMask.append(NumElts, -1); + + // shuf (extract X, 0), (extract X, 4), M --> extract (shuf X, undef, M'), 0 + SDValue Shuf = DAG.getVectorShuffle(WideVT, DL, WideVec, DAG.getUNDEF(WideVT), + NewMask); + // This is free: ymm -> xmm. + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuf, + DAG.getIntPtrConstant(0, DL)); +} + /// Try to lower broadcast of a single element. /// /// For convenience, this code also bundles all of the subtarget feature set @@ -12116,6 +12191,10 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, assert(Mask[0] < 2 && "We sort V1 to be the first input."); assert(Mask[1] >= 2 && "We sort V2 to be the second input."); + if (Subtarget.hasAVX2()) + if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG)) + return Extract; + // When loading a scalar and then shuffling it into a vector we can often do // the insertion cheaply. if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( @@ -12193,6 +12272,10 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, assert(Mask[0] < 2 && "We sort V1 to be the first input."); assert(Mask[1] >= 2 && "We sort V2 to be the second input."); + if (Subtarget.hasAVX2()) + if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG)) + return Extract; + // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask, Zeroable, Subtarget, DAG)) @@ -12252,28 +12335,6 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask)); } -/// Test whether this can be lowered with a single SHUFPS instruction. -/// -/// This is used to disable more specialized lowerings when the shufps lowering -/// will happen to be efficient. -static bool isSingleSHUFPSMask(ArrayRef<int> Mask) { - // This routine only handles 128-bit shufps. - assert(Mask.size() == 4 && "Unsupported mask size!"); - assert(Mask[0] >= -1 && Mask[0] < 8 && "Out of bound mask element!"); - assert(Mask[1] >= -1 && Mask[1] < 8 && "Out of bound mask element!"); - assert(Mask[2] >= -1 && Mask[2] < 8 && "Out of bound mask element!"); - assert(Mask[3] >= -1 && Mask[3] < 8 && "Out of bound mask element!"); - - // To lower with a single SHUFPS we need to have the low half and high half - // each requiring a single input. - if (Mask[0] >= 0 && Mask[1] >= 0 && (Mask[0] < 4) != (Mask[1] < 4)) - return false; - if (Mask[2] >= 0 && Mask[3] >= 0 && (Mask[2] < 4) != (Mask[3] < 4)) - return false; - - return true; -} - /// Lower a vector shuffle using the SHUFPS instruction. /// /// This is a helper routine dedicated to lowering vector shuffles using SHUFPS. @@ -12413,6 +12474,10 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); } + if (Subtarget.hasAVX2()) + if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG)) + return Extract; + // There are special ways we can lower some single-element blends. However, we // have custom ways we can lower more complex single-element blends below that // we defer to if both this and BLENDPS fail to match, so restrict this to @@ -12501,6 +12566,10 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); } + if (Subtarget.hasAVX2()) + if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG)) + return Extract; + // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) |

