diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 29 | 
1 file changed, 27 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d5a9a2ea3c2..36d783771fd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8151,8 +8151,10 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
     InputV = ShuffleOffset(InputV);
 
     // For 256-bit vectors, we only need the lower (128-bit) input half.
-    if (VT.is256BitVector())
-      InputV = extract128BitVector(InputV, 0, DAG, DL);
+    // For 512-bit vectors, we only need the lower input half or quarter.
+    if (VT.getSizeInBits() > 128)
+      InputV = extractSubVector(InputV, 0, DAG, DL,
+                                std::max(128, (int)VT.getSizeInBits() / Scale));
 
     InputV = DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV);
     return DAG.getBitcast(VT, InputV);
@@ -12034,6 +12036,14 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
   assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
 
+  // Whenever we can lower this as a zext, that instruction is strictly faster
+  // than any alternative. It also allows us to fold memory operands into the
+  // shuffle in many cases.
+  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v16i32, V1,
+                                                         V2, Mask, Subtarget,
+                                                         DAG))
+    return ZExt;
+
   // If the shuffle mask is repeated in each 128-bit lane we can use more
   // efficient instructions that mirror the shuffles across the four 128-bit
   // lanes.
@@ -12074,6 +12084,14 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
   assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
 
+  // Whenever we can lower this as a zext, that instruction is strictly faster
+  // than any alternative. It also allows us to fold memory operands into the
+  // shuffle in many cases.
+  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v32i16, V1,
+                                                         V2, Mask, Subtarget,
+                                                         DAG))
+    return ZExt;
+
   // Use dedicated unpack instructions for masks that match their pattern.
   if (SDValue V =
           lowerVectorShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG))
@@ -12113,6 +12131,13 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
   assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!");
 
+  // Whenever we can lower this as a zext, that instruction is strictly faster
+  // than any alternative. It also allows us to fold memory operands into the
+  // shuffle in many cases.
+  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v64i8, V1, V2,
+                                                         Mask, Subtarget, DAG))
+    return ZExt;
+
   // Use dedicated unpack instructions for masks that match their pattern.
   if (SDValue V =
           lowerVectorShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG))

