| | |
|---|---|
| author | Craig Topper <craig.topper@gmail.com> 2016-10-13 05:29:41 +0000 |
| committer | Craig Topper <craig.topper@gmail.com> 2016-10-13 05:29:41 +0000 |
| commit | ff23af42992283cc06e65aa1a967330a67a36b74 (patch) |
| tree | f89dac3c30e09cb0395e38caae68f3f0d4b89d45 /llvm/lib/Target |
| parent | 05242739c202bf4e22fb5ddaab99d04fbd1be404 (diff) |
| download | bcm5719-llvm-ff23af42992283cc06e65aa1a967330a67a36b74.tar.gz, bcm5719-llvm-ff23af42992283cc06e65aa1a967330a67a36b74.zip |
[AVX-512] Teach shuffle lowering to recognize 512-bit zero extends.
llvm-svn: 284105
Diffstat (limited to 'llvm/lib/Target')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 29 |

1 file changed, 27 insertions, 2 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d5a9a2ea3c2..36d783771fd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8151,8 +8151,10 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
     InputV = ShuffleOffset(InputV);
 
     // For 256-bit vectors, we only need the lower (128-bit) input half.
-    if (VT.is256BitVector())
-      InputV = extract128BitVector(InputV, 0, DAG, DL);
+    // For 512-bit vectors, we only need the lower input half or quarter.
+    if (VT.getSizeInBits() > 128)
+      InputV = extractSubVector(InputV, 0, DAG, DL,
+                                std::max(128, (int)VT.getSizeInBits() / Scale));
 
     InputV = DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV);
     return DAG.getBitcast(VT, InputV);
@@ -12034,6 +12036,14 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
   assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
 
+  // Whenever we can lower this as a zext, that instruction is strictly faster
+  // than any alternative. It also allows us to fold memory operands into the
+  // shuffle in many cases.
+  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v16i32, V1,
+                                                         V2, Mask, Subtarget,
+                                                         DAG))
+    return ZExt;
+
   // If the shuffle mask is repeated in each 128-bit lane we can use more
   // efficient instructions that mirror the shuffles across the four 128-bit
   // lanes.
@@ -12074,6 +12084,14 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
   assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
 
+  // Whenever we can lower this as a zext, that instruction is strictly faster
+  // than any alternative. It also allows us to fold memory operands into the
+  // shuffle in many cases.
+  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v32i16, V1,
+                                                         V2, Mask, Subtarget,
+                                                         DAG))
+    return ZExt;
+
   // Use dedicated unpack instructions for masks that match their pattern.
   if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2,
                                               DAG))
@@ -12113,6 +12131,13 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
   assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!");
 
+  // Whenever we can lower this as a zext, that instruction is strictly faster
+  // than any alternative. It also allows us to fold memory operands into the
+  // shuffle in many cases.
+  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v64i8, V1, V2,
+                                                         Mask, Subtarget, DAG))
+    return ZExt;
+
   // Use dedicated unpack instructions for masks that match their pattern.
   if (SDValue V = lowerVectorShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2,
                                               DAG))
```
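The hunks above only wire the three 512-bit integer shuffle lowerings into the existing lowerVectorShuffleAsZeroOrAnyExtend helper. As a hedged illustration, not part of the commit, the sketch below (using Clang's vector extensions and the `__builtin_shufflevector` builtin; the function name is made up) shows the kind of source-level pattern this change targets: a 512-bit shuffle that interleaves bytes with zeros is semantically a zero extension, so after this patch the X86 backend should be able to match it to a single VPMOVZX-style instruction instead of a shuffle/blend sequence.

```cpp
// Hedged sketch, assuming Clang with -mavx512bw; not taken from the commit.
// A 64 x u8 vector type using the GCC/Clang vector_size extension.
typedef unsigned char v64u8 __attribute__((vector_size(64)));

// Interleave the low 32 bytes of 'a' with zero bytes. On little-endian x86,
// viewing each (a[i], 0) byte pair as a 16-bit lane gives the zero extension
// of a[i], which is exactly the pattern the v64i8 shuffle lowering can now
// recognize and emit as a 512-bit zero extend (e.g. vpmovzxbw).
v64u8 interleave_with_zero(v64u8 a) {
  v64u8 zero = {0};
  // Indices 0..63 select from 'a', 64..127 select from 'zero'.
  return __builtin_shufflevector(a, zero,
      0, 64, 1, 65, 2, 66, 3, 67, 4, 68, 5, 69, 6, 70, 7, 71,
      8, 72, 9, 73, 10, 74, 11, 75, 12, 76, 13, 77, 14, 78, 15, 79,
      16, 80, 17, 81, 18, 82, 19, 83, 20, 84, 21, 85, 22, 86, 23, 87,
      24, 88, 25, 89, 26, 90, 27, 91, 28, 92, 29, 93, 30, 94, 31, 95);
}
```

Before this change, only the 128-bit and 256-bit shuffle lowerings tried the zext path; the equivalent v16i32, v32i16, and v64i8 shuffles fell through to the generic AVX-512 shuffle strategies.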

