diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 7 |
1 files changed, 4 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ff00d9e5e00..4df7621bee2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -12151,6 +12151,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { + // With AVX2, use VPERMQ/VPERMPD for unary shuffles to allow memory folding. + if (Subtarget.hasAVX2() && V2.isUndef()) + return SDValue(); + SmallVector<int, 4> WidenedMask; if (!canWidenShuffleElements(Mask, WidenedMask)) return SDValue(); @@ -12174,9 +12178,6 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, // subvector. bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}); if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) { - // With AVX2, use VPERMQ/VPERMPD to allow memory folding. - if (Subtarget.hasAVX2() && V2.isUndef()) - return SDValue(); // With AVX1, use vperm2f128 (below) to allow load folding. Otherwise, // this will likely become vinsertf128 which can't fold a 256-bit memop. |