diff options
| author | Chandler Carruth <chandlerc@gmail.com> | 2014-07-10 04:34:06 +0000 |
|---|---|---|
| committer | Chandler Carruth <chandlerc@gmail.com> | 2014-07-10 04:34:06 +0000 |
| commit | 7d2ffb549285b814da284cfced222c827bffa90f (patch) | |
| tree | f1fd9dc3ad057ad0b3eb4b56b38cf8a990f41142 /llvm/lib/Target/X86/X86ISelLowering.cpp | |
| parent | 05b9ebf2f96b7d6f4d8110195d4d728e892e84c8 (diff) | |
| download | bcm5719-llvm-7d2ffb549285b814da284cfced222c827bffa90f.tar.gz bcm5719-llvm-7d2ffb549285b814da284cfced222c827bffa90f.zip | |
[x86] Initial improvements to the new shuffle lowering for v16i8
shuffles specifically for cases where a small subset of the elements in
the input vector are actually used.
This is specifically targetted at improving the shuffles generated for
trunc operations, but also helps out splat-like operations.
There is still some really low-hanging fruit here that I want to address
but this is a huge step in the right direction.
llvm-svn: 212680
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 46 |
1 files changed, 36 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 140b2a830ce..57b21cce12c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7815,16 +7815,42 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, SDValue Zero = getZeroVector(MVT::v8i16, Subtarget, DAG, DL); - auto buildLoAndHiV8s = - [&](SDValue V, ArrayRef<int> LoBlendMask, ArrayRef<int> HiBlendMask) { - SDValue LoV = - DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, - DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero)); - SDValue HiV = - DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, - DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero)); - SDValue BlendedLo = DAG.getVectorShuffle(MVT::v8i16, DL, LoV, HiV, LoBlendMask); - SDValue BlendedHi = DAG.getVectorShuffle(MVT::v8i16, DL, LoV, HiV, HiBlendMask); + auto buildLoAndHiV8s = [&](SDValue V, MutableArrayRef<int> LoBlendMask, + MutableArrayRef<int> HiBlendMask) { + SDValue V1, V2; + // Check if any of the odd lanes in the v16i8 are used. If not, we can mask + // them out and avoid using UNPCK{L,H} to extract the elements of V as + // i16s. + if (std::none_of(LoBlendMask.begin(), LoBlendMask.end(), + [](int M) { return M >= 0 && M % 2 == 1; }) && + std::none_of(HiBlendMask.begin(), HiBlendMask.end(), + [](int M) { return M >= 0 && M % 2 == 1; })) { + // Use a mask to drop the high bytes. + V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V); + V1 = DAG.getNode(ISD::AND, DL, MVT::v8i16, V1, + DAG.getConstant(0x00FF, MVT::v8i16)); + + // This will be a single vector shuffle instead of a blend so nuke V2. + V2 = DAG.getUNDEF(MVT::v8i16); + + // Squash the masks to point directly into V1. + for (int &M : LoBlendMask) + if (M >= 0) + M /= 2; + for (int &M : HiBlendMask) + if (M >= 0) + M /= 2; + } else { + // Otherwise just unpack the low half of V into V1 and the high half into + // V2 so that we can blend them as i16s. + V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, + DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero)); + V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, + DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero)); + } + + SDValue BlendedLo = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, LoBlendMask); + SDValue BlendedHi = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, HiBlendMask); return std::make_pair(BlendedLo, BlendedHi); }; SDValue V1Lo, V1Hi, V2Lo, V2Hi; |

