diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-08-29 10:51:08 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-08-29 10:51:08 +0000 |
| commit | 6b9bf7ecbc152d5941712f967cd93d1c0e2042a7 (patch) | |
| tree | f186aae5f625f35467587cd64022ec269a8b3ad1 /llvm/lib/Target | |
| parent | edc318166fc9b2882fdab0d004bc3bdb598197d6 (diff) | |
| download | bcm5719-llvm-6b9bf7ecbc152d5941712f967cd93d1c0e2042a7.tar.gz bcm5719-llvm-6b9bf7ecbc152d5941712f967cd93d1c0e2042a7.zip | |
[X86][AVX] Prefer VPBLENDW+VPBLENDD to VPBLENDVB for v16i16 blend shuffles
Noticed while looking at D49562 codegen - we can avoid a large constant mask load and a slow VPBLENDVB select op by using VPBLENDW+VPBLENDD instead.
TODO: As discussed on the patch, we should investigate adding VPBLENDVB handling to target shuffle combining as well, that will allow us to extend this to VPBLENDW+VPBLENDW+VPBLENDD.
Differential Revision: https://reviews.llvm.org/D50074
llvm-svn: 340913
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 16 |
1 files changed, 14 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 97e64b57681..3c01c32add1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -9862,7 +9862,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, case MVT::v8f32: return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2, DAG.getConstant(BlendMask, DL, MVT::i8)); - case MVT::v4i64: case MVT::v8i32: assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!"); @@ -9894,7 +9893,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2, DAG.getConstant(BlendMask, DL, MVT::i8))); } - case MVT::v16i16: { assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!"); SmallVector<int, 8> RepeatedMask; @@ -9908,6 +9906,20 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, DAG.getConstant(BlendMask, DL, MVT::i8)); } + // Use PBLENDW for lower/upper lanes and then blend lanes. + // TODO - we should allow 2 PBLENDW here and leave shuffle combine to + // merge to VSELECT where useful. + uint64_t LoMask = BlendMask & 0xFF; + uint64_t HiMask = (BlendMask >> 8) & 0xFF; + if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) { + SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, + DAG.getConstant(LoMask, DL, MVT::i8)); + SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, + DAG.getConstant(HiMask, DL, MVT::i8)); + return DAG.getVectorShuffle( + MVT::v16i16, DL, Lo, Hi, + {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}); + } LLVM_FALLTHROUGH; } case MVT::v16i8: |

