summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2018-08-29 10:51:08 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2018-08-29 10:51:08 +0000
commit6b9bf7ecbc152d5941712f967cd93d1c0e2042a7 (patch)
treef186aae5f625f35467587cd64022ec269a8b3ad1 /llvm/lib/Target
parentedc318166fc9b2882fdab0d004bc3bdb598197d6 (diff)
downloadbcm5719-llvm-6b9bf7ecbc152d5941712f967cd93d1c0e2042a7.tar.gz
bcm5719-llvm-6b9bf7ecbc152d5941712f967cd93d1c0e2042a7.zip
[X86][AVX] Prefer VPBLENDW+VPBLENDD to VPBLENDVB for v16i16 blend shuffles
Noticed while looking at D49562 codegen - we can avoid a large constant mask load and a slow VPBLENDVB select op by using VPBLENDW+VPBLENDD instead. TODO: As discussed on the patch, we should also investigate adding VPBLENDVB handling to target shuffle combining, which will allow us to extend this to VPBLENDW+VPBLENDW+VPBLENDD. Differential Revision: https://reviews.llvm.org/D50074 llvm-svn: 340913
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 16
1 files changed, 14 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 97e64b57681..3c01c32add1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9862,7 +9862,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
case MVT::v8f32:
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
-
case MVT::v4i64:
case MVT::v8i32:
assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
@@ -9894,7 +9893,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8)));
}
-
case MVT::v16i16: {
assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
SmallVector<int, 8> RepeatedMask;
@@ -9908,6 +9906,20 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
}
+ // Use PBLENDW for lower/upper lanes and then blend lanes.
+ // TODO - we should allow 2 PBLENDW here and leave shuffle combine to
+ // merge to VSELECT where useful.
+ uint64_t LoMask = BlendMask & 0xFF;
+ uint64_t HiMask = (BlendMask >> 8) & 0xFF;
+ if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) {
+ SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
+ DAG.getConstant(LoMask, DL, MVT::i8));
+ SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
+ DAG.getConstant(HiMask, DL, MVT::i8));
+ return DAG.getVectorShuffle(
+ MVT::v16i16, DL, Lo, Hi,
+ {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31});
+ }
LLVM_FALLTHROUGH;
}
case MVT::v16i8:
OpenPOWER on IntegriCloud