diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 16 |
1 file changed, 14 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 97e64b57681..3c01c32add1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -9862,7 +9862,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, case MVT::v8f32: return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2, DAG.getConstant(BlendMask, DL, MVT::i8)); - case MVT::v4i64: case MVT::v8i32: assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!"); @@ -9894,7 +9893,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2, DAG.getConstant(BlendMask, DL, MVT::i8))); } - case MVT::v16i16: { assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!"); SmallVector<int, 8> RepeatedMask; @@ -9908,6 +9906,20 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, DAG.getConstant(BlendMask, DL, MVT::i8)); } + // Use PBLENDW for lower/upper lanes and then blend lanes. + // TODO - we should allow 2 PBLENDW here and leave shuffle combine to + // merge to VSELECT where useful. + uint64_t LoMask = BlendMask & 0xFF; + uint64_t HiMask = (BlendMask >> 8) & 0xFF; + if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) { + SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, + DAG.getConstant(LoMask, DL, MVT::i8)); + SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, + DAG.getConstant(HiMask, DL, MVT::i8)); + return DAG.getVectorShuffle( + MVT::v16i16, DL, Lo, Hi, + {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}); + } LLVM_FALLTHROUGH; } case MVT::v16i8: |