 llvm/lib/Target/X86/X86ISelLowering.cpp | 12 ++++++++++++
 llvm/test/CodeGen/X86/vector-trunc.ll   |  6 ++----
 2 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c6ab4fb70f6..15bd238833d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40306,6 +40306,18 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
                         : ISD::SIGN_EXTEND_VECTOR_INREG;
       return DAG.getNode(ExtOp, SDLoc(N), OpVT, InVec.getOperand(0));
     }
+    if (InOpcode == ISD::BITCAST) {
+      // TODO - do this for target shuffles in general.
+      SDValue InVecBC = peekThroughOneUseBitcasts(InVec);
+      if (InVecBC.getOpcode() == X86ISD::PSHUFB && OpVT.is128BitVector()) {
+        SDLoc DL(N);
+        SDValue SubPSHUFB =
+            DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
+                        extract128BitVector(InVecBC.getOperand(0), 0, DAG, DL),
+                        extract128BitVector(InVecBC.getOperand(1), 0, DAG, DL));
+        return DAG.getBitcast(OpVT, SubPSHUFB);
+      }
+    }
   }
 
   return SDValue();
diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll
index 0d00f8af5a8..db3692f318f 100644
--- a/llvm/test/CodeGen/X86/vector-trunc.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc.ll
@@ -1922,16 +1922,14 @@ define <8 x i16> @PR32160(<8 x i32> %x) {
 ;
 ; AVX2-SLOW-LABEL: PR32160:
 ; AVX2-SLOW:       # %bb.0:
-; AVX2-SLOW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
-; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,2,3,4,5,6,7]
+; AVX2-SLOW-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
 ; AVX2-SLOW-NEXT:    vpbroadcastd %xmm0, %xmm0
 ; AVX2-SLOW-NEXT:    vzeroupper
 ; AVX2-SLOW-NEXT:    retq
 ;
 ; AVX2-FAST-LABEL: PR32160:
 ; AVX2-FAST:       # %bb.0:
-; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
-; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,4,5,4,5,4,5,4,5,4,5,4,5,4,5]
+; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,8,9,8,9,8,9,8,9]
 ; AVX2-FAST-NEXT:    vzeroupper
 ; AVX2-FAST-NEXT:    retq
 ;

