summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 12
-rw-r--r-- llvm/test/CodeGen/X86/vector-trunc.ll 6
2 files changed, 14 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c6ab4fb70f6..15bd238833d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40306,6 +40306,18 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
: ISD::SIGN_EXTEND_VECTOR_INREG;
return DAG.getNode(ExtOp, SDLoc(N), OpVT, InVec.getOperand(0));
}
+ if (InOpcode == ISD::BITCAST) {
+ // TODO - do this for target shuffles in general.
+ SDValue InVecBC = peekThroughOneUseBitcasts(InVec);
+ if (InVecBC.getOpcode() == X86ISD::PSHUFB && OpVT.is128BitVector()) {
+ SDLoc DL(N);
+ SDValue SubPSHUFB =
+ DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
+ extract128BitVector(InVecBC.getOperand(0), 0, DAG, DL),
+ extract128BitVector(InVecBC.getOperand(1), 0, DAG, DL));
+ return DAG.getBitcast(OpVT, SubPSHUFB);
+ }
+ }
}
return SDValue();
diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll
index 0d00f8af5a8..db3692f318f 100644
--- a/llvm/test/CodeGen/X86/vector-trunc.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc.ll
@@ -1922,16 +1922,14 @@ define <8 x i16> @PR32160(<8 x i32> %x) {
;
; AVX2-SLOW-LABEL: PR32160:
; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
-; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,2,3,4,5,6,7]
+; AVX2-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: PR32160:
; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
-; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,4,5,4,5,4,5,4,5,4,5,4,5,4,5]
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
;
OpenPOWER on IntegriCloud