summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2017-07-03 20:58:16 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2017-07-03 20:58:16 +0000
commitfa6e67526780ec76b39f511f348376d1d8d421a2 (patch)
treeb0fb6a92ce4e56c09aa79eb1733f8b2f2f4be95f
parentfa00e34b8840414d43fad6883347b1b4d39bdd16 (diff)
downloadbcm5719-llvm-fa6e67526780ec76b39f511f348376d1d8d421a2.tar.gz
bcm5719-llvm-fa6e67526780ec76b39f511f348376d1d8d421a2.zip
[X86][SSE4A] Add support for combining from EXTRQI/INSERTQI shuffles
llvm-svn: 307048
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp22
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll17
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll10
3 files changed, 31 insertions, 18 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index be80ecd50d5..1f4bc356943 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4217,6 +4217,8 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PSHUFLW:
case X86ISD::SHUFP:
case X86ISD::INSERTPS:
+ case X86ISD::EXTRQI:
+ case X86ISD::INSERTQI:
case X86ISD::PALIGNR:
case X86ISD::VSHLDQ:
case X86ISD::VSRLDQ:
@@ -5554,6 +5556,24 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
+ case X86ISD::EXTRQI:
+ if (VT == MVT::v16i8 && isa<ConstantSDNode>(N->getOperand(1)) &&
+ isa<ConstantSDNode>(N->getOperand(2))) {
+ int BitLen = N->getConstantOperandVal(1);
+ int BitIdx = N->getConstantOperandVal(2);
+ DecodeEXTRQIMask(BitLen, BitIdx, Mask);
+ IsUnary = true;
+ }
+ break;
+ case X86ISD::INSERTQI:
+ if (VT == MVT::v16i8 && isa<ConstantSDNode>(N->getOperand(2)) &&
+ isa<ConstantSDNode>(N->getOperand(3))) {
+ int BitLen = N->getConstantOperandVal(2);
+ int BitIdx = N->getConstantOperandVal(3);
+ DecodeINSERTQIMask(BitLen, BitIdx, Mask);
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
+ }
+ break;
case X86ISD::UNPCKH:
DecodeUNPCKHMask(VT, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
@@ -35544,6 +35564,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::INSERTPS:
+ case X86ISD::EXTRQI:
+ case X86ISD::INSERTQI:
case X86ISD::PALIGNR:
case X86ISD::VSHLDQ:
case X86ISD::VSRLDQ:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
index 92cfbe124e7..0f2fe73912d 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
@@ -11,14 +11,12 @@ declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
define <16 x i8> @combine_extrqi_pshufb_16i8(<16 x i8> %a0) {
; SSE-LABEL: combine_extrqi_pshufb_16i8:
; SSE: # BB#0:
-; SSE-NEXT: extrq {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4],zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_extrqi_pshufb_16i8:
; AVX: # BB#0:
-; AVX-NEXT: extrq {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4],zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
@@ -53,20 +51,19 @@ define <8 x i16> @combine_extrqi_pshufb_8i16(<8 x i16> %a0) {
define <16 x i8> @combine_insertqi_pshufb_16i8(<16 x i8> %a0, <16 x i8> %a1) {
; SSSE3-LABEL: combine_insertqi_pshufb_16i8:
; SSSE3: # BB#0:
-; SSSE3-NEXT: insertq {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE42-LABEL: combine_insertqi_pshufb_16i8:
; SSE42: # BB#0:
-; SSE42-NEXT: insertq {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
-; SSE42-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; SSE42-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE42-NEXT: retq
;
; AVX-LABEL: combine_insertqi_pshufb_16i8:
; AVX: # BB#0:
-; AVX-NEXT: insertq {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 16, i32 17, i32 18, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll b/llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll
index 03a1cefc069..72bacd1f783 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll
@@ -66,10 +66,7 @@ define <16 x i8> @shuf_0zzzzzzz1zzzzzzz(<16 x i8> %a0) {
;
; BTVER1-LABEL: shuf_0zzzzzzz1zzzzzzz:
; BTVER1: # BB#0:
-; BTVER1-NEXT: movdqa %xmm0, %xmm1
-; BTVER1-NEXT: extrq {{.*#+}} xmm1 = xmm1[1],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
-; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; BTVER1-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_0zzzzzzz1zzzzzzz:
@@ -91,10 +88,7 @@ define <16 x i8> @shuf_2zzzzzzz3zzzzzzz(<16 x i8> %a0) {
;
; BTVER1-LABEL: shuf_2zzzzzzz3zzzzzzz:
; BTVER1: # BB#0:
-; BTVER1-NEXT: movdqa %xmm0, %xmm1
-; BTVER1-NEXT: extrq {{.*#+}} xmm1 = xmm1[3],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
-; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; BTVER1-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_2zzzzzzz3zzzzzzz:
OpenPOWER on IntegriCloud