diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-07-03 20:58:16 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-07-03 20:58:16 +0000 |
| commit | fa6e67526780ec76b39f511f348376d1d8d421a2 (patch) | |
| tree | b0fb6a92ce4e56c09aa79eb1733f8b2f2f4be95f | |
| parent | fa00e34b8840414d43fad6883347b1b4d39bdd16 (diff) | |
| download | bcm5719-llvm-fa6e67526780ec76b39f511f348376d1d8d421a2.tar.gz bcm5719-llvm-fa6e67526780ec76b39f511f348376d1d8d421a2.zip | |
[X86][SSE4A] Add support for combining from EXTRQI/INSERTQI shuffles
llvm-svn: 307048
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 22 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll | 17 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll | 10 |
3 files changed, 31 insertions, 18 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index be80ecd50d5..1f4bc356943 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4217,6 +4217,8 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PSHUFLW: case X86ISD::SHUFP: case X86ISD::INSERTPS: + case X86ISD::EXTRQI: + case X86ISD::INSERTQI: case X86ISD::PALIGNR: case X86ISD::VSHLDQ: case X86ISD::VSRLDQ: @@ -5554,6 +5556,24 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; + case X86ISD::EXTRQI: + if (VT == MVT::v16i8 && isa<ConstantSDNode>(N->getOperand(1)) && + isa<ConstantSDNode>(N->getOperand(2))) { + int BitLen = N->getConstantOperandVal(1); + int BitIdx = N->getConstantOperandVal(2); + DecodeEXTRQIMask(BitLen, BitIdx, Mask); + IsUnary = true; + } + break; + case X86ISD::INSERTQI: + if (VT == MVT::v16i8 && isa<ConstantSDNode>(N->getOperand(2)) && + isa<ConstantSDNode>(N->getOperand(3))) { + int BitLen = N->getConstantOperandVal(2); + int BitIdx = N->getConstantOperandVal(3); + DecodeINSERTQIMask(BitLen, BitIdx, Mask); + IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); + } + break; case X86ISD::UNPCKH: DecodeUNPCKHMask(VT, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); @@ -35544,6 +35564,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::INSERTPS: + case X86ISD::EXTRQI: + case X86ISD::INSERTQI: case X86ISD::PALIGNR: case X86ISD::VSHLDQ: case X86ISD::VSRLDQ: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll index 92cfbe124e7..0f2fe73912d 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll @@ -11,14 +11,12 @@ declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) define <16 x i8> @combine_extrqi_pshufb_16i8(<16 x i8> %a0) { ; SSE-LABEL: combine_extrqi_pshufb_16i8: ; SSE: # BB#0: -; SSE-NEXT: extrq {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] -; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4],zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_extrqi_pshufb_16i8: ; AVX: # BB#0: -; AVX-NEXT: extrq {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4],zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] ; AVX-NEXT: retq %1 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>) @@ -53,20 +51,19 @@ define <8 x i16> @combine_extrqi_pshufb_8i16(<8 x i16> %a0) { define <16 x i8> @combine_insertqi_pshufb_16i8(<16 x i8> %a0, <16 x i8> %a1) { ; SSSE3-LABEL: combine_insertqi_pshufb_16i8: ; SSSE3: # BB#0: -; SSSE3-NEXT: insertq {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u] -; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; SSSE3-NEXT: movl $65535, %eax # imm = 0xFFFF +; SSSE3-NEXT: movd %eax, %xmm0 +; SSSE3-NEXT: pand %xmm1, %xmm0 ; SSSE3-NEXT: retq ; ; SSE42-LABEL: combine_insertqi_pshufb_16i8: ; SSE42: # BB#0: -; SSE42-NEXT: insertq {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u] -; SSE42-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; SSE42-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; SSE42-NEXT: retq ; ; AVX-LABEL: combine_insertqi_pshufb_16i8: ; AVX: # BB#0: -; AVX-NEXT: insertq {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u] -; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX-NEXT: retq %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 16, i32 17, i32 18, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>) diff --git a/llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll b/llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll index 03a1cefc069..72bacd1f783 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll @@ -66,10 +66,7 @@ define <16 x i8> @shuf_0zzzzzzz1zzzzzzz(<16 x i8> %a0) { ; ; BTVER1-LABEL: shuf_0zzzzzzz1zzzzzzz: ; BTVER1: # BB#0: -; BTVER1-NEXT: movdqa %xmm0, %xmm1 -; BTVER1-NEXT: extrq {{.*#+}} xmm1 = xmm1[1],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u] -; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] -; BTVER1-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero ; BTVER1-NEXT: retq ; ; BTVER2-LABEL: shuf_0zzzzzzz1zzzzzzz: @@ -91,10 +88,7 @@ define <16 x i8> @shuf_2zzzzzzz3zzzzzzz(<16 x i8> %a0) { ; ; BTVER1-LABEL: shuf_2zzzzzzz3zzzzzzz: ; BTVER1: # BB#0: -; BTVER1-NEXT: movdqa %xmm0, %xmm1 -; BTVER1-NEXT: extrq {{.*#+}} xmm1 = xmm1[3],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u] -; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] -; BTVER1-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero ; BTVER1-NEXT: retq ; ; BTVER2-LABEL: shuf_2zzzzzzz3zzzzzzz: |

