| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-11-28 16:25:01 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-11-28 16:25:01 +0000 |
| commit | 3f10e669817f174cb99da771e1cd6ecbbfb44fa1 | |
| tree | 3944b493e1be9c23aee12fb82b3b5fd81dc18b95 /llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll | |
| parent | 7fcacd8e0e9fd5dc937da96c77b7b2239e022f69 | |
[X86][SSE] Added support for combining bit-shifts with shuffles.
Bit-shifts by a whole number of bytes can be represented as a shuffle mask suitable for combining.
Added a 'getFauxShuffleMask' function to allow us to create shuffle masks from other suitable operations.
llvm-svn: 288040
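
The combine can be illustrated with a small standalone model. The C++ sketch below is hypothetical (the names `shiftAsByteShuffle`, `compose` and `kZero` are illustrative helpers, not LLVM's actual `getFauxShuffleMask` API): it builds the per-byte shuffle mask implied by a logical shift by a whole number of bytes, folds it into a following PSHUFB mask, and reproduces the combined zero/ymm0[1]/zero/ymm0[3]/... pattern seen in the updated `combine_psrlw_pshufb` checks in the diff below.

```cpp
// Hypothetical standalone sketch (assumed names, not LLVM's actual
// getFauxShuffleMask): model a logical right shift by a whole number of bytes
// as a per-byte shuffle mask and fold it into a following PSHUFB mask.
#include <cstdio>
#include <vector>

constexpr int kZero = -1; // lane that is filled with zero

// Mask for "lshr each EltBytes-wide element by ShiftBytes" on a NumBytes-wide
// vector. Little-endian: a logical right shift pulls higher-numbered bytes of
// each element down towards byte 0 and zero-fills the top bytes.
std::vector<int> shiftAsByteShuffle(int NumBytes, int EltBytes, int ShiftBytes) {
  std::vector<int> Mask(NumBytes, kZero);
  for (int Elt = 0; Elt < NumBytes / EltBytes; ++Elt)
    for (int B = 0; B + ShiftBytes < EltBytes; ++B)
      Mask[Elt * EltBytes + B] = Elt * EltBytes + B + ShiftBytes;
  return Mask;
}

// Compose two masks: Outer indexes into the shifted value, whose lanes are
// either zero or lanes of the original input, so both collapse into one mask.
std::vector<int> compose(const std::vector<int> &Outer,
                         const std::vector<int> &Inner) {
  std::vector<int> Mask(Outer.size(), kZero);
  for (int I = 0, E = (int)Outer.size(); I != E; ++I)
    if (Outer[I] != kZero)
      Mask[I] = Inner[Outer[I]];
  return Mask;
}

int main() {
  // combine_psrlw_pshufb: <16 x i16> lshr by 8 bits (1 byte per i16 element),
  // followed by a byte-swap-within-words PSHUFB mask {1,0,3,2,...}.
  std::vector<int> Shift = shiftAsByteShuffle(32, /*EltBytes=*/2, /*ShiftBytes=*/1);
  std::vector<int> Swap;
  for (int I = 0; I < 32; I += 2) {
    Swap.push_back(I + 1);
    Swap.push_back(I);
  }
  std::vector<int> Combined = compose(Swap, Shift);
  // Prints: zero,1,zero,3,... - i.e. the zero,ymm0[1],zero,ymm0[3],... pattern
  // of the single VPSHUFB in the updated test checks.
  for (int M : Combined)
    if (M == kZero)
      std::printf("zero,");
    else
      std::printf("%d,", M);
  std::printf("\n");
}
```

A shift whose amount is not a multiple of 8 bits mixes bits across byte boundaries and cannot be expressed as a byte shuffle, which is why the combine is restricted to whole-byte shift amounts.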
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll | 18 |

1 file changed, 6 insertions, 12 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index 2be4a06363e..396d6a34156 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -599,14 +599,12 @@ define <32 x i8> @combine_pshufb_not_as_pshufw(<32 x i8> %a0) {
 define <32 x i8> @combine_psrlw_pshufb(<16 x i16> %a0) {
 ; X32-LABEL: combine_psrlw_pshufb:
 ; X32:       # BB#0:
-; X32-NEXT:    vpsrlw $8, %ymm0, %ymm0
-; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30]
+; X32-NEXT:    vpshufb {{.*#+}} ymm0 = zero,ymm0[1],zero,ymm0[3],zero,ymm0[5],zero,ymm0[7],zero,ymm0[9],zero,ymm0[11],zero,ymm0[13],zero,ymm0[15],zero,ymm0[17],zero,ymm0[19],zero,ymm0[21],zero,ymm0[23],zero,ymm0[25],zero,ymm0[27],zero,ymm0[29],zero,ymm0[31]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_psrlw_pshufb:
 ; X64:       # BB#0:
-; X64-NEXT:    vpsrlw $8, %ymm0, %ymm0
-; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30]
+; X64-NEXT:    vpshufb {{.*#+}} ymm0 = zero,ymm0[1],zero,ymm0[3],zero,ymm0[5],zero,ymm0[7],zero,ymm0[9],zero,ymm0[11],zero,ymm0[13],zero,ymm0[15],zero,ymm0[17],zero,ymm0[19],zero,ymm0[21],zero,ymm0[23],zero,ymm0[25],zero,ymm0[27],zero,ymm0[29],zero,ymm0[31]
 ; X64-NEXT:    retq
   %1 = lshr <16 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
   %2 = bitcast <16 x i16> %1 to <32 x i8>
@@ -617,14 +615,12 @@ define <32 x i8> @combine_psrlw_pshufb(<16 x i16> %a0) {
 define <32 x i8> @combine_pslld_pshufb(<8 x i32> %a0) {
 ; X32-LABEL: combine_pslld_pshufb:
 ; X32:       # BB#0:
-; X32-NEXT:    vpslld $24, %ymm0, %ymm0
-; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28]
+; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0],zero,zero,zero,ymm0[4],zero,zero,zero,ymm0[8],zero,zero,zero,ymm0[12],zero,zero,zero,ymm0[16],zero,zero,zero,ymm0[20],zero,zero,zero,ymm0[24],zero,zero,zero,ymm0[28],zero,zero,zero
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_pslld_pshufb:
 ; X64:       # BB#0:
-; X64-NEXT:    vpslld $24, %ymm0, %ymm0
-; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28]
+; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0],zero,zero,zero,ymm0[4],zero,zero,zero,ymm0[8],zero,zero,zero,ymm0[12],zero,zero,zero,ymm0[16],zero,zero,zero,ymm0[20],zero,zero,zero,ymm0[24],zero,zero,zero,ymm0[28],zero,zero,zero
 ; X64-NEXT:    retq
   %1 = shl <8 x i32> %a0, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
   %2 = bitcast <8 x i32> %1 to <32 x i8>
@@ -635,14 +631,12 @@ define <32 x i8> @combine_pslld_pshufb(<8 x i32> %a0) {
 define <32 x i8> @combine_psrlq_pshufb(<4 x i64> %a0) {
 ; X32-LABEL: combine_psrlq_pshufb:
 ; X32:       # BB#0:
-; X32-NEXT:    vpsrlq $32, %ymm0, %ymm0
-; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,31,30,29,28,27,26,25,24,23]
+; X32-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0[7,6,5,4],zero,zero,zero,zero,ymm0[15,14,13,12],zero,zero,zero,zero,ymm0[23,22,21],zero,zero,zero,zero,ymm0[31,30,29,28],zero
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_psrlq_pshufb:
 ; X64:       # BB#0:
-; X64-NEXT:    vpsrlq $32, %ymm0, %ymm0
-; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,31,30,29,28,27,26,25,24,23]
+; X64-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0[7,6,5,4],zero,zero,zero,zero,ymm0[15,14,13,12],zero,zero,zero,zero,ymm0[23,22,21],zero,zero,zero,zero,ymm0[31,30,29,28],zero
 ; X64-NEXT:    retq
   %1 = lshr <4 x i64> %a0, <i64 32, i64 32, i64 32, i64 32>
   %2 = bitcast <4 x i64> %1 to <32 x i8>

