| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-02-03 16:51:33 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-02-03 16:51:33 +0000 |
| commit | 1fce5a8b75155963979c4f38158f96ff2ed30442 | |
| tree | af590e0ad33df6d222e3eba672e4608eef136726 /llvm/test | |
| parent | 837552fe9f216d53beedd0a62f9a94854934e46e | |
[X86][AVX] Support shuffle combining for VBROADCAST with smaller vector sources
getTargetShuffleMask can only do this safely if we're extracting the lowest subvector from a vector of the same result type.
llvm-svn: 352999
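
To make the change concrete, the affected pattern looks roughly like the IR below (a minimal sketch with a hypothetical function name; the committed coverage is in partial_permute.ll in the diff that follows). The `<4 x i64>` result takes element 4 from the upper half of the `<8 x i64>` source, which previously forced an explicit broadcast of the extracted subvector before the variable permute:

```llvm
; Sketch only. Elements 2, 0 and 3 come from the low half of %vec and
; element 4 from the high half. Before this patch the lowering was
; vextracti32x4 + vpbroadcastq + vpermi2q, with index 7 reading a lane of
; the broadcast; getTargetShuffleMask can now see through the VBROADCAST
; of the smaller source, so the permute mask uses index 4 directly and
; the broadcast disappears.
define <4 x i64> @shuffle_v8i64_2034(<8 x i64> %vec) {
  %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 4>
  ret <4 x i64> %res
}
```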
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll | 21 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-avx512.ll | 10 |
2 files changed, 14 insertions(+), 17 deletions(-)
diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
index 4d285006a73..174a9e5bd32 100644
--- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
+++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
@@ -2174,8 +2174,7 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mask7(<8 x i64> %vec, <4 x i64
 ; CHECK-LABEL: test_masked_8xi64_to_4xi64_perm_mask7:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
-; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
-; CHECK-NEXT:    vmovdqa {{.*#+}} ymm4 = [2,0,3,7]
+; CHECK-NEXT:    vmovdqa {{.*#+}} ymm4 = [2,0,3,4]
 ; CHECK-NEXT:    vpermi2q %ymm3, %ymm0, %ymm4
 ; CHECK-NEXT:    vptestnmq %ymm2, %ymm2, %k1
 ; CHECK-NEXT:    vpblendmq %ymm4, %ymm1, %ymm0 {%k1}
@@ -2189,9 +2188,8 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mask7(<8 x i64> %vec, <4 x i64
 define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mask7(<8 x i64> %vec, <4 x i64> %mask) {
 ; CHECK-LABEL: test_masked_z_8xi64_to_4xi64_perm_mask7:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
-; CHECK-NEXT:    vpbroadcastq %xmm2, %ymm3
-; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [2,0,3,7]
+; CHECK-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
+; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [2,0,3,4]
 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
 ; CHECK-NEXT:    vpermi2q %ymm3, %ymm0, %ymm2 {%k1} {z}
 ; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
@@ -3806,9 +3804,8 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask2(<8 x double>
 define <4 x double> @test_8xdouble_to_4xdouble_perm_mask3(<8 x double> %vec) {
 ; CHECK-LABEL: test_8xdouble_to_4xdouble_perm_mask3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vextractf32x4 $2, %zmm0, %xmm1
-; CHECK-NEXT:    vbroadcastsd %xmm1, %ymm2
-; CHECK-NEXT:    vmovapd {{.*#+}} ymm1 = [0,2,1,7]
+; CHECK-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
+; CHECK-NEXT:    vmovapd {{.*#+}} ymm1 = [0,2,1,4]
 ; CHECK-NEXT:    vpermi2pd %ymm2, %ymm0, %ymm1
 ; CHECK-NEXT:    vmovapd %ymm1, %ymm0
 ; CHECK-NEXT:    retq
@@ -3819,8 +3816,7 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask3(<8 x double> %v
 ; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask3:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vextractf32x4 $2, %zmm0, %xmm3
-; CHECK-NEXT:    vbroadcastsd %xmm3, %ymm3
-; CHECK-NEXT:    vmovapd {{.*#+}} ymm4 = [0,2,1,7]
+; CHECK-NEXT:    vmovapd {{.*#+}} ymm4 = [0,2,1,4]
 ; CHECK-NEXT:    vpermi2pd %ymm3, %ymm0, %ymm4
 ; CHECK-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    vcmpeqpd %ymm0, %ymm2, %k1
@@ -3835,9 +3831,8 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask3(<8 x double> %v
 define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask3(<8 x double> %vec, <4 x double> %mask) {
 ; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
-; CHECK-NEXT:    vbroadcastsd %xmm2, %ymm3
-; CHECK-NEXT:    vmovapd {{.*#+}} ymm2 = [0,2,1,7]
+; CHECK-NEXT:    vextractf32x4 $2, %zmm0, %xmm3
+; CHECK-NEXT:    vmovapd {{.*#+}} ymm2 = [0,2,1,4]
 ; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
 ; CHECK-NEXT:    vcmpeqpd %ymm4, %ymm1, %k1
 ; CHECK-NEXT:    vpermi2pd %ymm3, %ymm0, %ymm2 {%k1} {z}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
index ee0af90d093..2092b3bf453 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
@@ -190,9 +190,10 @@ define <4 x i64> @expand4(<2 x i64> %a ) {
 define <8 x float> @expand5(<4 x float> %a ) {
 ; SKX64-LABEL: expand5:
 ; SKX64:       # %bb.0:
-; SKX64-NEXT:    vbroadcastss %xmm0, %ymm0
+; SKX64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; SKX64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; SKX64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
+; SKX64-NEXT:    vmovaps {{.*#+}} ymm2 = [8,0,10,0,12,0,14,0]
+; SKX64-NEXT:    vpermt2ps %ymm1, %ymm2, %ymm0
 ; SKX64-NEXT:    retq
 ;
 ; KNL64-LABEL: expand5:
@@ -204,9 +205,10 @@ define <8 x float> @expand5(<4 x float> %a ) {
 ;
 ; SKX32-LABEL: expand5:
 ; SKX32:       # %bb.0:
-; SKX32-NEXT:    vbroadcastss %xmm0, %ymm0
+; SKX32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; SKX32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; SKX32-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
+; SKX32-NEXT:    vmovaps {{.*#+}} ymm2 = [8,0,10,0,12,0,14,0]
+; SKX32-NEXT:    vpermt2ps %ymm1, %ymm2, %ymm0
 ; SKX32-NEXT:    retl
 ;
 ; KNL32-LABEL: expand5:
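
The expand5 change is the same idea from the other direction: the old lowering broadcast %a's low element across a YMM register and then blended with zero, while the combined form feeds the 128-bit value straight into a single vpermt2ps. The test's exact IR is not part of this diff, but a shuffle with the checked behaviour (zeros in the even lanes, element 0 of %a in the odd lanes) would look roughly like:

```llvm
; Sketch only; the committed test lives in vector-shuffle-avx512.ll.
; Indices 4-7 select the zero operand for the even result lanes, and
; index 0 repeats element 0 of %a in the odd lanes, giving the same
; result as the [8,0,10,0,12,0,14,0] vpermt2ps index pattern in the
; checked assembly.
define <8 x float> @expand5_like(<4 x float> %a) {
  %res = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <8 x i32> <i32 4, i32 0, i32 5, i32 0, i32 6, i32 0, i32 7, i32 0>
  ret <8 x float> %res
}
```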

