summary | refs | log | tree | commit | diff | stats
path: root/llvm/test
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-02-03 16:51:33 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-02-03 16:51:33 +0000
commit: 1fce5a8b75155963979c4f38158f96ff2ed30442 (patch)
tree: af590e0ad33df6d222e3eba672e4608eef136726 /llvm/test
parent: 837552fe9f216d53beedd0a62f9a94854934e46e (diff)
download: bcm5719-llvm-1fce5a8b75155963979c4f38158f96ff2ed30442.tar.gz
          bcm5719-llvm-1fce5a8b75155963979c4f38158f96ff2ed30442.zip
[X86][AVX] Support shuffle combining for VBROADCAST with smaller vector sources
getTargetShuffleMask can only do this safely if we're extracting the lowest subvector from a vector of the same result type.

llvm-svn: 352999
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll | 21
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-avx512.ll | 10
2 files changed, 14 insertions, 17 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
index 4d285006a73..174a9e5bd32 100644
--- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
+++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
@@ -2174,8 +2174,7 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mask7(<8 x i64> %vec, <4 x i64
; CHECK-LABEL: test_masked_8xi64_to_4xi64_perm_mask7:
; CHECK: # %bb.0:
; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm3
-; CHECK-NEXT: vpbroadcastq %xmm3, %ymm3
-; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [2,0,3,7]
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [2,0,3,4]
; CHECK-NEXT: vpermi2q %ymm3, %ymm0, %ymm4
; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
; CHECK-NEXT: vpblendmq %ymm4, %ymm1, %ymm0 {%k1}
@@ -2189,9 +2188,8 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mask7(<8 x i64> %vec, <4 x i64
define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mask7(<8 x i64> %vec, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_8xi64_to_4xi64_perm_mask7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; CHECK-NEXT: vpbroadcastq %xmm2, %ymm3
-; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [2,0,3,7]
+; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm3
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [2,0,3,4]
; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT: vpermi2q %ymm3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
@@ -3806,9 +3804,8 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask2(<8 x double>
define <4 x double> @test_8xdouble_to_4xdouble_perm_mask3(<8 x double> %vec) {
; CHECK-LABEL: test_8xdouble_to_4xdouble_perm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1
-; CHECK-NEXT: vbroadcastsd %xmm1, %ymm2
-; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [0,2,1,7]
+; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm2
+; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [0,2,1,4]
; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@@ -3819,8 +3816,7 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask3(<8 x double> %v
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm3
-; CHECK-NEXT: vbroadcastsd %xmm3, %ymm3
-; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,1,7]
+; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,1,4]
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm4
; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1
@@ -3835,9 +3831,8 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask3(<8 x double> %v
define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask3(<8 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm2
-; CHECK-NEXT: vbroadcastsd %xmm2, %ymm3
-; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,1,7]
+; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm3
+; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,1,4]
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm2 {%k1} {z}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
index ee0af90d093..2092b3bf453 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
@@ -190,9 +190,10 @@ define <4 x i64> @expand4(<2 x i64> %a ) {
define <8 x float> @expand5(<4 x float> %a ) {
; SKX64-LABEL: expand5:
; SKX64: # %bb.0:
-; SKX64-NEXT: vbroadcastss %xmm0, %ymm0
+; SKX64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX64-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; SKX64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
+; SKX64-NEXT: vmovaps {{.*#+}} ymm2 = [8,0,10,0,12,0,14,0]
+; SKX64-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
; SKX64-NEXT: retq
;
; KNL64-LABEL: expand5:
@@ -204,9 +205,10 @@ define <8 x float> @expand5(<4 x float> %a ) {
;
; SKX32-LABEL: expand5:
; SKX32: # %bb.0:
-; SKX32-NEXT: vbroadcastss %xmm0, %ymm0
+; SKX32-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX32-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; SKX32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
+; SKX32-NEXT: vmovaps {{.*#+}} ymm2 = [8,0,10,0,12,0,14,0]
+; SKX32-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
; SKX32-NEXT: retl
;
; KNL32-LABEL: expand5:
OpenPOWER on IntegriCloud