Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r-- llvm/test/CodeGen/X86/avx512bw-intrinsics.ll    |  6
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll |  3
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-avx512.ll  | 34
-rw-r--r-- llvm/test/CodeGen/X86/widened-broadcast.ll      | 73
-rw-r--r-- llvm/test/CodeGen/X86/x86-interleaved-access.ll | 12
5 files changed, 37 insertions(+), 91 deletions(-)
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
index 3337f42eb14..51f9a382ccb 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -2216,9 +2216,9 @@ define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
;
; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: kunpckwd %k1, %k0, %k0
+; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckwd %k0, %k1, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
%res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
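
[Note: this hunk updates the expected 32-bit lowering to load the two 16-bit mask halves with kmovw instead of kmovd, and swaps the kunpckwd source operands. For reference, a minimal standalone reproducer of the tested pattern — the function name is illustrative; the intrinsic signature is taken from the call above:

declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)

define i32 @concat_kmasks(i32 %x0, i32 %x1) {
  ; Concatenate two 16-bit mask values into one 32-bit mask (kunpckwd).
  %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
  ret i32 %res
}]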
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
index fa3471c2fe4..2e65bd8c75c 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -282,8 +282,7 @@ define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a
define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) {
; ALL-LABEL: shuffle_v16f32_extract_256:
; ALL: # BB#0:
-; ALL-NEXT: vmovups (%rsi), %zmm0
-; ALL-NEXT: vextractf32x8 $1, %zmm0, %ymm0
+; ALL-NEXT: vmovups 32(%rsi), %ymm0
; ALL-NEXT: retq
%ptr_a = bitcast float* %a to <16 x float>*
%v_a = load <16 x float>, <16 x float>* %ptr_a, align 4
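
[Note: the updated check lines reflect that extracting the upper 256-bit half of a 512-bit load now folds into a single 32-byte load, vmovups 32(%rsi), instead of a full zmm load plus vextractf32x8. A self-contained sketch mirroring the IR context above — the store through %RET in the original test is omitted, and the 8..15 shuffle mask is inferred from the function name and the checked codegen:

define <8 x float> @extract_high_256(float* %a) {
  ; Load 16 floats, then keep only the high eight lanes.
  %ptr_a = bitcast float* %a to <16 x float>*
  %v_a = load <16 x float>, <16 x float>* %ptr_a, align 4
  %hi = shufflevector <16 x float> %v_a, <16 x float> undef,
        <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x float> %hi
}]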
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
index 5aab21749d1..706edd27a3f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
@@ -511,11 +511,10 @@ define <8 x float> @expand14(<4 x float> %a) {
;
; KNL64-LABEL: expand14:
; KNL64: # BB#0:
+; KNL64-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,3,0,0]
+; KNL64-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
-; KNL64-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,0,u,u,u,u>
-; KNL64-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,0,0]
-; KNL64-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
; KNL64-NEXT: retq
;
@@ -529,11 +528,10 @@ define <8 x float> @expand14(<4 x float> %a) {
;
; KNL32-LABEL: expand14:
; KNL32: # BB#0:
+; KNL32-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,3,0,0]
+; KNL32-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
-; KNL32-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,0,u,u,u,u>
-; KNL32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,0,0]
-; KNL32-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
; KNL32-NEXT: retl
%addV = fadd <4 x float> <float 0.0,float 1.0,float 2.0,float 0.0> , <float 0.0,float 1.0,float 2.0,float 0.0>
@@ -545,39 +543,35 @@ define <8 x float> @expand14(<4 x float> %a) {
define <8 x float> @expand15(<4 x float> %a) {
; SKX64-LABEL: expand15:
; SKX64: # BB#0:
-; SKX64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SKX64-NEXT: vmovaps {{.*#+}} ymm0 = <0,2,4,0,u,u,u,u>
-; SKX64-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,0,0]
+; SKX64-NEXT: vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
+; SKX64-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
; SKX64-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,8,3,10,3,2,3]
-; SKX64-NEXT: vpermi2ps %ymm1, %ymm2, %ymm0
+; SKX64-NEXT: vpermi2ps %ymm2, %ymm1, %ymm0
; SKX64-NEXT: retq
;
; KNL64-LABEL: expand15:
; KNL64: # BB#0:
+; KNL64-NEXT: vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
+; KNL64-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
-; KNL64-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,0,u,u,u,u>
-; KNL64-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,1,0,0]
-; KNL64-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
; KNL64-NEXT: retq
;
; SKX32-LABEL: expand15:
; SKX32: # BB#0:
-; SKX32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SKX32-NEXT: vmovaps {{.*#+}} ymm0 = <0,2,4,0,u,u,u,u>
-; SKX32-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,0,0]
+; SKX32-NEXT: vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
+; SKX32-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
; SKX32-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,8,3,10,3,2,3]
-; SKX32-NEXT: vpermi2ps %ymm1, %ymm2, %ymm0
+; SKX32-NEXT: vpermi2ps %ymm2, %ymm1, %ymm0
; SKX32-NEXT: retl
;
; KNL32-LABEL: expand15:
; KNL32: # BB#0:
+; KNL32-NEXT: vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
+; KNL32-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
-; KNL32-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,0,u,u,u,u>
-; KNL32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,1,0,0]
-; KNL32-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
; KNL32-NEXT: retl
%addV = fadd <4 x float> <float 0.0,float 1.0,float 2.0,float 0.0> , <float 0.0,float 1.0,float 2.0,float 0.0>
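
[Note: in both expand14 and expand15 the constant vector <0,2,4,0,u,u,u,u> — the folded result of the fadd — is no longer materialized with vmovaps; it now feeds vpermilps directly as a memory operand from the constant pool, saving an instruction on each path. The test bodies are truncated in the hunks above, so the reconstruction of expand15 below is a sketch: the shuffle mask is inferred from the checked codegen, not copied from the source:

define <8 x float> @expand15_sketch(<4 x float> %a) {
  ; The fadd constant-folds to <0.0, 2.0, 4.0, 0.0>.
  %addV = fadd <4 x float> <float 0.0, float 1.0, float 2.0, float 0.0>,
               <4 x float> <float 0.0, float 1.0, float 2.0, float 0.0>
  ; Interleave constant lanes 0,1 with the low lanes of %a (mask illustrative).
  %res = shufflevector <4 x float> %addV, <4 x float> %a,
         <8 x i32> <i32 0, i32 1, i32 4, i32 undef, i32 5, i32 undef, i32 undef, i32 undef>
  ret <8 x float> %res
}]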
diff --git a/llvm/test/CodeGen/X86/widened-broadcast.ll b/llvm/test/CodeGen/X86/widened-broadcast.ll
index 6b2e4de5cda..42c4c23c634 100644
--- a/llvm/test/CodeGen/X86/widened-broadcast.ll
+++ b/llvm/test/CodeGen/X86/widened-broadcast.ll
@@ -151,8 +151,7 @@ define <8 x i32> @load_splat_8i32_8i32_01010101(<8 x i32>* %ptr) nounwind uwtabl
;
; AVX1-LABEL: load_splat_8i32_8i32_01010101:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovapd (%rdi), %ymm0
-; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -288,8 +287,7 @@ define <16 x i16> @load_splat_16i16_16i16_0101010101010101(<16 x i16>* %ptr) nou
;
; AVX1-LABEL: load_splat_16i16_16i16_0101010101010101:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -315,22 +313,10 @@ define <16 x i16> @load_splat_16i16_16i16_0123012301230123(<16 x i16>* %ptr) nou
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: retq
;
-; AVX1-LABEL: load_splat_16i16_16i16_0123012301230123:
-; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_splat_16i16_16i16_0123012301230123:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vmovaps (%rdi), %ymm0
-; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: load_splat_16i16_16i16_0123012301230123:
-; AVX512: # BB#0: # %entry
-; AVX512-NEXT: vmovaps (%rdi), %ymm0
-; AVX512-NEXT: vbroadcastsd %xmm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: load_splat_16i16_16i16_0123012301230123:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX-NEXT: retq
entry:
%ld = load <16 x i16>, <16 x i16>* %ptr
%ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3,i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
@@ -513,8 +499,7 @@ define <32 x i8> @load_splat_32i8_32i8_01010101010101010101010101010101(<32 x i8
;
; AVX1-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %ymm0
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -587,26 +572,10 @@ define <4 x float> @load_splat_4f32_8f32_0000(<8 x float>* %ptr) nounwind uwtabl
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
-; AVX1-LABEL: load_splat_4f32_8f32_0000:
-; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_splat_4f32_8f32_0000:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vmovaps (%rdi), %ymm0
-; AVX2-NEXT: vbroadcastss %xmm0, %xmm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: load_splat_4f32_8f32_0000:
-; AVX512: # BB#0: # %entry
-; AVX512-NEXT: vmovaps (%rdi), %ymm0
-; AVX512-NEXT: vbroadcastss %xmm0, %xmm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: load_splat_4f32_8f32_0000:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vbroadcastss (%rdi), %xmm0
+; AVX-NEXT: retq
entry:
%ld = load <8 x float>, <8 x float>* %ptr
%ret = shufflevector <8 x float> %ld, <8 x float> undef, <4 x i32> zeroinitializer
@@ -627,22 +596,10 @@ define <8 x float> @load_splat_8f32_16f32_89898989(<16 x float>* %ptr) nounwind
; SSE42-NEXT: movapd %xmm0, %xmm1
; SSE42-NEXT: retq
;
-; AVX1-LABEL: load_splat_8f32_16f32_89898989:
-; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vbroadcastsd 32(%rdi), %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_splat_8f32_16f32_89898989:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vbroadcastsd 32(%rdi), %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: load_splat_8f32_16f32_89898989:
-; AVX512: # BB#0: # %entry
-; AVX512-NEXT: vmovapd (%rdi), %zmm0
-; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; AVX512-NEXT: vbroadcastsd %xmm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: load_splat_8f32_16f32_89898989:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vbroadcastsd 32(%rdi), %ymm0
+; AVX-NEXT: retq
entry:
%ld = load <16 x float>, <16 x float>* %ptr
%ret = shufflevector <16 x float> %ld, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 8, i32 9, i32 8, i32 9, i32 8, i32 9>
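
[Note: the AVX1/AVX2/AVX512 check blocks collapse to a shared AVX prefix because splats sourced from a wider load now fold to a single broadcast from memory on all three targets. The pattern, taken directly from the load_splat_4f32_8f32_0000 body above:

define <4 x float> @splat_from_wide_load(<8 x float>* %ptr) {
entry:
  ; Only element 0 of the 8-float load is used, so AVX targets now
  ; emit a single vbroadcastss (%rdi), %xmm0.
  %ld = load <8 x float>, <8 x float>* %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %ret
}]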
diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
index 6fbec91e77a..f4d0503f4a7 100644
--- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -57,10 +57,8 @@ define <4 x double> @load_factorf64_1(<16 x double>* %ptr) {
; AVX1: # BB#0:
; AVX1-NEXT: vmovups (%rdi), %ymm0
; AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; AVX1-NEXT: vmovups 64(%rdi), %ymm2
-; AVX1-NEXT: vmovups 96(%rdi), %ymm3
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, 96(%rdi), %ymm1, %ymm1
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX1-NEXT: vmulpd %ymm0, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -69,10 +67,8 @@ define <4 x double> @load_factorf64_1(<16 x double>* %ptr) {
; AVX2: # BB#0:
; AVX2-NEXT: vmovupd (%rdi), %ymm0
; AVX2-NEXT: vmovupd 32(%rdi), %ymm1
-; AVX2-NEXT: vmovupd 64(%rdi), %ymm2
-; AVX2-NEXT: vmovupd 96(%rdi), %ymm3
-; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX2-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vinsertf128 $1, 96(%rdi), %ymm1, %ymm1
; AVX2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX2-NEXT: vmulpd %ymm0, %ymm0, %ymm0
; AVX2-NEXT: retq
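
[Note: the 256-bit loads at offsets 64 and 96 only had their low 128-bit halves used, so they are now folded directly into vinsertf128's memory operand, dropping two vmovups per path. The body of load_factorf64_1 is not shown in the hunks, so this reconstruction is a sketch — the stride-4 mask is inferred from the unpcklpd/vmulpd pattern in the checked output:

define <4 x double> @load_factorf64_1_sketch(<16 x double>* %ptr) {
  ; Factor-4 interleaved load: extract one strided lane and square it.
  %wide.vec = load <16 x double>, <16 x double>* %ptr, align 16
  %strided.v0 = shufflevector <16 x double> %wide.vec, <16 x double> undef,
                <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %mul = fmul <4 x double> %strided.v0, %strided.v0
  ret <4 x double> %mul
}]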