Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r--   llvm/test/CodeGen/X86/avx512bw-intrinsics.ll      |  6
-rw-r--r--   llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll    |  3
-rw-r--r--   llvm/test/CodeGen/X86/vector-shuffle-avx512.ll     | 34
-rw-r--r--   llvm/test/CodeGen/X86/widened-broadcast.ll         | 73
-rw-r--r--   llvm/test/CodeGen/X86/x86-interleaved-access.ll    | 12
5 files changed, 37 insertions, 91 deletions
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
index 3337f42eb14..51f9a382ccb 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -2216,9 +2216,9 @@ define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
 ;
 ; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: kunpckwd %k1, %k0, %k0
+; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckwd %k0, %k1, %k0
 ; AVX512F-32-NEXT: kmovd %k0, %eax
 ; AVX512F-32-NEXT: retl
   %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
index fa3471c2fe4..2e65bd8c75c 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -282,8 +282,7 @@ define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a
 define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) {
 ; ALL-LABEL: shuffle_v16f32_extract_256:
 ; ALL: # BB#0:
-; ALL-NEXT: vmovups (%rsi), %zmm0
-; ALL-NEXT: vextractf32x8 $1, %zmm0, %ymm0
+; ALL-NEXT: vmovups 32(%rsi), %ymm0
 ; ALL-NEXT: retq
   %ptr_a = bitcast float* %a to <16 x float>*
   %v_a = load <16 x float>, <16 x float>* %ptr_a, align 4
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
index 5aab21749d1..706edd27a3f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
@@ -511,11 +511,10 @@ define <8 x float> @expand14(<4 x float> %a) {
 ;
 ; KNL64-LABEL: expand14:
 ; KNL64: # BB#0:
+; KNL64-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,3,0,0]
+; KNL64-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
 ; KNL64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
 ; KNL64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
-; KNL64-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,0,u,u,u,u>
-; KNL64-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,0,0]
-; KNL64-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
 ; KNL64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
 ; KNL64-NEXT: retq
 ;
@@ -529,11 +528,10 @@ define <8 x float> @expand14(<4 x float> %a) {
 ;
 ; KNL32-LABEL: expand14:
 ; KNL32: # BB#0:
+; KNL32-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,3,0,0]
+; KNL32-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
 ; KNL32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
 ; KNL32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
-; KNL32-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,0,u,u,u,u>
-; KNL32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,0,0]
-; KNL32-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
 ; KNL32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
 ; KNL32-NEXT: retl
   %addV = fadd <4 x float> <float 0.0,float 1.0,float 2.0,float 0.0> , <float 0.0,float 1.0,float 2.0,float 0.0>
@@ -545,39 +543,35 @@ define <8 x float> @expand14(<4 x float> %a) {
 define <8 x float> @expand15(<4 x float> %a) {
 ; SKX64-LABEL: expand15:
 ; SKX64: # BB#0:
-; SKX64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SKX64-NEXT: vmovaps {{.*#+}} ymm0 = <0,2,4,0,u,u,u,u>
-; SKX64-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,0,0]
+; SKX64-NEXT: vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
+; SKX64-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
 ; SKX64-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,8,3,10,3,2,3]
-; SKX64-NEXT: vpermi2ps %ymm1, %ymm2, %ymm0
+; SKX64-NEXT: vpermi2ps %ymm2, %ymm1, %ymm0
 ; SKX64-NEXT: retq
 ;
 ; KNL64-LABEL: expand15:
 ; KNL64: # BB#0:
+; KNL64-NEXT: vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
+; KNL64-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
 ; KNL64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
 ; KNL64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
-; KNL64-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,0,u,u,u,u>
-; KNL64-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,1,0,0]
-; KNL64-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
 ; KNL64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
 ; KNL64-NEXT: retq
 ;
 ; SKX32-LABEL: expand15:
 ; SKX32: # BB#0:
-; SKX32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SKX32-NEXT: vmovaps {{.*#+}} ymm0 = <0,2,4,0,u,u,u,u>
-; SKX32-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,0,0]
+; SKX32-NEXT: vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
+; SKX32-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
 ; SKX32-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,8,3,10,3,2,3]
-; SKX32-NEXT: vpermi2ps %ymm1, %ymm2, %ymm0
+; SKX32-NEXT: vpermi2ps %ymm2, %ymm1, %ymm0
 ; SKX32-NEXT: retl
 ;
 ; KNL32-LABEL: expand15:
 ; KNL32: # BB#0:
+; KNL32-NEXT: vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
+; KNL32-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
 ; KNL32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
 ; KNL32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
-; KNL32-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,0,u,u,u,u>
-; KNL32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,1,0,0]
-; KNL32-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
 ; KNL32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
 ; KNL32-NEXT: retl
   %addV = fadd <4 x float> <float 0.0,float 1.0,float 2.0,float 0.0> , <float 0.0,float 1.0,float 2.0,float 0.0>
diff --git a/llvm/test/CodeGen/X86/widened-broadcast.ll b/llvm/test/CodeGen/X86/widened-broadcast.ll
index 6b2e4de5cda..42c4c23c634 100644
--- a/llvm/test/CodeGen/X86/widened-broadcast.ll
+++ b/llvm/test/CodeGen/X86/widened-broadcast.ll
@@ -151,8 +151,7 @@ define <8 x i32> @load_splat_8i32_8i32_01010101(<8 x i32>* %ptr) nounwind uwtabl
 ;
 ; AVX1-LABEL: load_splat_8i32_8i32_01010101:
 ; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovapd (%rdi), %ymm0
-; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
@@ -288,8 +287,7 @@ define <16 x i16> @load_splat_16i16_16i16_0101010101010101(<16 x i16>* %ptr) nou
 ;
 ; AVX1-LABEL: load_splat_16i16_16i16_0101010101010101:
 ; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
@@ -315,22 +313,10 @@ define <16 x i16> @load_splat_16i16_16i16_0123012301230123(<16 x i16>* %ptr) nou
 ; SSE-NEXT: movdqa %xmm0, %xmm1
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: load_splat_16i16_16i16_0123012301230123:
-; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_splat_16i16_16i16_0123012301230123:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vmovaps (%rdi), %ymm0
-; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: load_splat_16i16_16i16_0123012301230123:
-; AVX512: # BB#0: # %entry
-; AVX512-NEXT: vmovaps (%rdi), %ymm0
-; AVX512-NEXT: vbroadcastsd %xmm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: load_splat_16i16_16i16_0123012301230123:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX-NEXT: retq
 entry:
   %ld = load <16 x i16>, <16 x i16>* %ptr
   %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3,i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
@@ -513,8 +499,7 @@ define <32 x i8> @load_splat_32i8_32i8_01010101010101010101010101010101(<32 x i8
 ;
 ; AVX1-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
 ; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %ymm0
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT: retq
@@ -587,26 +572,10 @@ define <4 x float> @load_splat_4f32_8f32_0000(<8 x float>* %ptr) nounwind uwtabl
 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: load_splat_4f32_8f32_0000:
-; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_splat_4f32_8f32_0000:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vmovaps (%rdi), %ymm0
-; AVX2-NEXT: vbroadcastss %xmm0, %xmm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: load_splat_4f32_8f32_0000:
-; AVX512: # BB#0: # %entry
-; AVX512-NEXT: vmovaps (%rdi), %ymm0
-; AVX512-NEXT: vbroadcastss %xmm0, %xmm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: load_splat_4f32_8f32_0000:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vbroadcastss (%rdi), %xmm0
+; AVX-NEXT: retq
 entry:
   %ld = load <8 x float>, <8 x float>* %ptr
   %ret = shufflevector <8 x float> %ld, <8 x float> undef, <4 x i32> zeroinitializer
@@ -627,22 +596,10 @@ define <8 x float> @load_splat_8f32_16f32_89898989(<16 x float>* %ptr) nounwind
 ; SSE42-NEXT: movapd %xmm0, %xmm1
 ; SSE42-NEXT: retq
 ;
-; AVX1-LABEL: load_splat_8f32_16f32_89898989:
-; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vbroadcastsd 32(%rdi), %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_splat_8f32_16f32_89898989:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vbroadcastsd 32(%rdi), %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: load_splat_8f32_16f32_89898989:
-; AVX512: # BB#0: # %entry
-; AVX512-NEXT: vmovapd (%rdi), %zmm0
-; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; AVX512-NEXT: vbroadcastsd %xmm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: load_splat_8f32_16f32_89898989:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vbroadcastsd 32(%rdi), %ymm0
+; AVX-NEXT: retq
 entry:
   %ld = load <16 x float>, <16 x float>* %ptr
   %ret = shufflevector <16 x float> %ld, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 8, i32 9, i32 8, i32 9, i32 8, i32 9>
diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
index 6fbec91e77a..f4d0503f4a7 100644
--- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -57,10 +57,8 @@ define <4 x double> @load_factorf64_1(<16 x double>* %ptr) {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vmovups (%rdi), %ymm0
 ; AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; AVX1-NEXT: vmovups 64(%rdi), %ymm2
-; AVX1-NEXT: vmovups 96(%rdi), %ymm3
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, 96(%rdi), %ymm1, %ymm1
 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
 ; AVX1-NEXT: vmulpd %ymm0, %ymm0, %ymm0
 ; AVX1-NEXT: retq
@@ -69,10 +67,8 @@ define <4 x double> @load_factorf64_1(<16 x double>* %ptr) {
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vmovupd (%rdi), %ymm0
 ; AVX2-NEXT: vmovupd 32(%rdi), %ymm1
-; AVX2-NEXT: vmovupd 64(%rdi), %ymm2
-; AVX2-NEXT: vmovupd 96(%rdi), %ymm3
-; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX2-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vinsertf128 $1, 96(%rdi), %ymm1, %ymm1
 ; AVX2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
 ; AVX2-NEXT: vmulpd %ymm0, %ymm0, %ymm0
 ; AVX2-NEXT: retq