From 83b0a98902077b2bc95bde4162e0f210d4b50f61 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Thu, 18 Jan 2018 07:44:09 +0000
Subject: [X86] Use vmovdqu64/vmovdqa64 for unmasked integer vector stores for
 consistency with loads.

Previously we used 64 for vXi64 stores and 32 for everything else. This
change uses 64 for everything, just like we do for loads.

llvm-svn: 322820
---
 llvm/test/CodeGen/X86/avg.ll                       | 12 ++++++------
 llvm/test/CodeGen/X86/avx512-insert-extract.ll     | 10 +++++-----
 llvm/test/CodeGen/X86/avx512-insert-extract_i1.ll  |  2 +-
 llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll |  2 +-
 .../CodeGen/X86/avx512bw-intrinsics-upgrade.ll     |  8 ++++----
 llvm/test/CodeGen/X86/pr34605.ll                   |  2 +-
 llvm/test/CodeGen/X86/x86-interleaved-access.ll    | 22 +++++++++++-----------
 7 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'llvm/test/CodeGen')

diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll
index dd11f6ca293..8e1e5f3b5ca 100644
--- a/llvm/test/CodeGen/X86/avg.ll
+++ b/llvm/test/CodeGen/X86/avg.ll
@@ -606,7 +606,7 @@ define void @avg_v64i8(<64 x i8>* %a, <64 x i8>* %b) nounwind {
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vmovdqa64 (%rsi), %zmm0
 ; AVX512BW-NEXT: vpavgb (%rdi), %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rax)
+; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rax)
 ; AVX512BW-NEXT: vzeroupper
 ; AVX512BW-NEXT: retq
   %1 = load <64 x i8>, <64 x i8>* %a
@@ -790,7 +790,7 @@ define void @avg_v32i16(<32 x i16>* %a, <32 x i16>* %b) nounwind {
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vmovdqa64 (%rsi), %zmm0
 ; AVX512BW-NEXT: vpavgw (%rdi), %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rax)
+; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rax)
 ; AVX512BW-NEXT: vzeroupper
 ; AVX512BW-NEXT: retq
   %1 = load <32 x i16>, <32 x i16>* %a
@@ -998,7 +998,7 @@ define void @avg_v64i8_2(<64 x i8>* %a, <64 x i8>* %b) nounwind {
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vmovdqa64 (%rsi), %zmm0
 ; AVX512BW-NEXT: vpavgb %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rax)
+; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rax)
 ; AVX512BW-NEXT: vzeroupper
 ; AVX512BW-NEXT: retq
   %1 = load <64 x i8>, <64 x i8>* %a
@@ -1183,7 +1183,7 @@ define void @avg_v32i16_2(<32 x i16>* %a, <32 x i16>* %b) nounwind {
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
 ; AVX512BW-NEXT: vpavgw (%rsi), %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rax)
+; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rax)
 ; AVX512BW-NEXT: vzeroupper
 ; AVX512BW-NEXT: retq
   %1 = load <32 x i16>, <32 x i16>* %a
@@ -1373,7 +1373,7 @@ define void @avg_v64i8_const(<64 x i8>* %a) nounwind {
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
 ; AVX512BW-NEXT: vpavgb {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rax)
+; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rax)
 ; AVX512BW-NEXT: vzeroupper
 ; AVX512BW-NEXT: retq
   %1 = load <64 x i8>, <64 x i8>* %a
@@ -1539,7 +1539,7 @@ define void @avg_v32i16_const(<32 x i16>* %a) nounwind {
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
 ; AVX512BW-NEXT: vpavgw {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rax)
+; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rax)
 ; AVX512BW-NEXT: vzeroupper
 ; AVX512BW-NEXT: retq
   %1 = load <32 x i16>, <32 x i16>* %a
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index de392995522..42d84b6b5d2 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -1838,7 +1838,7 @@ define i64 @test_insertelement_variable_v64i1(<64 x i8> %a, i8 %b, i32 %index) {
 ; SKX-NEXT: andl $63, %esi
 ; SKX-NEXT: testb %dil, %dil
 ; SKX-NEXT: vpmovm2b %k0, %zmm0
-; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
+; SKX-NEXT: vmovdqa64 %zmm0, (%rsp)
 ; SKX-NEXT: setne (%rsp,%rsi)
 ; SKX-NEXT: vpsllw $7, (%rsp), %zmm0
 ; SKX-NEXT: vpmovb2m %zmm0, %k0
@@ -2148,9 +2148,9 @@ define i96 @test_insertelement_variable_v96i1(<96 x i8> %a, i8 %b, i32 %index) {
 ; SKX-NEXT: andl $127, %eax
 ; SKX-NEXT: cmpb $0, 736(%rbp)
 ; SKX-NEXT: vpmovm2b %k1, %zmm0
-; SKX-NEXT: vmovdqa32 %zmm0, {{[0-9]+}}(%rsp)
+; SKX-NEXT: vmovdqa64 %zmm0, {{[0-9]+}}(%rsp)
 ; SKX-NEXT: vpmovm2b %k0, %zmm0
-; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
+; SKX-NEXT: vmovdqa64 %zmm0, (%rsp)
 ; SKX-NEXT: setne (%rsp,%rax)
 ; SKX-NEXT: vpsllw $7, {{[0-9]+}}(%rsp), %zmm0
 ; SKX-NEXT: vpmovb2m %zmm0, %k0
@@ -2265,9 +2265,9 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index
 ; SKX-NEXT: andl $127, %esi
 ; SKX-NEXT: testb %dil, %dil
 ; SKX-NEXT: vpmovm2b %k1, %zmm0
-; SKX-NEXT: vmovdqa32 %zmm0, {{[0-9]+}}(%rsp)
+; SKX-NEXT: vmovdqa64 %zmm0, {{[0-9]+}}(%rsp)
 ; SKX-NEXT: vpmovm2b %k0, %zmm0
-; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
+; SKX-NEXT: vmovdqa64 %zmm0, (%rsp)
 ; SKX-NEXT: setne (%rsp,%rsi)
 ; SKX-NEXT: vpsllw $7, {{[0-9]+}}(%rsp), %zmm0
 ; SKX-NEXT: vpmovb2m %zmm0, %k0
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract_i1.ll b/llvm/test/CodeGen/X86/avx512-insert-extract_i1.ll
index 9283fd32d74..9bfb4722da9 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract_i1.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract_i1.ll
@@ -16,7 +16,7 @@ define zeroext i8 @test_extractelement_varible_v64i1(<64 x i8> %a, <64 x i8> %b,
 ; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
 ; SKX-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
 ; SKX-NEXT: vpmovm2b %k0, %zmm0
-; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
+; SKX-NEXT: vmovdqa64 %zmm0, (%rsp)
 ; SKX-NEXT: andl $63, %edi
 ; SKX-NEXT: movzbl (%rsp,%rdi), %eax
 ; SKX-NEXT: andl $1, %eax
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
index 0a19852043e..bd6618f0e61 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -313,7 +313,7 @@ define void@test_int_x86_avx512_mask_storeu_d_512(i8* %ptr1, i8* %ptr2, <16 x i3
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: kmovw %edx, %k1
 ; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
-; CHECK-NEXT: vmovdqu32 %zmm0, (%rsi)
+; CHECK-NEXT: vmovdqu64 %zmm0, (%rsi)
 ; CHECK-NEXT: retq
   call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
   call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
index df75b89e612..555c7fcb9f2 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
@@ -119,7 +119,7 @@ define void@test_int_x86_avx512_mask_storeu_b_512(i8* %ptr1, i8* %ptr2, <64 x i8
 ; AVX512BW: ## %bb.0:
 ; AVX512BW-NEXT: kmovq %rdx, %k1
 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
-; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rsi)
+; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rsi)
 ; AVX512BW-NEXT: vzeroupper
 ; AVX512BW-NEXT: retq
 ;
@@ -129,7 +129,7 @@ define void@test_int_x86_avx512_mask_storeu_b_512(i8* %ptr1, i8* %ptr2, <64 x i8
 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
 ; AVX512F-32-NEXT: vmovdqu8 %zmm0, (%ecx) {%k1}
-; AVX512F-32-NEXT: vmovdqu32 %zmm0, (%eax)
+; AVX512F-32-NEXT: vmovdqu64 %zmm0, (%eax)
 ; AVX512F-32-NEXT: vzeroupper
 ; AVX512F-32-NEXT: retl
   call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr1, <64 x i8> %x1, i64 %x2)
@@ -144,7 +144,7 @@ define void@test_int_x86_avx512_mask_storeu_w_512(i8* %ptr1, i8* %ptr2, <32 x i1
 ; AVX512BW: ## %bb.0:
 ; AVX512BW-NEXT: kmovd %edx, %k1
 ; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
-; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rsi)
+; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rsi)
 ; AVX512BW-NEXT: vzeroupper
 ; AVX512BW-NEXT: retq
 ;
@@ -154,7 +154,7 @@ define void@test_int_x86_avx512_mask_storeu_w_512(i8* %ptr1, i8* %ptr2, <32 x i1
 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
 ; AVX512F-32-NEXT: vmovdqu16 %zmm0, (%ecx) {%k1}
-; AVX512F-32-NEXT: vmovdqu32 %zmm0, (%eax)
+; AVX512F-32-NEXT: vmovdqu64 %zmm0, (%eax)
 ; AVX512F-32-NEXT: vzeroupper
 ; AVX512F-32-NEXT: retl
   call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr1, <32 x i16> %x1, i32 %x2)
diff --git a/llvm/test/CodeGen/X86/pr34605.ll b/llvm/test/CodeGen/X86/pr34605.ll
index 2d51a53dc41..95459ce960a 100644
--- a/llvm/test/CodeGen/X86/pr34605.ll
+++ b/llvm/test/CodeGen/X86/pr34605.ll
@@ -19,7 +19,7 @@ define void @pr34605(i8* nocapture %s, i32 %p) {
 ; CHECK-NEXT: kandq %k1, %k0, %k1
 ; CHECK-NEXT: vmovdqu8 {{\.LCPI.*}}, %zmm0 {%k1} {z}
 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vmovdqu32 %zmm0, (%eax)
+; CHECK-NEXT: vmovdqu64 %zmm0, (%eax)
 ; CHECK-NEXT: vmovups %zmm1, 64(%eax)
 ; CHECK-NEXT: vmovups %zmm1, 128(%eax)
 ; CHECK-NEXT: vmovups %zmm1, 192(%eax)
diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
index 70197c7ce3a..c62f5df086b 100644
--- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -361,8 +361,8 @@ define void @interleaved_store_vf32_i8_stride4(<32 x i8> %x1, <32 x i8> %x2, <32
 ; AVX512-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm4[2,3],ymm0[2,3]
 ; AVX512-NEXT: vinserti64x4 $1, %ymm5, %zmm2, %zmm2
 ; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512-NEXT: vmovdqa32 %zmm0, 64(%rdi)
-; AVX512-NEXT: vmovdqa32 %zmm2, (%rdi)
+; AVX512-NEXT: vmovdqa64 %zmm0, 64(%rdi)
+; AVX512-NEXT: vmovdqa64 %zmm2, (%rdi)
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
   %v1 = shufflevector <32 x i8> %x1, <32 x i8> %x2, <64 x i32>
@@ -420,7 +420,7 @@ define void @interleaved_store_vf16_i8_stride4(<16 x i8> %x1, <16 x i8> %x2, <16
 ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm1
 ; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm4, %ymm0
 ; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512-NEXT: vmovdqa32 %zmm0, (%rdi)
+; AVX512-NEXT: vmovdqa64 %zmm0, (%rdi)
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
   %v1 = shufflevector <16 x i8> %x1, <16 x i8> %x2, <32 x i32>
@@ -1390,7 +1390,7 @@ define void @interleaved_store_vf32_i8_stride3(<32 x i8> %a, <32 x i8> %b, <32 x
 ; AVX512-NEXT: vpshufb %ymm4, %ymm0, %ymm0
 ; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm1
 ; AVX512-NEXT: vmovdqu %ymm0, 64(%rdi)
-; AVX512-NEXT: vmovdqu32 %zmm1, (%rdi)
+; AVX512-NEXT: vmovdqu64 %zmm1, (%rdi)
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
   %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <64 x i32>
@@ -1538,9 +1538,9 @@ define void @interleaved_store_vf64_i8_stride3(<64 x i8> %a, <64 x i8> %b, <64 x
 ; AVX512-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm1
 ; AVX512-NEXT: vinserti64x4 $1, %ymm7, %zmm6, %zmm3
 ; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
-; AVX512-NEXT: vmovdqu32 %zmm0, 128(%rdi)
-; AVX512-NEXT: vmovdqu32 %zmm3, 64(%rdi)
-; AVX512-NEXT: vmovdqu32 %zmm1, (%rdi)
+; AVX512-NEXT: vmovdqu64 %zmm0, 128(%rdi)
+; AVX512-NEXT: vmovdqu64 %zmm3, 64(%rdi)
+; AVX512-NEXT: vmovdqu64 %zmm1, (%rdi)
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
   %1 = shufflevector <64 x i8> %a, <64 x i8> %b, <128 x i32>
@@ -1874,10 +1874,10 @@ define void @interleaved_store_vf64_i8_stride4(<64 x i8> %a, <64 x i8> %b, <64 x
 ; AVX512-NEXT: vinserti64x4 $1, %ymm7, %zmm6, %zmm3
 ; AVX512-NEXT: vinserti64x4 $1, %ymm9, %zmm8, %zmm4
 ; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512-NEXT: vmovdqa32 %zmm0, 192(%rdi)
-; AVX512-NEXT: vmovdqa32 %zmm3, 64(%rdi)
-; AVX512-NEXT: vmovdqa32 %zmm4, 128(%rdi)
-; AVX512-NEXT: vmovdqa32 %zmm2, (%rdi)
+; AVX512-NEXT: vmovdqa64 %zmm0, 192(%rdi)
+; AVX512-NEXT: vmovdqa64 %zmm3, 64(%rdi)
+; AVX512-NEXT: vmovdqa64 %zmm4, 128(%rdi)
+; AVX512-NEXT: vmovdqa64 %zmm2, (%rdi)
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
   %1 = shufflevector <64 x i8> %a, <64 x i8> %b, <128 x i32>
--
cgit v1.2.3
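
For reference, a minimal reduced example (not part of the patch; the function name,
triple, and llc flags below are illustrative assumptions) of the kind of case this
change affects: an unmasked 512-bit integer store, now selected as vmovdqu64 instead
of vmovdqu32, matching the vmovdqa64/vmovdqu64 forms already used for unmasked loads.

; Hypothetical reduced test; run with something like:
;   llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f < unmasked-store.ll
define void @store_v16i32(<16 x i32>* %p, <16 x i32> %x) {
  ; Unmasked, unaligned 512-bit integer store: previously this printed as
  ; vmovdqu32 %zmm0, (%rdi); with this change it prints as vmovdqu64.
  store <16 x i32> %x, <16 x i32>* %p, align 4
  ret void
}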