[X86] Remove some intrinsic instructions from hasPartialRegUpdate

Summary: These intrinsic instructions are all selected from intrinsics that have well-defined behavior for where the upper bits come from: they do not come from the same place as the lower bits. As you can see, we were suppressing load folding for these instructions in some cases. In none of those cases was the separate load helping to avoid a partial dependency on the destination register, so we should just go ahead and allow the load to be folded. Only foldMemoryOperand was suppressing folding for these. They all have patterns for folding sse_load_f32/f64 that aren't gated with OptForSize, but sse_load_f32/f64 doesn't allow 128-bit vector loads; it only allows scalar_to_vector and vzmovl of scalar loads to match. There's no reason we can't allow a 128-bit vector load to be narrowed, so I would like to fix sse_load_f32/f64 to allow that. If I do that, it changes some of these same test cases to fold the load too. Reviewers: spatel, zvi, RKSimon Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D27611 llvm-svn: 289419
author: Craig Topper <craig.topper@gmail.com> 2016-12-12 05:07:17 +0000
committer: Craig Topper <craig.topper@gmail.com> 2016-12-12 05:07:17 +0000
commit: 081c0e2864419b07a7c843cbc839b4882baa3c9c (patch)
tree: 9be88624bcb82ebfaec96b58d80042681e56b48f /llvm/test/CodeGen
parent: 2a452ebf76833379f8217531b5027457e11d9dd5 (diff)
download: bcm5719-llvm-081c0e2864419b07a7c843cbc839b4882baa3c9c.tar.gz
bcm5719-llvm-081c0e2864419b07a7c843cbc839b4882baa3c9c.zip
5 files changed, 25 insertions, 14 deletions
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index 9d1ab922d96..cf10691b2ab 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -1227,14 +1227,12 @@ define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
 ; X32-LABEL: test_mm_cvtsd_ss_load:
 ; X32:       # BB#0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movaps (%eax), %xmm1
-; X32-NEXT:    cvtsd2ss %xmm1, %xmm0
+; X32-NEXT:    cvtsd2ss (%eax), %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_cvtsd_ss_load:
 ; X64:       # BB#0:
-; X64-NEXT:    movaps (%rdi), %xmm1
-; X64-NEXT:    cvtsd2ss %xmm1, %xmm0
+; X64-NEXT:    cvtsd2ss (%rdi), %xmm0
 ; X64-NEXT:    retq
   %a1 = load <2 x double>, <2 x double>* %p1
   %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
index 747bee127e3..694e303e635 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -370,8 +370,7 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, <2 x double>* %
 ; SSE-LABEL: test_x86_sse2_cvtsd2ss_load:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SSE-NEXT:    movaps (%eax), %xmm1 ## encoding: [0x0f,0x28,0x08]
-; SSE-NEXT:    cvtsd2ss %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5a,0xc1]
+; SSE-NEXT:    cvtsd2ss (%eax), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x00]
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; VCHECK-LABEL: test_x86_sse2_cvtsd2ss_load:
@@ -444,8 +443,7 @@ define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, <4 x float>*
 ; SSE-LABEL: test_x86_sse2_cvtss2sd_load:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SSE-NEXT:    movaps (%eax), %xmm1 ## encoding: [0x0f,0x28,0x08]
-; SSE-NEXT:    cvtss2sd %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5a,0xc1]
+; SSE-NEXT:    cvtss2sd (%eax), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x00]
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; VCHECK-LABEL: test_x86_sse2_cvtss2sd_load:
diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
index b77f472faf3..c17ec8c3593 100644
--- a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
@@ -467,6 +467,24 @@ define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1)
 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
 
 
+define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, <2 x double>* %a1) {
+; SSE41-LABEL: test_x86_sse41_round_sd_load:
+; SSE41:       ## BB#0:
+; SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; SSE41-NEXT:    roundsd $7, (%eax), %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0b,0x00,0x07]
+; SSE41-NEXT:    retl ## encoding: [0xc3]
+;
+; VCHECK-LABEL: test_x86_sse41_round_sd_load:
+; VCHECK:       ## BB#0:
+; VCHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; VCHECK-NEXT:    vroundsd $7, (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0b,0x00,0x07]
+; VCHECK-NEXT:    retl ## encoding: [0xc3]
+  %a1b = load <2 x double>, <2 x double>* %a1
+  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1b, i32 7) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+
+
 define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
 ; SSE41-LABEL: test_x86_sse41_round_ss:
 ; SSE41:       ## BB#0:
diff --git a/llvm/test/CodeGen/X86/sse_partial_update.ll b/llvm/test/CodeGen/X86/sse_partial_update.ll
index bd207c99dbd..8dfb8ee7007 100644
--- a/llvm/test/CodeGen/X86/sse_partial_update.ll
+++ b/llvm/test/CodeGen/X86/sse_partial_update.ll
@@ -98,9 +98,8 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
 define <2 x double> @load_fold_cvtss2sd_int(<4 x float> *%a) {
 ; CHECK-LABEL: load_fold_cvtss2sd_int:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    movaps (%rdi), %xmm1
 ; CHECK-NEXT:    xorps %xmm0, %xmm0
-; CHECK-NEXT:    cvtss2sd %xmm1, %xmm0
+; CHECK-NEXT:    cvtss2sd (%rdi), %xmm0
 ; CHECK-NEXT:    retq
   %ld = load <4 x float>, <4 x float> *%a
   %x = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %ld)
diff --git a/llvm/test/CodeGen/X86/vec_ss_load_fold.ll b/llvm/test/CodeGen/X86/vec_ss_load_fold.ll
index 591f6e6ced1..edb5940fa30 100644
--- a/llvm/test/CodeGen/X86/vec_ss_load_fold.ll
+++ b/llvm/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -200,8 +200,7 @@ define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {
 ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:    movaps %xmm0, (%esp) ## 16-byte Spill
 ; X32-NEXT:    calll _f
-; X32-NEXT:    movaps (%esp), %xmm1 ## 16-byte Reload
-; X32-NEXT:    roundss $4, %xmm1, %xmm0
+; X32-NEXT:    roundss $4, (%esp), %xmm0 ## 16-byte Folded Reload
 ; X32-NEXT:    addl $28, %esp
 ; X32-NEXT:    retl
 ;
@@ -211,8 +210,7 @@ define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {
 ; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
 ; X64-NEXT:    callq _f
-; X64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
-; X64-NEXT:    roundss $4, %xmm1, %xmm0
+; X64-NEXT:    roundss $4, (%rsp), %xmm0 ## 16-byte Folded Reload
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
 ;
author	Craig Topper <craig.topper@gmail.com>	2016-12-12 05:07:17 +0000
committer	Craig Topper <craig.topper@gmail.com>	2016-12-12 05:07:17 +0000
commit	081c0e2864419b07a7c843cbc839b4882baa3c9c (patch)
tree	9be88624bcb82ebfaec96b58d80042681e56b48f /llvm/test/CodeGen
parent	2a452ebf76833379f8217531b5027457e11d9dd5 (diff)
download	bcm5719-llvm-081c0e2864419b07a7c843cbc839b4882baa3c9c.tar.gz bcm5719-llvm-081c0e2864419b07a7c843cbc839b4882baa3c9c.zip