summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2016-07-26 10:41:28 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2016-07-26 10:41:28 +0000
commit019e1024267da0344853a9f33b8bc8d2765961b7 (patch)
tree73a036be68a650da43e41487500f2f95199d1654 /llvm/test
parent68a204ddc18879dba2f2880f6db5e3c8067b0382 (diff)
downloadbcm5719-llvm-019e1024267da0344853a9f33b8bc8d2765961b7.tar.gz
bcm5719-llvm-019e1024267da0344853a9f33b8bc8d2765961b7.zip
[X86][SSE] Fixed issue with memory folding of (v)cvtsd2ss intrinsics
Fixed typo in the intrinsic definitions of (v)cvtsd2ss with memory folding. This was only unearthed when rL276102 started using the intrinsic again..... llvm-svn: 276740
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll18
-rw-r--r--llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll38
2 files changed, 56 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index 803f364a824..d3ebba93c76 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -1223,6 +1223,24 @@ define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) {
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
+define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
+; X32-LABEL: test_mm_cvtsd_ss_load:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movaps (%eax), %xmm1
+; X32-NEXT: cvtsd2ss %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_cvtsd_ss_load:
+; X64: # BB#0:
+; X64-NEXT: movaps (%rdi), %xmm1
+; X64-NEXT: cvtsd2ss %xmm1, %xmm0
+; X64-NEXT: retq
+ %a1 = load <2 x double>, <2 x double>* %p1
+ %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
+ ret <4 x float> %res
+}
+
define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_cvtsi128_si32:
; X32: # BB#0:
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
index e682e8dd1d0..3ae3aecabaf 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -274,6 +274,25 @@ define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
+define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, <2 x double>* %p1) {
+; SSE-LABEL: test_x86_sse2_cvtsd2ss_load:
+; SSE: ## BB#0:
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT: movaps (%eax), %xmm1
+; SSE-NEXT: cvtsd2ss %xmm1, %xmm0
+; SSE-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse2_cvtsd2ss_load:
+; KNL: ## BB#0:
+; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0
+; KNL-NEXT: retl
+ %a1 = load <2 x double>, <2 x double>* %p1
+ %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+
+
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) {
; SSE-LABEL: test_x86_sse2_cvtsi2sd:
; SSE: ## BB#0:
@@ -306,6 +325,25 @@ define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
+define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, <4 x float>* %p1) {
+; SSE-LABEL: test_x86_sse2_cvtss2sd_load:
+; SSE: ## BB#0:
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT: movaps (%eax), %xmm1
+; SSE-NEXT: cvtss2sd %xmm1, %xmm0
+; SSE-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse2_cvtss2sd_load:
+; KNL: ## BB#0:
+; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL-NEXT: vcvtss2sd (%eax), %xmm0, %xmm0
+; KNL-NEXT: retl
+ %a1 = load <4 x float>, <4 x float>* %p1
+ %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+
+
define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvttpd2dq:
; SSE: ## BB#0:
OpenPOWER on IntegriCloud