diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-26 10:41:28 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-26 10:41:28 +0000 |
| commit | 019e1024267da0344853a9f33b8bc8d2765961b7 (patch) | |
| tree | 73a036be68a650da43e41487500f2f95199d1654 /llvm/test | |
| parent | 68a204ddc18879dba2f2880f6db5e3c8067b0382 (diff) | |
| download | bcm5719-llvm-019e1024267da0344853a9f33b8bc8d2765961b7.tar.gz bcm5719-llvm-019e1024267da0344853a9f33b8bc8d2765961b7.zip | |
[X86][SSE] Fixed issue with memory folding of (v)cvtsd2ss intrinsics
Fixed typo in the intrinsic definitions of (v)cvtsd2ss with memory folding.
This was only unearthed when rL276102 started using the intrinsic again.....
llvm-svn: 276740
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll | 18 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll | 38 |
2 files changed, 56 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll index 803f364a824..d3ebba93c76 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll @@ -1223,6 +1223,24 @@ define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) { } declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone +define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) { +; X32-LABEL: test_mm_cvtsd_ss_load: +; X32: # BB#0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movaps (%eax), %xmm1 +; X32-NEXT: cvtsd2ss %xmm1, %xmm0 +; X32-NEXT: retl +; +; X64-LABEL: test_mm_cvtsd_ss_load: +; X64: # BB#0: +; X64-NEXT: movaps (%rdi), %xmm1 +; X64-NEXT: cvtsd2ss %xmm1, %xmm0 +; X64-NEXT: retq + %a1 = load <2 x double>, <2 x double>* %p1 + %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) + ret <4 x float> %res +} + define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind { ; X32-LABEL: test_mm_cvtsi128_si32: ; X32: # BB#0: diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll index e682e8dd1d0..3ae3aecabaf 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -274,6 +274,25 @@ define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) { declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone +define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, <2 x double>* %p1) { +; SSE-LABEL: test_x86_sse2_cvtsd2ss_load: +; SSE: ## BB#0: +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: movaps (%eax), %xmm1 +; SSE-NEXT: cvtsd2ss %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvtsd2ss_load: +; KNL: ## BB#0: +; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 +; KNL-NEXT: retl + %a1 = load <2 x double>, <2 x double>* %p1 + %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} + + define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) { ; SSE-LABEL: test_x86_sse2_cvtsi2sd: ; SSE: ## BB#0: @@ -306,6 +325,25 @@ define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) { declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone +define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, <4 x float>* %p1) { +; SSE-LABEL: test_x86_sse2_cvtss2sd_load: +; SSE: ## BB#0: +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: movaps (%eax), %xmm1 +; SSE-NEXT: cvtss2sd %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvtss2sd_load: +; KNL: ## BB#0: +; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL-NEXT: vcvtss2sd (%eax), %xmm0, %xmm0 +; KNL-NEXT: retl + %a1 = load <4 x float>, <4 x float>* %p1 + %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} + + define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) { ; SSE-LABEL: test_x86_sse2_cvttpd2dq: ; SSE: ## BB#0: |

