diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-26 10:41:28 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-26 10:41:28 +0000 |
| commit | 019e1024267da0344853a9f33b8bc8d2765961b7 (patch) | |
| tree | 73a036be68a650da43e41487500f2f95199d1654 /llvm | |
| parent | 68a204ddc18879dba2f2880f6db5e3c8067b0382 (diff) | |
| download | bcm5719-llvm-019e1024267da0344853a9f33b8bc8d2765961b7.tar.gz bcm5719-llvm-019e1024267da0344853a9f33b8bc8d2765961b7.zip | |
[X86][SSE] Fixed issue with memory folding of (v)cvtsd2ss intrinsics
Fixed typo in the intrinsic definitions of (v)cvtsd2ss with memory folding.
This was only unearthed when rL276102 started using the intrinsic again.....
llvm-svn: 276740
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll | 18 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll | 38 |
3 files changed, 58 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index f2eaeed6294..aedb337e3e6 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1820,7 +1820,7 @@ def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>, Sched<[WriteCvtF2F]>; -def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg, +def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss @@ -1836,7 +1836,7 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>; -def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg, +def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "cvtsd2ss\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll index 803f364a824..d3ebba93c76 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll @@ -1223,6 +1223,24 @@ define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) { } declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone +define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) { +; X32-LABEL: test_mm_cvtsd_ss_load: +; X32: # BB#0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movaps (%eax), %xmm1 +; X32-NEXT: cvtsd2ss %xmm1, %xmm0 +; X32-NEXT: retl +; +; X64-LABEL: test_mm_cvtsd_ss_load: +; X64: # BB#0: +; X64-NEXT: movaps (%rdi), %xmm1 +; X64-NEXT: cvtsd2ss %xmm1, %xmm0 +; X64-NEXT: retq + %a1 = load <2 x double>, <2 x double>* %p1 + %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) + ret <4 x float> %res +} + define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind { ; X32-LABEL: test_mm_cvtsi128_si32: ; X32: # BB#0: diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll index e682e8dd1d0..3ae3aecabaf 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -274,6 +274,25 @@ define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) { declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone +define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, <2 x double>* %p1) { +; SSE-LABEL: test_x86_sse2_cvtsd2ss_load: +; SSE: ## BB#0: +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: movaps (%eax), %xmm1 +; SSE-NEXT: cvtsd2ss %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvtsd2ss_load: +; KNL: ## BB#0: +; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 +; KNL-NEXT: retl + %a1 = load <2 x double>, <2 x double>* %p1 + %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} + + define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) { ; SSE-LABEL: test_x86_sse2_cvtsi2sd: ; SSE: ## BB#0: @@ -306,6 +325,25 @@ define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) { declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone +define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, <4 x float>* %p1) { +; SSE-LABEL: test_x86_sse2_cvtss2sd_load: +; SSE: ## BB#0: +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: movaps (%eax), %xmm1 +; SSE-NEXT: cvtss2sd %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvtss2sd_load: +; KNL: ## BB#0: +; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL-NEXT: vcvtss2sd (%eax), %xmm0, %xmm0 +; KNL-NEXT: retl + %a1 = load <4 x float>, <4 x float>* %p1 + %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} + + define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) { ; SSE-LABEL: test_x86_sse2_cvttpd2dq: ; SSE: ## BB#0: |

