diff options
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll | 38 |
3 files changed, 58 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index f2eaeed6294..aedb337e3e6 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1820,7 +1820,7 @@ def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
                          (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
                        IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
                        Sched<[WriteCvtF2F]>;
-def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg,
+def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem,
                        (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                        "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
@@ -1836,7 +1836,7 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
                          (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
                        IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
                        Sched<[WriteCvtF2F]>;
-def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
+def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
                        (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                        "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index 803f364a824..d3ebba93c76 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -1223,6 +1223,24 @@ define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) {
 }
 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone

+define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
+; X32-LABEL: test_mm_cvtsd_ss_load:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movaps (%eax), %xmm1
+; X32-NEXT: cvtsd2ss %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_cvtsd_ss_load:
+; X64: # BB#0:
+; X64-NEXT: movaps (%rdi), %xmm1
+; X64-NEXT: cvtsd2ss %xmm1, %xmm0
+; X64-NEXT: retq
+  %a1 = load <2 x double>, <2 x double>* %p1
+  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
+  ret <4 x float> %res
+}
+
 define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
 ; X32-LABEL: test_mm_cvtsi128_si32:
 ; X32: # BB#0:
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
index e682e8dd1d0..3ae3aecabaf 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -274,6 +274,25 @@ define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone


+define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, <2 x double>* %p1) {
+; SSE-LABEL: test_x86_sse2_cvtsd2ss_load:
+; SSE: ## BB#0:
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT: movaps (%eax), %xmm1
+; SSE-NEXT: cvtsd2ss %xmm1, %xmm0
+; SSE-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse2_cvtsd2ss_load:
+; KNL: ## BB#0:
+; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0
+; KNL-NEXT: retl
+  %a1 = load <2 x double>, <2 x double>* %p1
+  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+
+
 define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) {
 ; SSE-LABEL: test_x86_sse2_cvtsi2sd:
 ; SSE: ## BB#0:
@@ -306,6 +325,25 @@ define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
 declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone


+define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, <4 x float>* %p1) {
+; SSE-LABEL: test_x86_sse2_cvtss2sd_load:
+; SSE: ## BB#0:
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT: movaps (%eax), %xmm1
+; SSE-NEXT: cvtss2sd %xmm1, %xmm0
+; SSE-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse2_cvtss2sd_load:
+; KNL: ## BB#0:
+; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL-NEXT: vcvtss2sd (%eax), %xmm0, %xmm0
+; KNL-NEXT: retl
+  %a1 = load <4 x float>, <4 x float>* %p1
+  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+
+
 define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
 ; SSE-LABEL: test_x86_sse2_cvttpd2dq:
 ; SSE: ## BB#0: