summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2016-07-26 10:41:28 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2016-07-26 10:41:28 +0000
commit019e1024267da0344853a9f33b8bc8d2765961b7 (patch)
tree73a036be68a650da43e41487500f2f95199d1654 /llvm
parent68a204ddc18879dba2f2880f6db5e3c8067b0382 (diff)
downloadbcm5719-llvm-019e1024267da0344853a9f33b8bc8d2765961b7.tar.gz
bcm5719-llvm-019e1024267da0344853a9f33b8bc8d2765961b7.zip
[X86][SSE] Fixed issue with memory folding of (v)cvtsd2ss intrinsics
Fixed typo in the intrinsic definitions of (v)cvtsd2ss with memory folding. This was only unearthed when rL276102 started using the intrinsic again..... llvm-svn: 276740
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td4
-rw-r--r--llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll18
-rw-r--r--llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll38
3 files changed, 58 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index f2eaeed6294..aedb337e3e6 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1820,7 +1820,7 @@ def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
Sched<[WriteCvtF2F]>;
-def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg,
+def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
@@ -1836,7 +1836,7 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
Sched<[WriteCvtF2F]>;
-def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
+def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index 803f364a824..d3ebba93c76 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -1223,6 +1223,24 @@ define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) {
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
+define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
+; X32-LABEL: test_mm_cvtsd_ss_load:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movaps (%eax), %xmm1
+; X32-NEXT: cvtsd2ss %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_cvtsd_ss_load:
+; X64: # BB#0:
+; X64-NEXT: movaps (%rdi), %xmm1
+; X64-NEXT: cvtsd2ss %xmm1, %xmm0
+; X64-NEXT: retq
+ %a1 = load <2 x double>, <2 x double>* %p1
+ %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
+ ret <4 x float> %res
+}
+
define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_cvtsi128_si32:
; X32: # BB#0:
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
index e682e8dd1d0..3ae3aecabaf 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -274,6 +274,25 @@ define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
+define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, <2 x double>* %p1) {
+; SSE-LABEL: test_x86_sse2_cvtsd2ss_load:
+; SSE: ## BB#0:
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT: movaps (%eax), %xmm1
+; SSE-NEXT: cvtsd2ss %xmm1, %xmm0
+; SSE-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse2_cvtsd2ss_load:
+; KNL: ## BB#0:
+; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0
+; KNL-NEXT: retl
+ %a1 = load <2 x double>, <2 x double>* %p1
+ %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+
+
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) {
; SSE-LABEL: test_x86_sse2_cvtsi2sd:
; SSE: ## BB#0:
@@ -306,6 +325,25 @@ define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
+define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, <4 x float>* %p1) {
+; SSE-LABEL: test_x86_sse2_cvtss2sd_load:
+; SSE: ## BB#0:
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT: movaps (%eax), %xmm1
+; SSE-NEXT: cvtss2sd %xmm1, %xmm0
+; SSE-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse2_cvtss2sd_load:
+; KNL: ## BB#0:
+; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL-NEXT: vcvtss2sd (%eax), %xmm0, %xmm0
+; KNL-NEXT: retl
+ %a1 = load <4 x float>, <4 x float>* %p1
+ %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+
+
define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvttpd2dq:
; SSE: ## BB#0:
OpenPOWER on IntegriCloud