diff options
author | Sanjay Patel <spatel@rotateright.com> | 2014-12-19 22:16:28 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2014-12-19 22:16:28 +0000 |
commit | 1da5f1645b343db2deb637f2ab566d83981a868c (patch) | |
tree | 01cac4b91aeed8f91235f98b1a5eafe54370e73b /llvm/test/CodeGen | |
parent | 5352f35a89a8a527e51ac3a08fe6782be71bdbc9 (diff) | |
download | bcm5719-llvm-1da5f1645b343db2deb637f2ab566d83981a868c.tar.gz bcm5719-llvm-1da5f1645b343db2deb637f2ab566d83981a868c.zip |
Model sqrtss as a binary operation with one source operand tied to the destination (PR14221)
This is a continuation of r167064 ( http://llvm.org/viewvc/llvm-project?view=revision&revision=167064 ).
That patch started to fix PR14221 ( http://llvm.org/bugs/show_bug.cgi?id=14221 ), but it was not completed.
Differential Revision: http://reviews.llvm.org/D6330
llvm-svn: 224624
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/X86/sse_partial_update.ll | 43 |
1 file changed, 38 insertions, 5 deletions
diff --git a/llvm/test/CodeGen/X86/sse_partial_update.ll b/llvm/test/CodeGen/X86/sse_partial_update.ll
index 2c16a554aeb..a88ab014641 100644
--- a/llvm/test/CodeGen/X86/sse_partial_update.ll
+++ b/llvm/test/CodeGen/X86/sse_partial_update.ll
@@ -5,11 +5,18 @@
 ; There is a mismatch between the intrinsic and the actual instruction.
 ; The actual instruction has a partial update of dest, while the intrinsic
 ; passes through the upper FP values. Here, we make sure the source and
-; destination of rsqrtss are the same.
-define void @t1(<4 x float> %a) nounwind uwtable ssp {
+; destination of each scalar unary op are the same.
+
+define void @rsqrtss(<4 x float> %a) nounwind uwtable ssp {
 entry:
-; CHECK-LABEL: t1:
+; CHECK-LABEL: rsqrtss:
 ; CHECK: rsqrtss %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: shufps
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movap
+; CHECK-NEXT: jmp
+
   %0 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a) nounwind
   %a.addr.0.extract = extractelement <4 x float> %0, i32 0
   %conv = fpext float %a.addr.0.extract to double
@@ -21,10 +28,16 @@ entry:
 declare void @callee(double, double)
 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
 
-define void @t2(<4 x float> %a) nounwind uwtable ssp {
+define void @rcpss(<4 x float> %a) nounwind uwtable ssp {
 entry:
-; CHECK-LABEL: t2:
+; CHECK-LABEL: rcpss:
 ; CHECK: rcpss %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: shufps
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movap
+; CHECK-NEXT: jmp
+
   %0 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a) nounwind
   %a.addr.0.extract = extractelement <4 x float> %0, i32 0
   %conv = fpext float %a.addr.0.extract to double
@@ -34,3 +47,23 @@ entry:
   ret void
 }
 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
+
+define void @sqrtss(<4 x float> %a) nounwind uwtable ssp {
+entry:
+; CHECK-LABEL: sqrtss:
+; CHECK: sqrtss %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: shufps
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movap
+; CHECK-NEXT: jmp
+
+  %0 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a) nounwind
+  %a.addr.0.extract = extractelement <4 x float> %0, i32 0
+  %conv = fpext float %a.addr.0.extract to double
+  %a.addr.4.extract = extractelement <4 x float> %0, i32 1
+  %conv3 = fpext float %a.addr.4.extract to double
+  tail call void @callee(double %conv, double %conv3) nounwind
+  ret void
+}
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone