diff options
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/X86/sse_partial_update.ll | 43 |
1 files changed, 38 insertions, 5 deletions
diff --git a/llvm/test/CodeGen/X86/sse_partial_update.ll b/llvm/test/CodeGen/X86/sse_partial_update.ll
index 2c16a554aeb..a88ab014641 100644
--- a/llvm/test/CodeGen/X86/sse_partial_update.ll
+++ b/llvm/test/CodeGen/X86/sse_partial_update.ll
@@ -5,11 +5,18 @@
 ; There is a mismatch between the intrinsic and the actual instruction.
 ; The actual instruction has a partial update of dest, while the intrinsic
 ; passes through the upper FP values. Here, we make sure the source and
-; destination of rsqrtss are the same.
-define void @t1(<4 x float> %a) nounwind uwtable ssp {
+; destination of each scalar unary op are the same.
+
+define void @rsqrtss(<4 x float> %a) nounwind uwtable ssp {
 entry:
-; CHECK-LABEL: t1:
+; CHECK-LABEL: rsqrtss:
 ; CHECK: rsqrtss %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: shufps
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movap
+; CHECK-NEXT: jmp
+
   %0 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a) nounwind
   %a.addr.0.extract = extractelement <4 x float> %0, i32 0
   %conv = fpext float %a.addr.0.extract to double
@@ -21,10 +28,16 @@ entry:
 declare void @callee(double, double)
 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
 
-define void @t2(<4 x float> %a) nounwind uwtable ssp {
+define void @rcpss(<4 x float> %a) nounwind uwtable ssp {
 entry:
-; CHECK-LABEL: t2:
+; CHECK-LABEL: rcpss:
 ; CHECK: rcpss %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: shufps
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movap
+; CHECK-NEXT: jmp
+
   %0 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a) nounwind
   %a.addr.0.extract = extractelement <4 x float> %0, i32 0
   %conv = fpext float %a.addr.0.extract to double
@@ -34,3 +47,23 @@ entry:
   ret void
 }
 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
+
+define void @sqrtss(<4 x float> %a) nounwind uwtable ssp {
+entry:
+; CHECK-LABEL: sqrtss:
+; CHECK: sqrtss %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: shufps
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movap
+; CHECK-NEXT: jmp
+
+  %0 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a) nounwind
+  %a.addr.0.extract = extractelement <4 x float> %0, i32 0
+  %conv = fpext float %a.addr.0.extract to double
+  %a.addr.4.extract = extractelement <4 x float> %0, i32 1
+  %conv3 = fpext float %a.addr.4.extract to double
+  tail call void @callee(double %conv, double %conv3) nounwind
+  ret void
+}
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone