From 3d497cd778261818eb76b5ad97f7f751cfebfcbb Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Thu, 9 Oct 2014 21:26:35 +0000
Subject: Improve sqrt estimate algorithm (fast-math)

This patch changes the fast-math implementation for calculating sqrt(x) from:
y = 1 / (1 / sqrt(x))
to:
y = x * (1 / sqrt(x))

This has 2 benefits: less code / faster code and one less estimate instruction
that may lose precision.

The only target that will be affected (until http://reviews.llvm.org/D5658 is approved)
is PPC. The difference in codegen for PPC is 2 less flops for a single-precision sqrtf
or vector sqrtf and 4 less flops for a double-precision sqrt.
We also eliminate a constant load and extra register usage.

Differential Revision: http://reviews.llvm.org/D5682

llvm-svn: 219445
---
 llvm/test/CodeGen/PowerPC/recipest.ll | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'llvm/test/CodeGen/PowerPC/recipest.ll')

diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll
index de74c043ece..2f6a3eca488 100644
--- a/llvm/test/CodeGen/PowerPC/recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/recipest.ll
@@ -197,11 +197,7 @@ define double @foo3(double %a) nounwind {
 ; CHECK-NEXT: fmul
 ; CHECK-NEXT: fmadd
 ; CHECK-NEXT: fmul
-; CHECK-NEXT: fre
-; CHECK-NEXT: fnmsub
-; CHECK-NEXT: fmadd
-; CHECK-NEXT: fnmsub
-; CHECK-NEXT: fmadd
+; CHECK-NEXT: fmul
 ; CHECK: blr
 
 ; CHECK-SAFE: @foo3
@@ -220,9 +216,7 @@ define float @goo3(float %a) nounwind {
 ; CHECK: fmuls
 ; CHECK-NEXT: fmadds
 ; CHECK-NEXT: fmuls
-; CHECK-NEXT: fres
-; CHECK-NEXT: fnmsubs
-; CHECK-NEXT: fmadds
+; CHECK-NEXT: fmuls
 ; CHECK: blr
 
 ; CHECK-SAFE: @goo3
@@ -236,7 +230,6 @@ define <4 x float> @hoo3(<4 x float> %a) nounwind {
 
 ; CHECK: @hoo3
 ; CHECK: vrsqrtefp
-; CHECK-DAG: vrefp
 ; CHECK-DAG: vcmpeqfp
 
 ; CHECK-SAFE: @hoo3
-- 
cgit v1.2.3