summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
diff options
context:
space:
mode:
author: Nemanja Ivanovic <nemanja.i.ibm@gmail.com> 2019-05-07 13:48:03 +0000
committer: Nemanja Ivanovic <nemanja.i.ibm@gmail.com> 2019-05-07 13:48:03 +0000
commit: b4f028f0f3f6c4326ff9ff3c077f71b01d5adc2e (patch)
tree: dc0d51ef3204f44cdf1dec98975ded5442bad306 /llvm/test/CodeGen/PowerPC/fmf-propagation.ll
parent: 5c922f698847a7059e9015dcf73721af3be852a3 (diff)
download: bcm5719-llvm-b4f028f0f3f6c4326ff9ff3c077f71b01d5adc2e.tar.gz
download: bcm5719-llvm-b4f028f0f3f6c4326ff9ff3c077f71b01d5adc2e.zip
[PowerPC] Use the two-constant NR algorithm for refining estimates
The single-constant algorithm produces infinities on a lot of denormal values. The precision of the two-constant algorithm is actually sufficient across the range of denormals. We will switch to that algorithm for now to avoid the infinities on denormals. In the future, we will re-evaluate the algorithm to find the optimal one for PowerPC.

Differential revision: https://reviews.llvm.org/D60037

llvm-svn: 360144
Diffstat (limited to 'llvm/test/CodeGen/PowerPC/fmf-propagation.ll')
-rw-r--r-- llvm/test/CodeGen/PowerPC/fmf-propagation.ll 69
1 files changed, 33 insertions, 36 deletions
diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
index ea40e4edd34..0ce4701d683 100644
--- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -284,16 +284,16 @@ define float @sqrt_afn(float %x) {
; FMF-NEXT: fcmpu 0, 1, 0
; FMF-NEXT: beq 0, .LBB10_2
; FMF-NEXT: # %bb.1:
+; FMF-NEXT: xsrsqrtesp 0, 1
; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha
-; FMF-NEXT: xsrsqrtesp 3, 1
-; FMF-NEXT: lfs 0, .LCPI10_0@toc@l(3)
-; FMF-NEXT: xsmulsp 2, 1, 0
-; FMF-NEXT: xsmulsp 4, 3, 3
-; FMF-NEXT: xssubsp 2, 2, 1
-; FMF-NEXT: xsmulsp 2, 2, 4
-; FMF-NEXT: xssubsp 0, 0, 2
-; FMF-NEXT: xsmulsp 0, 3, 0
-; FMF-NEXT: xsmulsp 0, 0, 1
+; FMF-NEXT: addis 4, 2, .LCPI10_1@toc@ha
+; FMF-NEXT: lfs 2, .LCPI10_0@toc@l(3)
+; FMF-NEXT: lfs 3, .LCPI10_1@toc@l(4)
+; FMF-NEXT: xsmulsp 1, 1, 0
+; FMF-NEXT: xsmulsp 0, 1, 0
+; FMF-NEXT: xsmulsp 1, 1, 2
+; FMF-NEXT: xsaddsp 0, 0, 3
+; FMF-NEXT: xsmulsp 0, 1, 0
; FMF-NEXT: .LBB10_2:
; FMF-NEXT: fmr 1, 0
; FMF-NEXT: blr
@@ -304,16 +304,15 @@ define float @sqrt_afn(float %x) {
; GLOBAL-NEXT: fcmpu 0, 1, 0
; GLOBAL-NEXT: beq 0, .LBB10_2
; GLOBAL-NEXT: # %bb.1:
-; GLOBAL-NEXT: xsrsqrtesp 2, 1
-; GLOBAL-NEXT: fneg 0, 1
+; GLOBAL-NEXT: xsrsqrtesp 0, 1
; GLOBAL-NEXT: addis 3, 2, .LCPI10_0@toc@ha
-; GLOBAL-NEXT: fmr 4, 1
-; GLOBAL-NEXT: lfs 3, .LCPI10_0@toc@l(3)
-; GLOBAL-NEXT: xsmaddasp 4, 0, 3
-; GLOBAL-NEXT: xsmulsp 0, 2, 2
-; GLOBAL-NEXT: xsmaddasp 3, 4, 0
-; GLOBAL-NEXT: xsmulsp 0, 2, 3
-; GLOBAL-NEXT: xsmulsp 0, 0, 1
+; GLOBAL-NEXT: addis 4, 2, .LCPI10_1@toc@ha
+; GLOBAL-NEXT: lfs 2, .LCPI10_0@toc@l(3)
+; GLOBAL-NEXT: lfs 3, .LCPI10_1@toc@l(4)
+; GLOBAL-NEXT: xsmulsp 1, 1, 0
+; GLOBAL-NEXT: xsmaddasp 2, 1, 0
+; GLOBAL-NEXT: xsmulsp 0, 1, 3
+; GLOBAL-NEXT: xsmulsp 0, 0, 2
; GLOBAL-NEXT: .LBB10_2:
; GLOBAL-NEXT: fmr 1, 0
; GLOBAL-NEXT: blr
@@ -338,16 +337,15 @@ define float @sqrt_fast(float %x) {
; FMF-NEXT: fcmpu 0, 1, 0
; FMF-NEXT: beq 0, .LBB11_2
; FMF-NEXT: # %bb.1:
-; FMF-NEXT: xsrsqrtesp 2, 1
-; FMF-NEXT: fneg 0, 1
+; FMF-NEXT: xsrsqrtesp 0, 1
; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha
-; FMF-NEXT: fmr 4, 1
-; FMF-NEXT: lfs 3, .LCPI11_0@toc@l(3)
-; FMF-NEXT: xsmaddasp 4, 0, 3
-; FMF-NEXT: xsmulsp 0, 2, 2
-; FMF-NEXT: xsmaddasp 3, 4, 0
-; FMF-NEXT: xsmulsp 0, 2, 3
-; FMF-NEXT: xsmulsp 0, 0, 1
+; FMF-NEXT: addis 4, 2, .LCPI11_1@toc@ha
+; FMF-NEXT: lfs 2, .LCPI11_0@toc@l(3)
+; FMF-NEXT: lfs 3, .LCPI11_1@toc@l(4)
+; FMF-NEXT: xsmulsp 1, 1, 0
+; FMF-NEXT: xsmaddasp 2, 1, 0
+; FMF-NEXT: xsmulsp 0, 1, 3
+; FMF-NEXT: xsmulsp 0, 0, 2
; FMF-NEXT: .LBB11_2:
; FMF-NEXT: fmr 1, 0
; FMF-NEXT: blr
@@ -358,16 +356,15 @@ define float @sqrt_fast(float %x) {
; GLOBAL-NEXT: fcmpu 0, 1, 0
; GLOBAL-NEXT: beq 0, .LBB11_2
; GLOBAL-NEXT: # %bb.1:
-; GLOBAL-NEXT: xsrsqrtesp 2, 1
-; GLOBAL-NEXT: fneg 0, 1
+; GLOBAL-NEXT: xsrsqrtesp 0, 1
; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha
-; GLOBAL-NEXT: fmr 4, 1
-; GLOBAL-NEXT: lfs 3, .LCPI11_0@toc@l(3)
-; GLOBAL-NEXT: xsmaddasp 4, 0, 3
-; GLOBAL-NEXT: xsmulsp 0, 2, 2
-; GLOBAL-NEXT: xsmaddasp 3, 4, 0
-; GLOBAL-NEXT: xsmulsp 0, 2, 3
-; GLOBAL-NEXT: xsmulsp 0, 0, 1
+; GLOBAL-NEXT: addis 4, 2, .LCPI11_1@toc@ha
+; GLOBAL-NEXT: lfs 2, .LCPI11_0@toc@l(3)
+; GLOBAL-NEXT: lfs 3, .LCPI11_1@toc@l(4)
+; GLOBAL-NEXT: xsmulsp 1, 1, 0
+; GLOBAL-NEXT: xsmaddasp 2, 1, 0
+; GLOBAL-NEXT: xsmulsp 0, 1, 3
+; GLOBAL-NEXT: xsmulsp 0, 0, 2
; GLOBAL-NEXT: .LBB11_2:
; GLOBAL-NEXT: fmr 1, 0
; GLOBAL-NEXT: blr
OpenPOWER on IntegriCloud