summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/PowerPC/recipest.ll
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2014-10-06 19:31:18 +0000
committerSanjay Patel <spatel@rotateright.com>2014-10-06 19:31:18 +0000
commit7bc9185ab58e29ea5f6b13f203f04a80f3c2ec68 (patch)
tree64eeaf092e2ebf663bdeeddc7dbb6c85e62a4d77 /llvm/test/CodeGen/PowerPC/recipest.ll
parentcad3a5f7d4742a25dcd7dd1b5b87aca2f43f8311 (diff)
downloadbcm5719-llvm-7bc9185ab58e29ea5f6b13f203f04a80f3c2ec68.tar.gz
bcm5719-llvm-7bc9185ab58e29ea5f6b13f203f04a80f3c2ec68.zip
Fast-math fold: x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
The motivation is to recognize code such as this from /llvm/projects/test-suite/SingleSource/Benchmarks/BenchmarkGame/n-body.c: float distance = sqrt(dx * dx + dy * dy + dz * dz); float mag = dt / (distance * distance * distance); Without this patch, we don't match the sqrt as a reciprocal sqrt, so for PPC the new testcase in this patch produces: addis 3, 2, .LCPI4_2@toc@ha lfs 4, .LCPI4_2@toc@l(3) addis 3, 2, .LCPI4_1@toc@ha lfs 0, .LCPI4_1@toc@l(3) fcmpu 0, 1, 4 beq 0, .LBB4_2 # BB#1: frsqrtes 4, 1 addis 3, 2, .LCPI4_0@toc@ha lfs 5, .LCPI4_0@toc@l(3) fnmsubs 13, 1, 5, 1 fmuls 6, 4, 4 fmadds 1, 13, 6, 5 fmuls 1, 4, 1 fres 4, 1 <--- reciprocal of reciprocal square root fnmsubs 1, 1, 4, 0 fmadds 4, 4, 1, 4 .LBB4_2: fmuls 1, 4, 2 fres 2, 1 fnmsubs 0, 1, 2, 0 fmadds 0, 2, 0, 2 fmuls 1, 3, 0 blr After the patch, this simplifies to: frsqrtes 0, 1 addis 3, 2, .LCPI4_1@toc@ha fres 5, 2 lfs 4, .LCPI4_1@toc@l(3) addis 3, 2, .LCPI4_0@toc@ha lfs 7, .LCPI4_0@toc@l(3) fnmsubs 13, 1, 4, 1 fmuls 6, 0, 0 fnmsubs 2, 2, 5, 7 fmadds 1, 13, 6, 4 fmadds 2, 5, 2, 5 fmuls 0, 0, 1 fmuls 0, 0, 2 fmuls 1, 3, 0 blr Differential Revision: http://reviews.llvm.org/D5628 llvm-svn: 219139
Diffstat (limited to 'llvm/test/CodeGen/PowerPC/recipest.ll')
-rw-r--r--llvm/test/CodeGen/PowerPC/recipest.ll28
1 files changed, 28 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll
index d9c5d4061c8..de74c043ece 100644
--- a/llvm/test/CodeGen/PowerPC/recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/recipest.ll
@@ -96,6 +96,34 @@ define float @goo(float %a, float %b) nounwind {
; CHECK-SAFE: blr
}
+; Recognize that this is rsqrt(a) * rcp(b) * c,
+; not 1 / ( 1 / sqrt(a)) * rcp(b) * c.
+define float @rsqrt_fmul(float %a, float %b, float %c) {
+ %x = call float @llvm.sqrt.f32(float %a)
+ %y = fmul float %x, %b
+ %z = fdiv float %c, %y
+ ret float %z
+
+; CHECK: @rsqrt_fmul
+; CHECK-DAG: frsqrtes
+; CHECK-DAG: fres
+; CHECK-DAG: fnmsubs
+; CHECK-DAG: fmuls
+; CHECK-DAG: fnmsubs
+; CHECK-DAG: fmadds
+; CHECK-DAG: fmadds
+; CHECK: fmuls
+; CHECK-NEXT: fmuls
+; CHECK-NEXT: fmuls
+; CHECK-NEXT: blr
+
+; CHECK-SAFE: @rsqrt_fmul
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fmuls
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind {
%x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
%r = fdiv <4 x float> %a, %x
OpenPOWER on IntegriCloud