diff options
author | Sanjay Patel <spatel@rotateright.com> | 2014-10-06 19:31:18 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2014-10-06 19:31:18 +0000 |
commit | 7bc9185ab58e29ea5f6b13f203f04a80f3c2ec68 (patch) | |
tree | 64eeaf092e2ebf663bdeeddc7dbb6c85e62a4d77 /llvm/test/CodeGen | |
parent | cad3a5f7d4742a25dcd7dd1b5b87aca2f43f8311 (diff) | |
download | bcm5719-llvm-7bc9185ab58e29ea5f6b13f203f04a80f3c2ec68.tar.gz bcm5719-llvm-7bc9185ab58e29ea5f6b13f203f04a80f3c2ec68.zip |
Fast-math fold: x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
The motivation is to recognize code such as this from /llvm/projects/test-suite/SingleSource/Benchmarks/BenchmarkGame/n-body.c:
float distance = sqrt(dx * dx + dy * dy + dz * dz);
float mag = dt / (distance * distance * distance);
Without this patch, we don't match the sqrt as a reciprocal sqrt, so for PPC the new testcase in this patch produces:
addis 3, 2, .LCPI4_2@toc@ha
lfs 4, .LCPI4_2@toc@l(3)
addis 3, 2, .LCPI4_1@toc@ha
lfs 0, .LCPI4_1@toc@l(3)
fcmpu 0, 1, 4
beq 0, .LBB4_2
# BB#1:
frsqrtes 4, 1
addis 3, 2, .LCPI4_0@toc@ha
lfs 5, .LCPI4_0@toc@l(3)
fnmsubs 13, 1, 5, 1
fmuls 6, 4, 4
fmadds 1, 13, 6, 5
fmuls 1, 4, 1
fres 4, 1 <--- reciprocal of reciprocal square root
fnmsubs 1, 1, 4, 0
fmadds 4, 4, 1, 4
.LBB4_2:
fmuls 1, 4, 2
fres 2, 1
fnmsubs 0, 1, 2, 0
fmadds 0, 2, 0, 2
fmuls 1, 3, 0
blr
After the patch, this simplifies to:
frsqrtes 0, 1
addis 3, 2, .LCPI4_1@toc@ha
fres 5, 2
lfs 4, .LCPI4_1@toc@l(3)
addis 3, 2, .LCPI4_0@toc@ha
lfs 7, .LCPI4_0@toc@l(3)
fnmsubs 13, 1, 4, 1
fmuls 6, 0, 0
fnmsubs 2, 2, 5, 7
fmadds 1, 13, 6, 4
fmadds 2, 5, 2, 5
fmuls 0, 0, 1
fmuls 0, 0, 2
fmuls 1, 3, 0
blr
Differential Revision: http://reviews.llvm.org/D5628
llvm-svn: 219139
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/PowerPC/recipest.ll | 28 |
1 files changed, 28 insertions, 0 deletions
```diff
diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll
index d9c5d4061c8..de74c043ece 100644
--- a/llvm/test/CodeGen/PowerPC/recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/recipest.ll
@@ -96,6 +96,34 @@ define float @goo(float %a, float %b) nounwind {
 ; CHECK-SAFE: blr
 }
 
+; Recognize that this is rsqrt(a) * rcp(b) * c,
+; not 1 / ( 1 / sqrt(a)) * rcp(b) * c.
+define float @rsqrt_fmul(float %a, float %b, float %c) {
+  %x = call float @llvm.sqrt.f32(float %a)
+  %y = fmul float %x, %b
+  %z = fdiv float %c, %y
+  ret float %z
+
+; CHECK: @rsqrt_fmul
+; CHECK-DAG: frsqrtes
+; CHECK-DAG: fres
+; CHECK-DAG: fnmsubs
+; CHECK-DAG: fmuls
+; CHECK-DAG: fnmsubs
+; CHECK-DAG: fmadds
+; CHECK-DAG: fmadds
+; CHECK: fmuls
+; CHECK-NEXT: fmuls
+; CHECK-NEXT: fmuls
+; CHECK-NEXT: blr
+
+; CHECK-SAFE: @rsqrt_fmul
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fmuls
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
 define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind {
   %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
   %r = fdiv <4 x float> %a, %x
```