diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2014-10-16 18:48:17 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2014-10-16 18:48:17 +0000 |
| commit | c699a6117b0f33739cdbe63fff46f95c79b5133b (patch) | |
| tree | e29dbf975378a9a26369ce78ae503317baaf6026 /llvm/test/Transforms | |
| parent | d70f3c20b8c0ff71638ac2ee774b4e5a021be521 (diff) | |
| download | bcm5719-llvm-c699a6117b0f33739cdbe63fff46f95c79b5133b.tar.gz bcm5719-llvm-c699a6117b0f33739cdbe63fff46f95c79b5133b.zip | |
fold: sqrt(x * x * y) -> fabs(x) * sqrt(y)
If a square root call has an FP multiplication argument that can be reassociated,
then we can hoist a repeated factor out of the square root call and into a fabs().
In the simplest case, this:
y = sqrt(x * x);
becomes this:
y = fabs(x);
This patch relies on an earlier optimization in instcombine or reassociate to put the
multiplication tree into a canonical form, so we don't have to search over
every permutation of the multiplication tree.
Because there are no IR-level FastMathFlags for intrinsics (PR21290), we have to
use function-level attributes to do this optimization. This needs to be fixed
both for the intrinsics and in the backend.
Differential Revision: http://reviews.llvm.org/D5787
llvm-svn: 219944
Diffstat (limited to 'llvm/test/Transforms')
| -rw-r--r-- | llvm/test/Transforms/InstCombine/fast-math.ll | 170 |
1 files changed, 170 insertions, 0 deletions
diff --git a/llvm/test/Transforms/InstCombine/fast-math.ll b/llvm/test/Transforms/InstCombine/fast-math.ll index 2ee4b0f2c38..b0ec8952bb3 100644 --- a/llvm/test/Transforms/InstCombine/fast-math.ll +++ b/llvm/test/Transforms/InstCombine/fast-math.ll @@ -530,3 +530,173 @@ define float @fact_div6(float %x) { ; CHECK: fact_div6 ; CHECK: %t3 = fsub fast float %t1, %t2 } + +; ========================================================================= +; +; Test-cases for square root +; +; ========================================================================= + +; A squared factor fed into a square root intrinsic should be hoisted out +; as a fabs() value. +; We have to rely on a function-level attribute to enable this optimization +; because intrinsics don't currently have access to IR-level fast-math +; flags. If that changes, we can relax the requirement on all of these +; tests to just specify 'fast' on the sqrt. + +attributes #0 = { "unsafe-fp-math" = "true" } + +declare double @llvm.sqrt.f64(double) + +define double @sqrt_intrinsic_arg_squared(double %x) #0 { + %mul = fmul fast double %x, %x + %sqrt = call double @llvm.sqrt.f64(double %mul) + ret double %sqrt + +; CHECK-LABEL: sqrt_intrinsic_arg_squared( +; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x) +; CHECK-NEXT: ret double %fabs +} + +; Check all 6 combinations of a 3-way multiplication tree where +; one factor is repeated. 
+ +define double @sqrt_intrinsic_three_args1(double %x, double %y) #0 { + %mul = fmul fast double %y, %x + %mul2 = fmul fast double %mul, %x + %sqrt = call double @llvm.sqrt.f64(double %mul2) + ret double %sqrt + +; CHECK-LABEL: sqrt_intrinsic_three_args1( +; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x) +; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y) +; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1 +; CHECK-NEXT: ret double %1 +} + +define double @sqrt_intrinsic_three_args2(double %x, double %y) #0 { + %mul = fmul fast double %x, %y + %mul2 = fmul fast double %mul, %x + %sqrt = call double @llvm.sqrt.f64(double %mul2) + ret double %sqrt + +; CHECK-LABEL: sqrt_intrinsic_three_args2( +; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x) +; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y) +; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1 +; CHECK-NEXT: ret double %1 +} + +define double @sqrt_intrinsic_three_args3(double %x, double %y) #0 { + %mul = fmul fast double %x, %x + %mul2 = fmul fast double %mul, %y + %sqrt = call double @llvm.sqrt.f64(double %mul2) + ret double %sqrt + +; CHECK-LABEL: sqrt_intrinsic_three_args3( +; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x) +; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y) +; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1 +; CHECK-NEXT: ret double %1 +} + +define double @sqrt_intrinsic_three_args4(double %x, double %y) #0 { + %mul = fmul fast double %y, %x + %mul2 = fmul fast double %x, %mul + %sqrt = call double @llvm.sqrt.f64(double %mul2) + ret double %sqrt + +; CHECK-LABEL: sqrt_intrinsic_three_args4( +; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x) +; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y) +; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1 +; CHECK-NEXT: ret double %1 +} + +define double @sqrt_intrinsic_three_args5(double %x, double %y) #0 { + %mul = fmul fast double %x, %y + %mul2 = fmul fast double %x, %mul + 
%sqrt = call double @llvm.sqrt.f64(double %mul2) + ret double %sqrt + +; CHECK-LABEL: sqrt_intrinsic_three_args5( +; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x) +; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y) +; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1 +; CHECK-NEXT: ret double %1 +} + +define double @sqrt_intrinsic_three_args6(double %x, double %y) #0 { + %mul = fmul fast double %x, %x + %mul2 = fmul fast double %y, %mul + %sqrt = call double @llvm.sqrt.f64(double %mul2) + ret double %sqrt + +; CHECK-LABEL: sqrt_intrinsic_three_args6( +; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x) +; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y) +; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1 +; CHECK-NEXT: ret double %1 +} + +define double @sqrt_intrinsic_arg_4th(double %x) #0 { + %mul = fmul fast double %x, %x + %mul2 = fmul fast double %mul, %mul + %sqrt = call double @llvm.sqrt.f64(double %mul2) + ret double %sqrt + +; CHECK-LABEL: sqrt_intrinsic_arg_4th( +; CHECK-NEXT: %mul = fmul fast double %x, %x +; CHECK-NEXT: ret double %mul +} + +define double @sqrt_intrinsic_arg_5th(double %x) #0 { + %mul = fmul fast double %x, %x + %mul2 = fmul fast double %mul, %x + %mul3 = fmul fast double %mul2, %mul + %sqrt = call double @llvm.sqrt.f64(double %mul3) + ret double %sqrt + +; CHECK-LABEL: sqrt_intrinsic_arg_5th( +; CHECK-NEXT: %mul = fmul fast double %x, %x +; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %x) +; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1 +; CHECK-NEXT: ret double %1 +} + +; Check that square root calls have the same behavior. 
+ +declare float @sqrtf(float) +declare double @sqrt(double) +declare fp128 @sqrtl(fp128) + +define float @sqrt_call_squared_f32(float %x) #0 { + %mul = fmul fast float %x, %x + %sqrt = call float @sqrtf(float %mul) + ret float %sqrt + +; CHECK-LABEL: sqrt_call_squared_f32( +; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x) +; CHECK-NEXT: ret float %fabs +} + +define double @sqrt_call_squared_f64(double %x) #0 { + %mul = fmul fast double %x, %x + %sqrt = call double @sqrt(double %mul) + ret double %sqrt + +; CHECK-LABEL: sqrt_call_squared_f64( +; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x) +; CHECK-NEXT: ret double %fabs +} + +define fp128 @sqrt_call_squared_f128(fp128 %x) #0 { + %mul = fmul fast fp128 %x, %x + %sqrt = call fp128 @sqrtl(fp128 %mul) + ret fp128 %sqrt + +; CHECK-LABEL: sqrt_call_squared_f128( +; CHECK-NEXT: %fabs = call fp128 @llvm.fabs.f128(fp128 %x) +; CHECK-NEXT: ret fp128 %fabs +} + |

