diff options
author | Weiming Zhao <weimingz@codeaurora.org> | 2015-12-04 22:00:47 +0000 |
---|---|---|
committer | Weiming Zhao <weimingz@codeaurora.org> | 2015-12-04 22:00:47 +0000 |
commit | 8213072a453bcb012b31d231dae4f44a589ddd52 (patch) | |
tree | b304e388662152ead7974a33788200a667bf0077 /llvm | |
parent | b51aafd28e949f97fae9af70ca9e3ea9b683f098 (diff) | |
download | bcm5719-llvm-8213072a453bcb012b31d231dae4f44a589ddd52.tar.gz bcm5719-llvm-8213072a453bcb012b31d231dae4f44a589ddd52.zip |
[SimplifyLibCalls] Optimization for pow(x, n) where n is some constant
Summary:
In order to avoid calling pow function we generate repeated fmul when n is a
positive or negative whole number.
For each exponent we pre-compute Addition Chains in order to minimize the no.
of fmuls.
Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html
We pre-compute addition chains for exponents upto 32 (which results in a max of
7 fmuls).
For eg:
4 = 2+2
5 = 2+3
6 = 3+3 and so on
Hence,
pow(x, 4.0) ==> y = fmul x, x
x = fmul y, y
ret x
For negative exponents, we simply compute the reciprocal of the final result.
Note: This transformation is only enabled under fast-math.
Patch by Mandeep Singh Grang <mgrang@codeaurora.org>
Reviewers: weimingz, majnemer, escha, davide, scanon, joerg
Subscribers: probinson, escha, llvm-commits
Differential Revision: http://reviews.llvm.org/D13994
llvm-svn: 254776
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 51 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/pow-4.ll | 120 |
2 files changed, 171 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 83afb1a65ac..df75ed96893 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1058,6 +1058,31 @@ Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) { return Ret; } +static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) { + // Multiplications calculated using Addition Chains. + // Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html + + assert(Exp != 0 && "Incorrect exponent 0 not handled"); + + if (InnerChain[Exp]) + return InnerChain[Exp]; + + static const unsigned AddChain[33][2] = { + {0, 0}, // Unused. + {0, 0}, // Unused (base case = pow1). + {1, 1}, // Unused (pre-computed). + {1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}, {4, 4}, + {1, 8}, {5, 5}, {1, 10}, {6, 6}, {4, 9}, {7, 7}, + {3, 12}, {8, 8}, {8, 9}, {2, 16}, {1, 18}, {10, 10}, + {6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13}, + {3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16}, + }; + + InnerChain[Exp] = B.CreateFMul(getPow(InnerChain, AddChain[Exp][0], B), + getPow(InnerChain, AddChain[Exp][1], B)); + return InnerChain[Exp]; +} + Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Value *Ret = nullptr; @@ -1156,6 +1181,32 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { return B.CreateFMul(Op1, Op1, "pow2"); if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); + + // In -ffast-math, generate repeated fmul instead of generating pow(x, n). + if (unsafeFPMath) { + APFloat V = abs(Op2C->getValueAPF()); + // We limit to a max of 7 fmul(s). Thus max exponent is 32. + // This transformation applies to integer exponents only. + if (V.compare(APFloat(V.getSemantics(), 32.0)) == APFloat::cmpGreaterThan || + !V.isInteger()) + return nullptr; + + // We will memoize intermediate products of the Addition Chain. + Value *InnerChain[33] = {nullptr}; + InnerChain[1] = Op1; + InnerChain[2] = B.CreateFMul(Op1, Op1); + + // We cannot readily convert a non-double type (like float) to a double. + // So we first convert V to something which could be converted to double. + bool ignored; + V.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored); + Value *FMul = getPow(InnerChain, V.convertToDouble(), B); + // For negative exponents simply compute the reciprocal. + if (Op2C->isNegative()) + FMul = B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), FMul); + return FMul; + } + return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/pow-4.ll b/llvm/test/Transforms/InstCombine/pow-4.ll new file mode 100644 index 00000000000..76ef4c5de92 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/pow-4.ll @@ -0,0 +1,120 @@ +; Test that the pow library call simplifier works correctly. + +; RUN: opt -instcombine -S < %s | FileCheck %s + +; Function Attrs: nounwind readnone +declare double @llvm.pow.f64(double, double) +declare float @llvm.pow.f32(float, float) + +; pow(x, 4.0f) +define float @test_simplify_4f(float %x) #0 { +; CHECK-LABEL: @test_simplify_4f( +; CHECK-NOT: pow +; CHECK-NEXT: %1 = fmul float %x, %x +; CHECK-NEXT: %2 = fmul float %1, %1 +; CHECK-NEXT: ret float %2 + %1 = call float @llvm.pow.f32(float %x, float 4.000000e+00) + ret float %1 +} + +; pow(x, 3.0) +define double @test_simplify_3(double %x) #0 { +; CHECK-LABEL: @test_simplify_3( +; CHECK-NOT: pow +; CHECK-NEXT: %1 = fmul double %x, %x +; CHECK-NEXT: %2 = fmul double %1, %x +; CHECK-NEXT: ret double %2 + %1 = call double @llvm.pow.f64(double %x, double 3.000000e+00) + ret double %1 +} + +; pow(x, 4.0) +define double @test_simplify_4(double %x) #0 { +; CHECK-LABEL: @test_simplify_4( +; CHECK-NOT: pow +; CHECK-NEXT: %1 = fmul double %x, %x +; CHECK-NEXT: %2 = fmul double %1, %1 +; CHECK-NEXT: ret double %2 + %1 = call double @llvm.pow.f64(double %x, double 4.000000e+00) + ret double %1 +} + +; pow(x, 15.0) +define double @test_simplify_15(double %x) #0 { +; CHECK-LABEL: @test_simplify_15( +; CHECK-NOT: pow +; CHECK-NEXT: %1 = fmul double %x, %x +; CHECK-NEXT: %2 = fmul double %1, %x +; CHECK-NEXT: %3 = fmul double %2, %2 +; CHECK-NEXT: %4 = fmul double %3, %3 +; CHECK-NEXT: %5 = fmul double %2, %4 +; CHECK-NEXT: ret double %5 + %1 = call double @llvm.pow.f64(double %x, double 1.500000e+01) + ret double %1 +} + +; pow(x, -7.0) +define double @test_simplify_neg_7(double %x) #0 { +; CHECK-LABEL: @test_simplify_neg_7( +; CHECK-NOT: pow +; CHECK-NEXT: %1 = fmul double %x, %x +; CHECK-NEXT: %2 = fmul double %1, %x +; CHECK-NEXT: %3 = fmul double %1, %2 +; CHECK-NEXT: %4 = fmul double %1, %3 +; CHECK-NEXT: %5 = fdiv double 1.000000e+00, %4 +; CHECK-NEXT: ret double %5 + %1 = call double @llvm.pow.f64(double %x, double -7.000000e+00) + ret double %1 +} + +; pow(x, -19.0) +define double @test_simplify_neg_19(double %x) #0 { +; CHECK-LABEL: @test_simplify_neg_19( +; CHECK-NOT: pow +; CHECK-NEXT: %1 = fmul double %x, %x +; CHECK-NEXT: %2 = fmul double %1, %1 +; CHECK-NEXT: %3 = fmul double %2, %2 +; CHECK-NEXT: %4 = fmul double %3, %3 +; CHECK-NEXT: %5 = fmul double %1, %4 +; CHECK-NEXT: %6 = fmul double %5, %x +; CHECK-NEXT: %7 = fdiv double 1.000000e+00, %6 +; CHECK-NEXT: ret double %7 + %1 = call double @llvm.pow.f64(double %x, double -1.900000e+01) + ret double %1 +} + +; pow(x, 11.23) +define double @test_simplify_11_23(double %x) #0 { +; CHECK-LABEL: @test_simplify_11_23( +; CHECK-NOT: fmul +; CHECK-NEXT: %1 = call double @llvm.pow.f64(double %x, double 1.123000e+01) +; CHECK-NEXT: ret double %1 + %1 = call double @llvm.pow.f64(double %x, double 1.123000e+01) + ret double %1 +} + +; pow(x, 32.0) +define double @test_simplify_32(double %x) #0 { +; CHECK-LABEL: @test_simplify_32( +; CHECK-NOT: pow +; CHECK-NEXT: %1 = fmul double %x, %x +; CHECK-NEXT: %2 = fmul double %1, %1 +; CHECK-NEXT: %3 = fmul double %2, %2 +; CHECK-NEXT: %4 = fmul double %3, %3 +; CHECK-NEXT: %5 = fmul double %4, %4 +; CHECK-NEXT: ret double %5 + %1 = call double @llvm.pow.f64(double %x, double 3.200000e+01) + ret double %1 +} + +; pow(x, 33.0) +define double @test_simplify_33(double %x) #0 { +; CHECK-LABEL: @test_simplify_33( +; CHECK-NOT: fmul +; CHECK-NEXT: %1 = call double @llvm.pow.f64(double %x, double 3.300000e+01) +; CHECK-NEXT: ret double %1 + %1 = call double @llvm.pow.f64(double %x, double 3.300000e+01) + ret double %1 +} + +attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="true" "use-soft-float"="false" } |