diff options
author | Sanjay Patel <spatel@rotateright.com> | 2015-08-16 20:18:19 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2015-08-16 20:18:19 +0000 |
commit | 57fd1dc5db880044c6899eae5b840caac1fe790e (patch) | |
tree | 27175c4bad090bcdca1ecb8dbbb1c1315a624ed0 /llvm/lib | |
parent | 3278b7cd7c80231b2e93edadea922b09051264fa (diff) | |
download | bcm5719-llvm-57fd1dc5db880044c6899eae5b840caac1fe790e.tar.gz bcm5719-llvm-57fd1dc5db880044c6899eae5b840caac1fe790e.zip |
transform fmin/fmax calls when possible (PR24314)
If we can ignore NaNs, fmin/fmax libcalls can become compare and select
(this is what we turn std::min / std::max into).
This IR should then be optimized in the backend to whatever is best for
any given target. E.g., x86 can use the minss/maxss instructions.
This should solve PR24314:
https://llvm.org/bugs/show_bug.cgi?id=24314
Differential Revision: http://reviews.llvm.org/D11866
llvm-svn: 245187
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 63 |
1 files changed, 61 insertions, 2 deletions
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index bb03c98b492..50513642016 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1184,6 +1184,60 @@ Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) { return Ret; } +Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { + // If we can shrink the call to a float function rather than a double + // function, do that first. + Function *Callee = CI->getCalledFunction(); + if ((Callee->getName() == "fmin" && TLI->has(LibFunc::fminf)) || + (Callee->getName() == "fmax" && TLI->has(LibFunc::fmaxf))) { + Value *Ret = optimizeBinaryDoubleFP(CI, B); + if (Ret) + return Ret; + } + + // Make sure this has 2 arguments of FP type which match the result type. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + !FT->getParamType(0)->isFloatingPointTy()) + return nullptr; + + // FIXME: For finer-grain optimization, we need intrinsics to have the same + // fast-math flag decorations that are applied to FP instructions. For now, + // we have to rely on the function-level attributes to do this optimization + // because there's no other way to express that the calls can be relaxed. + IRBuilder<true, ConstantFolder, + IRBuilderDefaultInserter<true> >::FastMathFlagGuard Guard(B); + FastMathFlags FMF; + Function *F = CI->getParent()->getParent(); + Attribute Attr = F->getFnAttribute("unsafe-fp-math"); + if (Attr.getValueAsString() == "true") { + // Unsafe algebra sets all fast-math-flags to true. + FMF.setUnsafeAlgebra(); + } else { + // At a minimum, no-nans-fp-math must be true. 
+ Attr = F->getFnAttribute("no-nans-fp-math"); + if (Attr.getValueAsString() != "true") + return nullptr; + // No-signed-zeros is implied by the definitions of fmax/fmin themselves: + // "Ideally, fmax would be sensitive to the sign of zero, for example + // fmax(−0. 0, +0. 0) would return +0; however, implementation in software + // might be impractical." + FMF.setNoSignedZeros(); + FMF.setNoNaNs(); + } + B.SetFastMathFlags(FMF); + + // We have a relaxed floating-point environment. We can ignore NaN-handling + // and transform to a compare and select. We do not have to consider errno or + // exceptions, because fmin/fmax do not have those. + Value *Op0 = CI->getArgOperand(0); + Value *Op1 = CI->getArgOperand(1); + Value *Cmp = Callee->getName().startswith("fmin") ? + B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1); + return B.CreateSelect(Cmp, Op0, Op1); +} + Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); @@ -2110,11 +2164,16 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return optimizeUnaryDoubleFP(CI, Builder, true); return nullptr; case LibFunc::copysign: - case LibFunc::fmin: - case LibFunc::fmax: if (hasFloatVersion(FuncName)) return optimizeBinaryDoubleFP(CI, Builder); return nullptr; + case LibFunc::fminf: + case LibFunc::fmin: + case LibFunc::fminl: + case LibFunc::fmaxf: + case LibFunc::fmax: + case LibFunc::fmaxl: + return optimizeFMinFMax(CI, Builder); default: return nullptr; } |