From 28b2ae3692e507e307efb3c775d8d3b27e40c10c Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 12 Sep 2013 23:57:55 +0000 Subject: Restore the sqrt -> llvm.sqrt mapping in fast-math mode This restores the sqrt -> llvm.sqrt mapping, but only in fast-math mode (specifically, when the UnsafeFPMath or NoNaNsFPMath CodeGen options are enabled). The @llvm.sqrt* intrinsics have slightly different semantics from the libm calls: specifically, they are undefined when given a non-zero negative number (whereas the libm calls will always return NaN for any negative number). This mapping was removed in r100613 and replaced with a TODO, but at that time the fast-math flags were not yet implemented. Now that we have these, restoring this mapping is important because it will enable autovectorization of sqrt calls in loops (at least in fast-math mode). llvm-svn: 190646 --- clang/lib/CodeGen/CGBuiltin.cpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'clang/lib/CodeGen/CGBuiltin.cpp') diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 5b5b39f5e34..e6cfe64471e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1282,12 +1282,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BIsqrt: case Builtin::BIsqrtf: case Builtin::BIsqrtl: { - // TODO: there is currently no set of optimizer flags - // sufficient for us to rewrite sqrt to @llvm.sqrt. - // -fmath-errno=0 is not good enough; we need finiteness. - // We could probably precondition the call with an ult - // against 0, but is that worth the complexity? - break; + // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only + // in finite- or unsafe-math mode (the intrinsic has different semantics + // for handling negative numbers compared to the library function, so + // -fmath-errno=0 is not enough). 
+ if (!FD->hasAttr<ConstAttr>()) + break; + if (!(CGM.getCodeGenOpts().UnsafeFPMath || + CGM.getCodeGenOpts().NoNaNsFPMath)) + break; + Value *Arg0 = EmitScalarExpr(E->getArg(0)); + llvm::Type *ArgType = Arg0->getType(); + Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); + return RValue::get(Builder.CreateCall(F, Arg0)); } case Builtin::BIpow: -- cgit v1.2.3