diff options
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 19 | ||||
-rw-r--r-- | clang/test/CodeGen/libcalls.c | 5 |
2 files changed, 18 insertions, 6 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 5b5b39f5e34..e6cfe64471e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1282,12 +1282,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BIsqrt: case Builtin::BIsqrtf: case Builtin::BIsqrtl: { - // TODO: there is currently no set of optimizer flags - // sufficient for us to rewrite sqrt to @llvm.sqrt. - // -fmath-errno=0 is not good enough; we need finiteness. - // We could probably precondition the call with an ult - // against 0, but is that worth the complexity? - break; + // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only + // in finite- or unsafe-math mode (the intrinsic has different semantics + // for handling negative numbers compared to the library function, so + // -fmath-errno=0 is not enough). + if (!FD->hasAttr<ConstAttr>()) + break; + if (!(CGM.getCodeGenOpts().UnsafeFPMath || + CGM.getCodeGenOpts().NoNaNsFPMath)) + break; + Value *Arg0 = EmitScalarExpr(E->getArg(0)); + llvm::Type *ArgType = Arg0->getType(); + Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); + return RValue::get(Builder.CreateCall(F, Arg0)); } case Builtin::BIpow: diff --git a/clang/test/CodeGen/libcalls.c b/clang/test/CodeGen/libcalls.c index 520b79b146a..3112c875734 100644 --- a/clang/test/CodeGen/libcalls.c +++ b/clang/test/CodeGen/libcalls.c @@ -1,8 +1,10 @@ // RUN: %clang_cc1 -fmath-errno -emit-llvm -o - %s -triple i386-unknown-unknown | FileCheck -check-prefix CHECK-YES %s // RUN: %clang_cc1 -emit-llvm -o - %s -triple i386-unknown-unknown | FileCheck -check-prefix CHECK-NO %s +// RUN: %clang_cc1 -menable-unsafe-fp-math -emit-llvm -o - %s -triple i386-unknown-unknown | FileCheck -check-prefix CHECK-FAST %s // CHECK-YES-LABEL: define void @test_sqrt // CHECK-NO-LABEL: define void @test_sqrt +// CHECK-FAST-LABEL: define void @test_sqrt void test_sqrt(float a0, double a1, long double a2) { // Following llvm-gcc's lead, we never emit these as intrinsics; // no-math-errno isn't good enough. We could probably use intrinsics @@ -27,6 +29,9 @@ void test_sqrt(float a0, double a1, long double a2) { // CHECK-NO: declare float @sqrtf(float) [[NUW_RN:#[0-9]+]] // CHECK-NO: declare double @sqrt(double) [[NUW_RN]] // CHECK-NO: declare x86_fp80 @sqrtl(x86_fp80) [[NUW_RN]] +// CHECK-FAST: declare float @llvm.sqrt.f32(float) +// CHECK-FAST: declare double @llvm.sqrt.f64(double) +// CHECK-FAST: declare x86_fp80 @llvm.sqrt.f80(x86_fp80) // CHECK-YES-LABEL: define void @test_pow // CHECK-NO-LABEL: define void @test_pow |